# Historical Portfolio Construction
#### Northeastern University Student Value Fund
#### Miles Child - Portfolio Manager
____________

In this notebook, we will conduct a historical analysis of fund performance from inception to present (Sept 2023). This will be done with the purpose of:

- Understanding our performance and risk/return metrics against various benchmarks
- Illustrating the negative impact of haphazard timing/trade execution on portfolio performance
- Demonstrating how a theoretical portfolio with rational holdings, weights, and execution times would have performed during the same period

____________

In [1]:
# imports
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import yfinance as yf  # <-- for hist px data
from portfolio_builder import Trade, PortfolioBuilder
import datetime

Ingesting historical portfolio data

In [2]:
transactions = pd.read_excel('../data/Investment_Transaction_Detail (1).xlsx')
transactions.head()

Unnamed: 0,Consolidation,Account Number,From date,Through date,I-TRAN,C-TRD-STAT-7,Value,Quantity,Currency code,Date,...,N-INV-SUB-CATG,Error code,Ticker,N-CNTRY-P2,Consolidation Audit Indicator Flag,Accrued Interest,C-ASSET-LIAB,Narrative - Short,Mutual Fund Flag,Wash sale adjustment
0,4475530,4475530,2009-01-01,2023-09-08,647038213,Settled,200000.0,200000.0,USD,2019-01-09,...,Cash,,CASH,United States,N,0,,,,0
1,4475530,4475530,2009-01-01,2023-09-08,647038213,Settled,3847.5,25.0,USD,2019-01-09,...,Common Stock,,PH,United States,N,0,,PURCHASED 25.00 SHARES 01-07-19 AT A PRICE OF ...,,0
2,4475530,4475530,2009-01-01,2023-09-08,647038213,Settled,-3847.5,-3874.5,USD,2019-01-09,...,Cash,,CASH,United States,N,0,,,,0
3,4475530,4475530,2009-01-01,2023-09-08,573190268,Settled,6208.75,725.0,USD,2019-04-26,...,Common Stock,,NTDOY,Japan,N,0,,PURCHASED 145.00 SHARES 04-24-19 AT A PRICE OF...,,0
4,4475530,4475530,2009-01-01,2023-09-08,647038213,Settled,-6208.75,-6208.75,USD,2019-04-26,...,Cash,,CASH,United States,N,0,,,,0


In [3]:
trades = []
for i in range(len(transactions)):
    trade = Trade(ticker=transactions['Ticker'][i],
                  date=transactions['Date'][i],
                  qty=transactions['Quantity'][i],
                  price=transactions['Value'][i] / transactions['Quantity'][i])
    trades.append(trade)
trades

[CASH 200000.0 2019-01-09 00:00:00 1,
 PH 25.0 2019-01-09 00:00:00 153.9,
 CASH -3874.5 2019-01-09 00:00:00 1,
 NTDOY 725.0 2019-04-26 00:00:00 8.563793103448276,
 CASH -6208.75 2019-04-26 00:00:00 1,
 ABMD 32.0 2019-04-26 00:00:00 260.3725,
 CASH -8331.92 2019-04-26 00:00:00 1,
 HCA -35.0 2019-04-26 00:00:00 80.69657142857143,
 CASH 2824.38 2019-04-26 00:00:00 1,
 CSIQ 350.0 2019-10-31 00:00:00 17.779,
 CASH -6222.65 2019-10-31 00:00:00 1,
 NTDOY 570.0 2019-12-06 00:00:00 10.205543859649122,
 CASH -5817.16 2019-12-06 00:00:00 1,
 SATS 298.0 2019-12-06 00:00:00 40.0701677852349,
 CASH -11940.91 2019-12-06 00:00:00 1,
 SCPL 1062.0 2019-12-06 00:00:00 12.789096045197741,
 CASH -13582.02 2019-12-06 00:00:00 1,
 EVVTY 190.0 2021-04-15 00:00:00 166.04263157894735,
 CASH -31548.1 2021-04-15 00:00:00 1,
 REGI 199.0 2021-04-15 00:00:00 60.39015075376884,
 CASH -12017.64 2021-04-15 00:00:00 1,
 GLXZ 11299.0 2021-04-15 00:00:00 3.005799628285689,
 CASH -33962.53 2021-04-15 00:00:00 1,
 PCYO 1725

In [4]:
backup_data = pd.read_excel('../data/backup_data.xlsx')
backup_data.head()

Unnamed: 0,Date,ABMD,MAXR,ECOM,REGI
0,2022-11-12,374.1,52.99,23.09,61.5
1,2022-11-11,373.99,52.99,23.08,61.45
2,2022-11-10,372.78,52.99,23.08,61.47
3,2022-11-09,374.0,52.99,23.06,61.37
4,2022-11-08,374.0,52.99,23.06,61.39


In [5]:
dt = datetime.datetime(2022, 11, 12)
px = backup_data[backup_data['Date'] <= dt]['MAXR'].values[0]
px

52.99

In [6]:
pb = PortfolioBuilder(trades, backup_price_data=backup_data)
transactions = pb.get_transactions()
transactions.head()

Unnamed: 0,ticker,qty,price,date
0,CASH,200000.0,1.0,2019-01-09
0,PH,25.0,153.9,2019-01-09
0,CASH,-3874.5,1.0,2019-01-09
0,NTDOY,725.0,8.563793,2019-04-26
0,CASH,-6208.75,1.0,2019-04-26


In [7]:
portfolio = pb.get_portfolio()
portfolio.head(25)

Unnamed: 0,Ticker,Weight,Value,Qty,Avg,Current,Gain/Loss $,Gain/Loss %
0,CASH,0.222162,46815.81,46815.81,1.0,1.0,0.0,0.0
0,NTDOY,0.080383,16938.899817,1595.0,9.796683,10.62,1313.189817,0.08404
0,EVVTY,0.088867,18726.750374,175.0,166.042629,107.010002,-10330.709626,-0.355527
0,CVLT,0.087102,18354.83091,271.0,68.409483,67.730003,-184.13909,-0.009933
0,NSIT,0.102904,21684.750443,145.0,104.323931,149.550003,6557.780443,0.433516
0,DAR,0.087721,18485.289852,323.0,58.64065,57.23,-455.640148,-0.024056
0,SMLP,0.075394,15887.760309,1158.0,15.870898,13.72,-2490.739691,-0.135525
0,PYPL,0.075567,15924.079773,248.0,74.277218,64.209999,-2496.670227,-0.135536
0,GM,0.09022,19012.000427,560.0,31.839607,33.950001,1181.820427,0.066282
0,MTN,0.089682,18898.49968,75.0,239.853333,251.979996,909.49968,0.050559


In [12]:
prices = pb.get_price_df(ticker="MAXR", start_date=datetime.datetime(2021, 1, 1), end_date=datetime.datetime(2023, 9, 1))
prices.to_excel('prices.xlsx')
prices.head(25)

Unnamed: 0,Date,MAXR
2021-01-01,NaT,
2021-01-02,NaT,
2021-01-03,NaT,
2021-01-04,NaT,
2021-01-05,NaT,
2021-01-06,NaT,
2021-01-07,NaT,
2021-01-08,NaT,
2021-01-09,NaT,
2021-01-10,NaT,


In [9]:
df = pb.backup_price_data

start_date = datetime.datetime(2020, 1, 1)
end_date = datetime.datetime(2022, 11, 12)
ticker = "MAXR"
price_df = pd.DataFrame(index=[d.date() for d in pd.date_range(start=start_date, end=end_date)])
        
if ticker in df.columns:
    tmp_price_df = df[df['Date'] <= end_date].drop(columns=[c for c in df.columns if c != ticker and c != 'Date'])
else:
    tmp_price_df = yf.Ticker(ticker).history(period='1d', start=start_date, end=end_date)
    tmp_price_df.index = tmp_price_df.index.date
    tmp_price_df.drop(columns=[c for c in tmp_price_df.columns if c != 'Close'], inplace=True)

# merge on the dates that we have
price_df = price_df.merge(tmp_price_df, how='left', left_index=True, right_index=True)
# fill forward
price_df = price_df.fillna(method='ffill')
# fill backward
price_df = price_df.fillna(method='bfill')
tmp_price_df.head()

Unnamed: 0,Date,MAXR
0,2022-11-12,52.99
1,2022-11-11,52.99
2,2022-11-10,52.99
3,2022-11-09,52.99
4,2022-11-08,52.99


In [10]:
vot = pb.get_value_over_time(start_date=datetime.datetime(2020, 1, 1), end_date=datetime.datetime(2023, 9, 12))
vot.to_excel('vot.xlsx')

AttributeError: 'NotImplementedType' object has no attribute '_indexed_same'

In [10]:
vot.head()

Unnamed: 0,Value
2020-01-01,
2020-01-02,
2020-01-03,
2020-01-04,
2020-01-05,


________

In [None]:
dated_portfolio = pb.get_portfolio(up_to=datetime.datetime(2022, 11, 12))
dated_portfolio.head(25)

MAXR: No data found, symbol may be delisted


Unnamed: 0,Ticker,Weight,Value,Qty,Avg,Current,Gain/Loss $,Gain/Loss %
0,NTDOY,0.127612,16173.300548,1595.0,9.796683,10.14,547.590548,0.035044
0,EVVTY,0.131068,16611.32412,175.0,166.042629,94.921852,-12446.13588,-0.428328
0,GLXZ,0.093258,11819.339566,5051.0,3.005799,2.34,-3362.950434,-0.221505
0,FREE,0.042253,5355.120112,1684.0,13.572298,3.18,-17500.629888,-0.765699
0,CVLT,0.143648,18205.780083,271.0,68.409483,67.18,-333.189917,-0.017972
0,MITK,0.107625,13640.279955,1182.0,15.7328,11.54,-4955.890045,-0.266501
0,MAXR,0.187311,23739.52,448.0,30.315089,52.99,10158.36,0.747974
0,NSIT,0.113928,14439.100266,145.0,104.323931,99.580002,-687.869734,-0.045473
0,TUP,0.053297,6754.799752,1299.0,15.1906,5.2,-12977.790248,-0.657683


In [14]:
import matplotlib.pyplot as plt
vot = pb.get_value_over_time(start_date=datetime.datetime(2019, 1, 1), end_date=datetime.datetime(2023, 9, 1))

APG: No data found for this date range, symbol may be delisted
CVLT: No data found for this date range, symbol may be delisted
MITK: No data found for this date range, symbol may be delisted
PH: No data found for this date range, symbol may be delisted
NTDOY: No data found for this date range, symbol may be delisted
ABMD: No data found for this date range, symbol may be delisted
CSIQ: No data found for this date range, symbol may be delisted
SATS: No data found for this date range, symbol may be delisted
SCPL: No data found for this date range, symbol may be delisted
EVVTY: No data found for this date range, symbol may be delisted
REGI: No data found for this date range, symbol may be delisted
GLXZ: No data found for this date range, symbol may be delisted
PCYO: No data found for this date range, symbol may be delisted
FREE: No data found for this date range, symbol may be delisted
APG: No data found for this date range, symbol may be delisted
CVLT: No data found for this date range, s

In [16]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(x=[d for d in pd.date_range(datetime.datetime(2023, 1, 1), datetime.datetime(2023, 9, 1))], y=vot, mode='lines'))
fig.show()