### Importing Files ###

In [1]:
import pandas as pd
import glob
import os

In [2]:
# Collect the per-ETF price files. glob returns matches in filesystem-dependent
# order, so sort them to keep the row order of the combined frame reproducible.
csv_files = sorted(glob.glob("inputs/*.csv"))
if not csv_files:
    # Without this guard pd.concat([]) fails with "No objects to concatenate",
    # which hides the real problem (missing input files / wrong working dir).
    raise FileNotFoundError("No CSV files found matching 'inputs/*.csv'")

dfs = []

for file in csv_files:
    df = pd.read_csv(file)
    # Label every row with the file's base name (extension stripped), e.g.
    # "inputs/AOR.csv" -> "AOR".
    df['ETF'] = os.path.splitext(os.path.basename(file))[0]
    dfs.append(df)

# Stack all files into one tall frame and drop the price/volume columns that
# are not used below. drop() returns a new frame, so re-running this cell is
# safe and nothing is mutated in place.
etfs_tall = pd.concat(dfs, ignore_index=True).drop(
    columns=['Open', 'High', 'Low', 'Close', 'Volume']
)
etfs_tall

Unnamed: 0,Date,Adj Close,ETF
0,2008-12-01,17.391161,AOR
1,2009-01-01,16.519028,AOR
2,2009-02-01,15.420570,AOR
3,2009-03-01,16.174002,AOR
4,2009-04-01,17.332678,AOR
...,...,...,...
1391,2023-12-01,17.706827,VTTVX
1392,2024-01-01,18.360001,VTTVX
1393,2024-02-01,18.700001,VTTVX
1394,2024-03-01,19.080000,VTTVX


### Converting DF to Wide ###

In [3]:
# Reshape to one row per Date and one column per ETF, holding the adjusted
# close; dates on which an ETF has no row become NaN in that ETF's column.
etfs_wide = pd.pivot(
    etfs_tall,
    index='Date',
    columns='ETF',
    values='Adj Close',
)
etfs_wide

ETF,AOR,BND,SPY,VFWAX,VNQ,VTTVX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1993-05-01,,,25.606243,,,
1993-06-01,,,25.517757,,,
1993-07-01,,,25.573849,,,
1993-08-01,,,26.554047,,,
1993-09-01,,,26.197598,,,
...,...,...,...,...,...,...
2024-01-01,52.968086,72.796074,481.384338,34.206818,83.179100,18.360001
2024-02-01,54.153343,71.596512,506.506256,35.332760,84.825035,18.700001
2024-03-01,55.448162,72.211449,521.449829,36.289314,85.747154,19.080000
2024-04-01,53.874458,70.738991,505.649994,35.669998,80.430000,18.650000


### Keeping Rows with all ETFs present ###

In [4]:
# Keep only the dates on which every ETF column has a value (dropna removes
# any row containing a NaN), then move Date out of the index into a regular
# column so the frame has a plain 0..n-1 index.
etfs_full = etfs_wide.dropna().reset_index()
etfs_full

ETF,Date,AOR,BND,SPY,VFWAX,VNQ,VTTVX
0,2011-10-01,23.416935,59.494217,99.502342,18.080978,35.416828,7.643564
1,2011-11-01,23.312395,59.461224,99.097977,17.562647,34.070572,7.577151
2,2011-12-01,23.252653,59.739643,99.502342,16.512526,35.331528,7.408100
3,2012-01-01,24.246120,60.697159,104.777054,18.377979,38.002003,7.927385
4,2012-02-01,24.855276,60.574455,109.324966,19.313280,37.564690,8.181111
...,...,...,...,...,...,...,...
146,2023-12-01,52.458183,72.501465,471.924408,34.263214,86.543938,17.706827
147,2024-01-01,52.968086,72.796074,481.384338,34.206818,83.179100,18.360001
148,2024-02-01,54.153343,71.596512,506.506256,35.332760,84.825035,18.700001
149,2024-03-01,55.448162,72.211449,521.449829,36.289314,85.747154,19.080000


### Expanding Data: Sampling 80 Years of Monthly Rows (with Replacement) ###

In [5]:
# Draw 80 years x 12 months = 960 rows from the complete-history frame, with
# replacement, using a fixed seed so the draw is repeatable.
# NOTE(review): rows are sampled independently and hold price levels, so the
# result is not in date order; the cells below reshape etfs_full rather than
# this frame -- confirm whether the simulation is meant to feed the returns.
wide_sim_life = etfs_full.sample(
    n=80 * 12,
    replace=True,
    random_state=42,
)
wide_sim_life

ETF,Date,AOR,BND,SPY,VFWAX,VNQ,VTTVX
102,2020-04-01,40.182205,78.732315,273.519714,24.291124,65.491737,14.265389
92,2019-06-01,41.217808,73.105362,270.368225,27.297112,72.547440,14.358755
14,2012-12-01,25.905033,61.966293,115.327164,20.047646,41.517250,8.410084
106,2020-08-01,45.697868,80.224556,330.362305,28.772760,71.041458,16.082241
71,2017-09-01,37.724438,68.533974,224.240814,26.888041,63.831364,12.958105
...,...,...,...,...,...,...,...
39,2015-01-01,31.981800,65.861916,169.192520,22.344320,59.899872,10.825174
40,2015-02-01,32.918674,64.870323,178.701935,23.569088,57.699039,11.207318
10,2012-08-01,25.255085,62.413334,113.707870,18.308184,41.771847,8.298693
150,2024-04-01,53.874458,70.738991,505.649994,35.669998,80.430000,18.650000


### Converting back to tall ###

In [6]:
# Melt the wide price table back to one row per (Date, ETF) pair.
# var_name is given explicitly so the ticker column is always called 'ETF',
# instead of relying on the columns axis happening to carry that name.
# NOTE(review): this reshapes the observed history (etfs_full), not the
# sampled wide_sim_life frame -- confirm that is intended.
etfs_full_tall = etfs_full.melt(
    id_vars=['Date'],
    var_name='ETF',
    value_name='Adj Close',
)
etfs_full_tall

Unnamed: 0,Date,ETF,Adj Close
0,2011-10-01,AOR,23.416935
1,2011-11-01,AOR,23.312395
2,2011-12-01,AOR,23.252653
3,2012-01-01,AOR,24.246120
4,2012-02-01,AOR,24.855276
...,...,...,...
901,2023-12-01,VTTVX,17.706827
902,2024-01-01,VTTVX,18.360001
903,2024-02-01,VTTVX,18.700001
904,2024-03-01,VTTVX,19.080000


### Getting Returns ###

In [7]:
# Period-over-period percentage change of the adjusted close, computed
# separately within each ETF; the first row of every ETF has no previous
# price and is therefore NaN. The price column itself is dropped.
# pct_change follows row order, so rows are expected to be in date order
# within each ETF.
rets = etfs_full_tall.drop(columns=['Adj Close']).assign(
    ret=etfs_full_tall.groupby('ETF')['Adj Close'].pct_change()
)
rets

Unnamed: 0,Date,ETF,ret
0,2011-10-01,AOR,
1,2011-11-01,AOR,-0.004464
2,2011-12-01,AOR,-0.002563
3,2012-01-01,AOR,0.042725
4,2012-02-01,AOR,0.025124
...,...,...,...
901,2023-12-01,VTTVX,0.005462
902,2024-01-01,VTTVX,0.036888
903,2024-02-01,VTTVX,0.018519
904,2024-03-01,VTTVX,0.020321


### Converting Returns to Wide ###

In [8]:
# One row per Date, one column per ETF, each cell holding that ETF's return.
rets_wide = rets.pivot(
    values='ret',
    columns='ETF',
    index='Date',
)
rets_wide

ETF,AOR,BND,SPY,VFWAX,VNQ,VTTVX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-10-01,,,,,,
2011-11-01,-0.004464,-0.000555,-0.004064,-0.028667,-0.038012,-0.008689
2011-12-01,-0.002563,0.004682,0.004080,-0.059793,0.037010,-0.022311
2012-01-01,0.042725,0.016028,0.053011,0.112972,0.075583,0.070097
2012-02-01,0.025124,-0.002022,0.043406,0.050892,-0.011508,0.032006
...,...,...,...,...,...,...
2023-12-01,0.034825,0.032739,0.041433,0.034064,0.081121,0.005462
2024-01-01,0.009720,0.004063,0.020045,-0.001646,-0.038880,0.036888
2024-02-01,0.022377,-0.016478,0.052187,0.032916,0.019788,0.018519
2024-03-01,0.023910,0.008589,0.029503,0.027073,0.010871,0.020321


### Creating Additional Blended Portfolios (Weighted ETF Mixes) ###

In [12]:
# Blended portfolios: each new column is a fixed-weight combination of the
# single-fund return columns, computed row by row.
rets_wide['SPY_VFWAX'] = 0.5 * rets_wide['SPY'] + 0.5 * rets_wide['VFWAX']  # domestic/international stock split
rets_wide['SPY_BND'] = 0.5 * rets_wide['SPY'] + 0.5 * rets_wide['BND']  # stock/bond split
# This blend holds only SPY and VNQ (no BND term), so it is named SPY_VNQ to
# match its weights; the previous 'SPY_VNQ_BND' label claimed a bond sleeve
# that the formula never included.
rets_wide['SPY_VNQ'] = 0.5 * rets_wide['SPY'] + 0.5 * rets_wide['VNQ']  # stock/real estate split
rets_wide['SPY_VFWAX_BND'] = 0.4 * rets_wide['SPY'] + 0.4 * rets_wide['VFWAX'] + 0.2 * rets_wide['BND']  # domestic/international stock/bond split

In [13]:
# Display the returns table, now including the blended-portfolio columns.
rets_wide

ETF,AOR,BND,SPY,VFWAX,VNQ,VTTVX,SPY_VFWAX,SPY_BND,SPY_VNQ,SPY_VFWAX_BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2011-10-01,,,,,,,,,,
2011-11-01,-0.004464,-0.000555,-0.004064,-0.028667,-0.038012,-0.008689,-0.016366,-0.002309,-0.021038,-0.013203
2011-12-01,-0.002563,0.004682,0.004080,-0.059793,0.037010,-0.022311,-0.027856,0.004381,0.020545,-0.021348
2012-01-01,0.042725,0.016028,0.053011,0.112972,0.075583,0.070097,0.082991,0.034520,0.064297,0.069599
2012-02-01,0.025124,-0.002022,0.043406,0.050892,-0.011508,0.032006,0.047149,0.020692,0.015949,0.037315
...,...,...,...,...,...,...,...,...,...,...
2023-12-01,0.034825,0.032739,0.041433,0.034064,0.081121,0.005462,0.037748,0.037086,0.061277,0.036747
2024-01-01,0.009720,0.004063,0.020045,-0.001646,-0.038880,0.036888,0.009200,0.012054,-0.009417,0.008172
2024-02-01,0.022377,-0.016478,0.052187,0.032916,0.019788,0.018519,0.042551,0.017854,0.035987,0.030745
2024-03-01,0.023910,0.008589,0.029503,0.027073,0.010871,0.020321,0.028288,0.019046,0.020187,0.024348
