In [1]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
import datetime as dt


In [2]:

## --- Data Wrangling ---

# Group A
VFIAX = pd.read_csv("Data/VFIAX.csv")
VFIAX.columns = ['Date','Open','High','Low','Close','VFIAX Close','Volume']
VBTLX = pd.read_csv("Data/VBTLX.csv")
VBTLX.columns = ['Date','Open','High','Low','Close','VBTLX Close','Volume']
VGSLX = pd.read_csv("Data/VGSLX.csv")
VGSLX.columns = ['Date','Open','High','Low','Close','VGSLX Close','Volume']

# Group B
VIMAX = pd.read_csv("Data/VIMAX.csv")
VIMAX.columns = ['Date','Open','High','Low','Close','VIMAX Close','Volume']
VSMAX = pd.read_csv("Data/VSMAX.csv")
VSMAX.columns = ['Date','Open','High','Low','Close','VSMAX Close','Volume']
VGHCX = pd.read_csv("Data/VGHCX.csv")
VGHCX.columns = ['Date','Open','High','Low','Close','VGHCX Close','Volume']

# Group C
AMZN = pd.read_csv("Data/AMZN.csv")
AMZN.columns = ['Date','Open','High','Low','Close','AMZN Close','Volume']
WMT = pd.read_csv("Data/WMT.csv")
WMT.columns = ['Date','Open','High','Low','Close','WMT Close','Volume']
CVS = pd.read_csv("Data/CVS.csv")
CVS.columns = ['Date','Open','High','Low','Close','CVS Close','Volume']


In [3]:

## --- Assemble -- code into a dataframe for Close of Day ---

close = pd.concat([VFIAX['Date'], VFIAX['VFIAX Close'], VBTLX['VBTLX Close'], VGSLX['VGSLX Close'], VIMAX['VIMAX Close'], VSMAX['VSMAX Close'], VGHCX['VGHCX Close'], AMZN['AMZN Close'], WMT['WMT Close'], CVS['CVS Close'] ], axis=1)
#print(close)

## --- generate mean daily return ---

dailyReturn = pd.DataFrame(columns = ['Date', 'VFIAX Daily Return','VBTLX Daily Return','VGSLX Daily Return', 'VIMAX Daily Return', 'VSMAX Daily Return', 'VGHCX Daily Return','AMZN Daily Return', 'WMT Daily Return','CVS Daily Return'])
for index, row in close.iterrows():
    if index == 0: continue
    #print((close['VFIAX Close'][index] - close['VFIAX Close'][index-1])/ (close['VFIAX Close'][index-1]))
    dailyReturn = dailyReturn.append({'Date': close['Date'][index],
                'VFIAX Daily Return': ((close['VFIAX Close'][index] - close['VFIAX Close'][index-1])/(close['VFIAX Close'][index-1])),
                'VBTLX Daily Return': ((close['VBTLX Close'][index] - close['VBTLX Close'][index-1])/(close['VBTLX Close'][index-1])),
                'VGSLX Daily Return': ((close['VGSLX Close'][index] - close['VGSLX Close'][index-1])/(close['VGSLX Close'][index-1])),
                'VIMAX Daily Return': ((close['VIMAX Close'][index] - close['VIMAX Close'][index-1])/(close['VIMAX Close'][index-1])),
                'VSMAX Daily Return': ((close['VSMAX Close'][index] - close['VSMAX Close'][index-1])/(close['VSMAX Close'][index-1])),
                'VGHCX Daily Return': ((close['VGHCX Close'][index] - close['VGHCX Close'][index-1])/(close['VGHCX Close'][index-1])),
                'AMZN Daily Return': ((close['AMZN Close'][index] - close['AMZN Close'][index-1])/(close['AMZN Close'][index-1])),
                'WMT Daily Return': ((close['WMT Close'][index] - close['WMT Close'][index-1])/(close['WMT Close'][index-1])),
                'CVS Daily Return': ((close['CVS Close'][index] - close['CVS Close'][index-1])/(close['CVS Close'][index-1]))},ignore_index=True)



In [26]:
## --- Equal weights ---

equalWeights = 1/dailyReturn.shape[0]
wBar = np.sum(np.square(np.ones(dailyReturn.shape[0]) * equalWeights))


In [28]:
## --- Expected Returns Data Frame ---

ExpectedReturn = pd.DataFrame(columns = ['Date', 'ER'])

for index, row in dailyReturn.iterrows():
    returns = row[1:]
    ExpectedReturn = ExpectedReturn.append({'Date':dailyReturn['Date'][index],'ER':np.sum(returns * equalWeights)},ignore_index=True)

# print(ExpectedReturn)
ExpectedReturn

Unnamed: 0,Date,ER
0,2016-01-04,-0.000114
1,2016-01-05,0.000041
2,2016-01-06,-0.000048
3,2016-01-07,-0.000111
4,2016-01-08,-0.000076
...,...,...
1254,2020-12-24,0.000010
1255,2020-12-28,0.000045
1256,2020-12-29,-0.000017
1257,2020-12-30,0.000008


In [31]:
## ---  Variance Data Frame ---

VarianceEstimator = pd.DataFrame(columns = ['Date', 'Var'])

for index, row in dailyReturn.iterrows():
    returns = row[1:]
    variance = 1/(1 - wBar) * np.sum(equalWeights * np.square(returns - ExpectedReturn['ER'][index]))
    VarianceEstimator = VarianceEstimator.append({'Date':dailyReturn['Date'][index],'Var':variance},ignore_index=True)
VarianceEstimator

Unnamed: 0,Date,Var
0,2016-01-04,3.711154e-06
1,2016-01-05,8.303826e-07
2,2016-01-06,8.827020e-07
3,2016-01-07,3.769124e-06
4,2016-01-08,1.151271e-06
...,...,...
1254,2020-12-24,8.998327e-08
1255,2020-12-28,1.223887e-06
1256,2020-12-29,3.820385e-07
1257,2020-12-30,2.356175e-07


In [32]:
StdOfExpectedValueEstimator = pd.DataFrame(columns = ['Date', 'Std Ev'])

for index, row in dailyReturn.iterrows():
    returns = row[1:]
    StdOfExpectedValue = np.sqrt(wBar) * np.sqrt(VarianceEstimator['Var'][index])
    StdOfExpectedValueEstimator = StdOfExpectedValueEstimator.append({'Date':dailyReturn['Date'][index],'Std Ev':StdOfExpectedValue},ignore_index=True)


StdOfExpectedValueEstimator

Unnamed: 0,Date,Std Ev
0,2016-01-04,0.000054
1,2016-01-05,0.000026
2,2016-01-06,0.000026
3,2016-01-07,0.000055
4,2016-01-08,0.000030
...,...,...
1254,2020-12-24,0.000008
1255,2020-12-28,0.000031
1256,2020-12-29,0.000017
1257,2020-12-30,0.000014


In [34]:
signalToNoiseDF = pd.DataFrame(columns = ['Date', 'signalToNoise'])

for index, row in dailyReturn.iterrows():
    returns = row[1:]
    signalToNoise = ExpectedReturn['ER'][index]/StdOfExpectedValueEstimator['Std Ev'][index]
    signalToNoiseDF = signalToNoiseDF.append({'Date':dailyReturn['Date'][index],'signalToNoise':StdOfExpectedValue},ignore_index=True)


signalToNoiseDF

Unnamed: 0,Date,signalToNoise
0,2016-01-04,0.000015
1,2016-01-05,0.000015
2,2016-01-06,0.000015
3,2016-01-07,0.000015
4,2016-01-08,0.000015
...,...,...
1254,2020-12-24,0.000015
1255,2020-12-28,0.000015
1256,2020-12-29,0.000015
1257,2020-12-30,0.000015


In [None]:
## --- split into years 2015 to 2020 ---

ER2016 = pd.DataFrame(columns = ['Date', 'ER'])
ER2017 = pd.DataFrame(columns = ['Date', 'ER'])
ER2018 = pd.DataFrame(columns = ['Date', 'ER'])
ER2019 = pd.DataFrame(columns = ['Date', 'ER'])
ER2020 = pd.DataFrame(columns = ['Date', 'ER'])

for index, row in ExpectedReturn.iterrows():
    if dt.datetime.strptime(ExpectedReturn['Date'][index],'%Y-%m-%d').year == 2016:
        ER2016 = ER2016.append({'Date':ExpectedReturn['Date'][index],'ER':ExpectedReturn['ER'][index]},ignore_index=True)
    if dt.datetime.strptime(ExpectedReturn['Date'][index],'%Y-%m-%d').year == 2017:
        ER2017 = ER2017.append({'Date':ExpectedReturn['Date'][index],'ER':ExpectedReturn['ER'][index]},ignore_index=True)
    if dt.datetime.strptime(ExpectedReturn['Date'][index],'%Y-%m-%d').year == 2018:
        ER2018 = ER2018.append({'Date':ExpectedReturn['Date'][index],'ER':ExpectedReturn['ER'][index]},ignore_index=True)
    if dt.datetime.strptime(ExpectedReturn['Date'][index],'%Y-%m-%d').year == 2019:
        ER2019 = ER2019.append({'Date':ExpectedReturn['Date'][index],'ER':ExpectedReturn['ER'][index]},ignore_index=True)
    if dt.datetime.strptime(ExpectedReturn['Date'][index],'%Y-%m-%d').year == 2020:
        ER2020 = ER2020.append({'Date':ExpectedReturn['Date'][index],'ER':ExpectedReturn['ER'][index]},ignore_index=True)

print("2016: ")
print(ER2016)
print("2017: ")
print(ER2017)
print("2018: ")
print(ER2018)
print("2019: ")
print(ER2019)
print("2020: ")
print(ER2020)