In [2]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
import datetime as dt


In [3]:

## --- Data Wrangling ---

def _loadPrices(csvPath, closeLabel):
    """Read one price CSV and relabel its seven columns.

    The sixth column is renamed to `closeLabel` so that every asset carries a
    uniquely named close column once the frames are combined.
    # NOTE(review): presumably the sixth column is an adjusted close from a
    # Yahoo-Finance-style export — verify against the CSV headers.
    """
    prices = pd.read_csv(csvPath)
    # Assigning seven labels raises ValueError if the CSV has another width.
    prices.columns = ['Date','Open','High','Low','Close',closeLabel,'Volume']
    return prices


# Group A
VFIAX = _loadPrices("Data/VFIAX.csv", 'VFIAX Close')
VBTLX = _loadPrices("Data/VBTLX.csv", 'VBTLX Close')
VGSLX = _loadPrices("Data/VGSLX.csv", 'VGSLX Close')

# Group B
VIMAX = _loadPrices("Data/VIMAX.csv", 'VIMAX Close')
VSMAX = _loadPrices("Data/VSMAX.csv", 'VSMAX Close')
VGHCX = _loadPrices("Data/VGHCX.csv", 'VGHCX Close')

# Group C
AMZN = _loadPrices("Data/AMZN.csv", 'AMZN Close')
WMT = _loadPrices("Data/WMT.csv", 'WMT Close')
CVS = _loadPrices("Data/CVS.csv", 'CVS Close')


In [4]:

## --- Assemble the per-asset close columns into one dataframe ---

# pd.concat(axis=1) aligns the pieces on their row index, so the frames are
# matched by row position, not by the value in 'Date'.
# NOTE(review): this assumes every CSV lists the same trading days in the same
# order — confirm, otherwise rows from different dates are paired.
close = pd.concat(
    [
        VFIAX['Date'],
        VFIAX['VFIAX Close'],
        VBTLX['VBTLX Close'],
        VGSLX['VGSLX Close'],
        VIMAX['VIMAX Close'],
        VSMAX['VSMAX Close'],
        VGHCX['VGHCX Close'],
        AMZN['AMZN Close'],
        WMT['WMT Close'],
        CVS['CVS Close'],
    ],
    axis=1,
)

## --- generate daily return ---

# Simple return per asset: (close[t] - close[t-1]) / close[t-1].
# Computed column-wise in one pass instead of appending one row at a time
# (DataFrame.append was removed in pandas 2.0 and made the loop quadratic).
closePrices = close.drop(columns='Date')
previousClose = closePrices.shift(1)
pctChange = (closePrices - previousClose) / previousClose
pctChange.columns = [label.replace(' Close', ' Daily Return') for label in closePrices.columns]

# The first row has no previous close, so it is dropped; the index is then
# renumbered from 0 so it matches what the row-by-row construction produced.
dailyReturn = (
    pd.concat([close['Date'], pctChange], axis=1)
    .iloc[1:]
    .reset_index(drop=True)
)



In [5]:
## --- Equal weights ---

# Every row of dailyReturn gets the same weight, 1 / (number of rows).
# NOTE(review): the estimator cell applies this weight across the asset
# columns of a single row, where it does not sum to 1 — confirm that is the
# intended weighting.
equalWeights = 1 / len(dailyReturn)

# wBar is the sum of the squared weights.
wBar = np.sum(np.full(len(dailyReturn), equalWeights) ** 2)


In [8]:
## --- Estimator dataframes ---

# For every row of dailyReturn (one date) the asset columns are combined into:
#   ER            = sum(w * r)
#   Var           = 1 / (1 - wBar) * sum(w * (r - ER)^2)
#   Std Ev        = sqrt(wBar) * sqrt(Var)
#   signalToNoise = ER / Std Ev
# with w = equalWeights for every asset.
# NOTE(review): equalWeights is 1 / (number of rows of dailyReturn) but is
# applied here across the asset columns, so the per-row weights do not sum
# to 1 — confirm the intended estimator before relying on these magnitudes.

estimatorDates = dailyReturn['Date'].reset_index(drop=True)
# Everything after the leading 'Date' column is a return series; cast to float
# so the arithmetic is numeric even if the columns were stored as objects.
assetReturns = dailyReturn.iloc[:, 1:].astype(float).reset_index(drop=True)

erByDate = (assetReturns * equalWeights).sum(axis=1)
deviationFromEr = assetReturns.sub(erByDate, axis=0)
varByDate = 1/(1 - wBar) * (equalWeights * deviationFromEr.pow(2)).sum(axis=1)
stdEvByDate = np.sqrt(wBar) * np.sqrt(varByDate)
# A zero Std Ev yields inf/NaN here rather than raising.
signalToNoiseByDate = erByDate / stdEvByDate

# Built in one shot: DataFrame.append was removed in pandas 2.0 and growing a
# frame row by row is quadratic.
ExpectedReturn = pd.DataFrame({'Date': estimatorDates, 'ER': erByDate})
VarianceEstimator = pd.DataFrame({'Date': estimatorDates, 'Var': varByDate})
StdOfExpectedValueEstimator = pd.DataFrame({'Date': estimatorDates, 'Std Ev': stdEvByDate})
# Store the ratio itself; the Std Ev value was previously written into this
# column by mistake.
signalToNoiseDF = pd.DataFrame({'Date': estimatorDates, 'signalToNoise': signalToNoiseByDate})
    

# print(ExpectedReturn)


In [16]:
def splitInToYears(df, years=(2016, 2017, 2018, 2019, 2020)):
    """Split a dated dataframe into one dataframe per calendar year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date' column of '%Y-%m-%d' strings (datetime values are
        accepted as well).
    years : iterable of int, optional
        Calendar years to extract, in the order the results are returned.
        Defaults to 2016 through 2020.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of `years`, in that order. Each keeps every column
        of `df`, holds only the rows whose date falls in that year, and is
        re-indexed from 0. Years with no rows give an empty frame with the
        same columns. Rows from years not listed are dropped.

    Raises
    ------
    ValueError
        If a date string does not match '%Y-%m-%d'.
    """
    # Parse the dates once for the whole column instead of once per row and year.
    rowYear = pd.to_datetime(df['Date'], format='%Y-%m-%d').dt.year
    # Boolean selection keeps all columns and original dtypes, and avoids the
    # DataFrame.append call that pandas 2.0 removed.
    return [df[rowYear == year].reset_index(drop=True) for year in years]
    

In [17]:
# Show ExpectedReturn bucketed by calendar year; the call returns a list of
# five DataFrames ordered 2016, 2017, 2018, 2019, 2020.
splitInToYears(ExpectedReturn)

[           Date        ER
 0    2016-01-04 -0.000114
 1    2016-01-05  0.000041
 2    2016-01-06 -0.000048
 3    2016-01-07 -0.000111
 4    2016-01-08 -0.000076
 ..          ...       ...
 247  2016-12-23 -0.000023
 248  2016-12-27  0.000025
 249  2016-12-28 -0.000039
 250  2016-12-29  0.000006
 251  2016-12-30 -0.000016
 
 [252 rows x 2 columns],
            Date        ER
 0    2017-01-03  0.000047
 1    2017-01-04  0.000051
 2    2017-01-05  0.000046
 3    2017-01-06  0.000014
 4    2017-01-09 -0.000010
 ..          ...       ...
 246  2017-12-22 -0.000014
 247  2017-12-26  0.000025
 248  2017-12-27  0.000003
 249  2017-12-28  0.000020
 250  2017-12-29 -0.000038
 
 [251 rows x 2 columns],
            Date        ER
 0    2018-01-02  0.000048
 1    2018-01-03  0.000029
 2    2018-01-04  0.000019
 3    2018-01-05  0.000071
 4    2018-01-08  0.000024
 ..          ...       ...
 246  2018-12-24 -0.000141
 247  2018-12-26  0.000319
 248  2018-12-27  0.000024
 249  2018-12-28  0.000018
 

In [None]:
## --- split expected returns into years 2016 to 2020 ---

# splitInToYears already performs this per-year filter and returns the frames
# ordered 2016, 2017, 2018, 2019, 2020, so it is reused here instead of
# repeating the same loop for ExpectedReturn.
ER2016, ER2017, ER2018, ER2019, ER2020 = splitInToYears(ExpectedReturn)

for yearLabel, yearFrame in (
    ("2016: ", ER2016),
    ("2017: ", ER2017),
    ("2018: ", ER2018),
    ("2019: ", ER2019),
    ("2020: ", ER2020),
):
    print(yearLabel)
    print(yearFrame)