In [1]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
import matplotlib
import datetime as dt
import math
from IPython.display import display, Latex

In [2]:
## --- Data Wrangling ---

def _loadPrices(ticker):
    """Read ``Data/<ticker>.csv`` and relabel its seven columns.

    The sixth column is renamed to ``'<ticker> Close'`` so that the per-ticker
    frames can later be placed side by side without name clashes.
    NOTE(review): the sixth column is presumably the adjusted close of a
    Yahoo-Finance-style export -- confirm against the CSV headers.

    Parameters
    ----------
    ticker : str
        Ticker symbol; also the stem of the CSV file name under ``Data/``.

    Returns
    -------
    pandas.DataFrame
        The file contents with columns
        ``Date, Open, High, Low, Close, '<ticker> Close', Volume``.

    Raises
    ------
    ValueError
        Raised by pandas when the file does not have exactly seven columns.
    """
    prices = pd.read_csv("Data/{}.csv".format(ticker))
    prices.columns = ['Date', 'Open', 'High', 'Low', 'Close', ticker + ' Close', 'Volume']
    return prices


# Group A
VFIAX = _loadPrices("VFIAX")
VBTLX = _loadPrices("VBTLX")
VGSLX = _loadPrices("VGSLX")

# Group B
VIMAX = _loadPrices("VIMAX")
VSMAX = _loadPrices("VSMAX")
VGHCX = _loadPrices("VGHCX")

# Group C
AMZN = _loadPrices("AMZN")
WMT = _loadPrices("WMT")
CVS = _loadPrices("CVS")


In [3]:

## --- Assemble the per-ticker '<ticker> Close' columns into one frame ---

# Column order here fixes the column order of `close` and `dailyReturn`.
tickers = ['VFIAX', 'VBTLX', 'VGSLX', 'VIMAX', 'VSMAX', 'VGHCX', 'AMZN', 'WMT', 'CVS']

# The Date column is taken from VFIAX only; the other frames are aligned to it
# by row index. NOTE(review): this assumes every CSV lists the same dates in
# the same order -- confirm.
close = pd.concat(
    [
        VFIAX['Date'],
        VFIAX['VFIAX Close'],
        VBTLX['VBTLX Close'],
        VGSLX['VGSLX Close'],
        VIMAX['VIMAX Close'],
        VSMAX['VSMAX Close'],
        VGHCX['VGHCX Close'],
        AMZN['AMZN Close'],
        WMT['WMT Close'],
        CVS['CVS Close'],
    ],
    axis=1,
)

## --- Generate daily returns: (close[t] - close[t-1]) / close[t-1] ---

# Vectorized over all rows and tickers at once. Growing the frame one row at a
# time with DataFrame.append is quadratic, and the method no longer exists in
# pandas >= 2.0.
prices = close[[ticker + ' Close' for ticker in tickers]]
previousPrices = prices.shift(1)
returns = (prices - previousPrices) / previousPrices
returns.columns = [ticker + ' Daily Return' for ticker in tickers]

# The first row has no previous close, so it is dropped; the index is then
# renumbered from 0 so row i holds the return observed on close['Date'][i + 1].
dailyReturn = pd.concat([close['Date'], returns], axis=1).iloc[1:].reset_index(drop=True)



In [26]:

## --- Build one-year windows of daily returns, rolled forward one quarter at a time ---

numWindows = 15

# Parse the dates once instead of twice per loop iteration.
returnDates = pd.to_datetime(dailyReturn['Date'], yearfirst=True)

# Day before the first observation, so the strict '>' below still keeps it.
yearStart = returnDates.min() - pd.tseries.offsets.DateOffset(days=1)
# Four quarter ends after the start. Anchoring the end on a real quarter end
# matters: an end date that is not a quarter end (e.g. 2016-12-30) only moves to
# 2016-12-31 on its first roll, which shrinks every later window to ~3 quarters.
yearEnd = yearStart + pd.tseries.offsets.QuarterEnd(4)

rollingQuarterData = []

for _ in range(numWindows):
    # Half-open interval (yearStart, yearEnd]: a quarter-end day belongs to the
    # window that ends on it, not to the one that starts right after it.
    mask = (returnDates > yearStart) & (returnDates <= yearEnd)
    currYear = dailyReturn.loc[mask].copy()
    rollingQuarterData.append(currYear)
    yearStart = yearStart + pd.tseries.offsets.QuarterEnd()
    yearEnd = yearEnd + pd.tseries.offsets.QuarterEnd()

In [36]:
def splitYearToQuarters(df):
    """Split a frame of dated rows into four consecutive calendar quarters.

    The first quarter is the one that ends at the first quarter end
    (Mar/Jun/Sep/Dec month end) on or after the earliest date in ``df``; the
    remaining three follow back to back. Quarters are anchored on ``df`` itself,
    so the function works for any window, not only one that starts at the
    beginning of the full data set.

    Each quarter covers the half-open interval ``(previous quarter end,
    quarter end]``, so a row dated exactly on a quarter end appears in exactly
    one quarter. A first row dated exactly on a quarter end therefore forms a
    one-day first quarter.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'Date'`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    list of pandas.DataFrame
        Four frames (copies of the matching rows of ``df``, original index
        kept), in chronological order. All four are empty when ``df`` has no
        parseable dates.
    """
    numQuarters = 4

    # Parse once; the same Series is reused for every quarter's mask.
    dates = pd.to_datetime(df['Date'], yearfirst=True)
    firstDate = dates.min()
    if pd.isna(firstDate):
        # Empty input (or no valid dates): nothing can fall into any quarter.
        return [df.iloc[0:0].copy() for _ in range(numQuarters)]

    # Step back one day so that the strict '>' below still keeps the first row.
    quarterStart = firstDate - pd.tseries.offsets.DateOffset(days=1)
    quarterEnd = quarterStart + pd.tseries.offsets.QuarterEnd()

    quarters = []
    for _ in range(numQuarters):
        inQuarter = (dates > quarterStart) & (dates <= quarterEnd)
        quarters.append(df.loc[inQuarter].copy())
        # The next quarter starts right after this one ends.
        quarterStart = quarterEnd
        quarterEnd = quarterEnd + pd.tseries.offsets.QuarterEnd()
    return quarters

In [37]:
# Sanity check: split the first rolling window into its quarterly frames and print them.
print(splitYearToQuarters(rollingQuarterData[0]))

[          Date  VFIAX Daily Return  VBTLX Daily Return  VGSLX Daily Return  \
0   2016-01-04           -0.015068            0.001880           -0.012746   
1   2016-01-05            0.001993           -0.000938            0.020262   
2   2016-01-06           -0.012741            0.003756           -0.003603   
3   2016-01-07           -0.023688            0.000936           -0.019137   
4   2016-01-08           -0.010820            0.001869           -0.013487   
..         ...                 ...                 ...                 ...   
56  2016-03-24           -0.000373           -0.000922            0.000611   
57  2016-03-28            0.000533            0.000000            0.009154   
58  2016-03-29            0.009051            0.004613            0.022116   
59  2016-03-30            0.004432           -0.000918           -0.002789   
60  2016-03-31           -0.002048            0.003960            0.006442   

    VIMAX Daily Return  VSMAX Daily Return  VGHCX Daily Return