In [4]:
import pandas as pd
import numpy as np
import datetime

In [5]:
# Load the Dow CSV from the relative Resources directory into a DataFrame.
csv_path = "Resources/Dow.csv"
dow_data = pd.read_csv(filepath_or_buffer=csv_path)
# Preview the first ten rows to confirm the file parsed as expected.
dow_data.head(n=10)

Unnamed: 0,Date,Value
0,9/2/1977,872.31
1,9/9/1977,857.04
2,9/16/1977,856.81
3,9/23/1977,839.14
4,9/30/1977,847.11
5,10/7/1977,840.35
6,10/14/1977,821.64
7,10/21/1977,808.3
8,10/28/1977,822.68
9,11/4/1977,809.94


In [6]:
# Tally the non-missing entries of every column; identical totals across
# columns indicate that no column has gaps relative to the others.
dow_data.notna().sum()

Date     2082
Value    2082
dtype: int64

In [7]:
# Break every "month/day/year" string in Date into its three pieces and store
# them (still as strings) in new Month, Day and Year columns, so that rows can
# later be selected by year and by month.
dow_data[["Month", "Day", "Year"]] = dow_data["Date"].str.split(pat="/", expand=True)
dow_data.head(n=5)

Unnamed: 0,Date,Value,Month,Day,Year
0,9/2/1977,872.31,9,2,1977
1,9/9/1977,857.04,9,9,1977
2,9/16/1977,856.81,9,16,1977
3,9/23/1977,839.14,9,23,1977
4,9/30/1977,847.11,9,30,1977


In [8]:
# Build the working frame: keep only Year, Month, Day and Value (in that
# order) and cast the date parts to int and Value to float in one step.
# `astype` returns a new object, so `dow_data` itself is left untouched.
new_dow_data = dow_data[["Year", "Month", "Day", "Value"]].astype(
    {"Year": int, "Month": int, "Day": int, "Value": float}
)

In [9]:
# Restrict the data to the years 1996 through 2016, both ends included.
# Year is an integer column, so the inclusive range matches "> 1995 and < 2017".
clean_dow_data = new_dow_data.loc[new_dow_data["Year"].between(1996, 2016)]
clean_dow_data.head(n=5)

Unnamed: 0,Year,Month,Day,Value
957,1996,1,5,5181.43
958,1996,1,12,5061.12
959,1996,1,19,5184.68
960,1996,1,26,5271.75
961,1996,2,2,5373.99


In [10]:
# Keep only the January (1) and December (12) rows.
clean_dow_data = clean_dow_data.loc[clean_dow_data["Month"].isin([1, 12])]
clean_dow_data.head(n=5)

Unnamed: 0,Year,Month,Day,Value
957,1996,1,5,5181.43
958,1996,1,12,5061.12
959,1996,1,19,5184.68
960,1996,1,26,5271.75
1005,1996,12,6,6381.94


In [11]:
# Select the January rows.
jan_data = clean_dow_data.loc[clean_dow_data["Month"].eq(1)]
jan_data.head(n=5)

Unnamed: 0,Year,Month,Day,Value
957,1996,1,5,5181.43
958,1996,1,12,5061.12
959,1996,1,19,5184.68
960,1996,1,26,5271.75
1009,1997,1,3,6544.09


In [12]:
# Pick, for every year, the Value recorded on the earliest January date.
jan_dow_group = jan_data.groupby(["Year", "Month"])

# `min()` is taken column by column, so `Day` here is the earliest day present
# in each (Year, Month) group; the accompanying minimum `Value` is not
# necessarily from that same row and is discarded below.
min_jan = jan_dow_group.min()

# Joining back on (Year, Month, Day) retrieves the row for that earliest day.
# Both inputs carry a `Value` column, so the merge suffixes them: `Value_x`
# is the group minimum, `Value_y` is the value actually observed on that day.
final_jan = (
    pd.merge(min_jan, jan_data, on=["Year", "Month", "Day"])
    .loc[:, ["Year", "Month", "Value_y"]]
    .rename(columns={"Value_y": "Value"})
)
final_jan.head(n=5)

Unnamed: 0,Year,Month,Value
0,1996,1,5181.43
1,1997,1,6544.09
2,1998,1,7965.04
3,1999,1,9643.32
4,2000,1,11522.56


In [13]:
# Select the December rows.
dec_data = clean_dow_data.loc[clean_dow_data["Month"].eq(12)]
dec_data.head(n=5)

Unnamed: 0,Year,Month,Day,Value
1005,1996,12,6,6381.94
1006,1996,12,13,6304.87
1007,1996,12,20,6484.4
1008,1996,12,27,6560.91
1057,1997,12,5,8149.13


In [14]:
# Pick, for every year, the Value recorded on the latest December date.
dec_dow_group = dec_data.groupby(["Year", "Month"])

# `max()` is taken column by column, so `Day` here is the latest day present
# in each (Year, Month) group; the accompanying maximum `Value` is not
# necessarily from that same row and is discarded below.
max_dec = dec_dow_group.max()

# Joining back on (Year, Month, Day) retrieves the row for that latest day.
# Both inputs carry a `Value` column, so the merge suffixes them: `Value_x`
# is the group maximum, `Value_y` is the value actually observed on that day.
final_dec = (
    pd.merge(max_dec, dec_data, on=["Year", "Month", "Day"])
    .loc[:, ["Year", "Month", "Value_y"]]
    .rename(columns={"Value_y": "Value"})
)
final_dec.head(n=5)

Unnamed: 0,Year,Month,Value
0,1996,12,6560.91
1,1997,12,7679.31
2,1998,12,9181.43
3,1999,12,11497.12
4,2000,12,10786.85


In [15]:
# Stack the January and December summaries vertically into a single frame.
# The original row labels are kept, so index values repeat across both parts.
dfs = [final_jan, final_dec]
combined_dow_df = pd.concat(objs=dfs, axis=0)
combined_dow_df

Unnamed: 0,Year,Month,Value
0,1996,1,5181.43
1,1997,1,6544.09
2,1998,1,7965.04
3,1999,1,9643.32
4,2000,1,11522.56
5,2001,1,10662.01
6,2002,1,10259.74
7,2003,1,8601.69
8,2004,1,10409.85
9,2005,1,10603.96


In [16]:
# Compute each year's change as (December value) - (January value).
#
# Sorting newest-first by Year and then Month places every January row
# directly below the December row of the same year, so the Value on the
# preceding row is that year's December figure. This relies on each year
# contributing exactly one December and one January row.
combined_dow_df = combined_dow_df.sort_values(
    by=["Year", "Month"], ascending=False
).reset_index(drop=True)

# Vectorised "previous row minus this row". The very first row has no
# predecessor, so 0 is used in its place; the result is a float column
# instead of an object column filled one cell at a time.
previous_value = combined_dow_df["Value"].shift(1, fill_value=0)
combined_dow_df["Year Change"] = previous_value - combined_dow_df["Value"]

# Only the January rows carry a meaningful year-over-year figure; keep those
# and reduce the frame to the two columns of interest.
combined_dow_df = combined_dow_df.loc[combined_dow_df["Month"] == 1].reset_index(drop=True)
combined_dow_df = combined_dow_df[["Year", "Year Change"]]
combined_dow_df

Unnamed: 0,Year,Year Change
0,2016,3416.15
1,2015,-407.96
2,2014,1583.72
3,2013,3043.2
4,2012,578.19
5,2011,542.8
6,2010,959.32
7,2009,1294.2
8,2008,-4284.63
9,2007,967.86


In [None]:
# Persist the per-year change table as CSV. `to_csv` has to be called with a
# target path; merely naming the method writes nothing. `index=False` omits
# the positional row labels from the file.
# NOTE(review): the output filename is a placeholder placed next to the input
# CSV - adjust it to wherever results should live.
combined_dow_df.to_csv("Resources/Dow_Year_Change.csv", index=False)