In [1]:
import pandas as pd
import numpy as np
import datetime

In [2]:
# Read the Dow CSV
csv_path = "Resources/Dow.csv"
dow_data = pd.read_csv(csv_path)
dow_data.head(10)

Unnamed: 0,Date,Value
0,9/2/1977,872.31
1,9/9/1977,857.04
2,9/16/1977,856.81
3,9/23/1977,839.14
4,9/30/1977,847.11
5,10/7/1977,840.35
6,10/14/1977,821.64
7,10/21/1977,808.3
8,10/28/1977,822.68
9,11/4/1977,809.94


In [3]:
# Check if there are any values missing
dow_data.count()

Date     2082
Value    2082
dtype: int64

In [4]:
# Split the Date Column by '/' to help find the change of value per year.
dow_data[['Month','Day','Year']] = dow_data.Date.str.split("/",expand=True)
dow_data.head()

Unnamed: 0,Date,Value,Month,Day,Year
0,9/2/1977,872.31,9,2,1977
1,9/9/1977,857.04,9,9,1977
2,9/16/1977,856.81,9,16,1977
3,9/23/1977,839.14,9,23,1977
4,9/30/1977,847.11,9,30,1977


In [5]:
# Organize the new DF
new_dow_data= dow_data[['Year','Month','Day','Value']].copy()
new_dow_data["Year"]=new_dow_data["Year"].astype(int)
new_dow_data["Month"]=new_dow_data["Month"].astype(int)
new_dow_data["Day"]=new_dow_data["Day"].astype(int)
new_dow_data["Value"]=new_dow_data["Value"].astype(float)

In [30]:
# 2016-1996 of years
clean_dow_data2= new_dow_data.loc[(new_dow_data["Year"]>1985)&(new_dow_data["Year"]<2017)]
clean_dow_data2.head()

Unnamed: 0,Year,Month,Day,Value
435,1986,1,3,1549.2
436,1986,1,10,1513.53
437,1986,1,17,1536.7
438,1986,1,24,1529.93
439,1986,1,31,1570.99


In [7]:
clean_dow_data= clean_dow_data.loc[(clean_dow_data["Month"]==1)|(clean_dow_data["Month"]==12)]
clean_dow_data.head()

Unnamed: 0,Year,Month,Day,Value
435,1986,1,3,1549.2
436,1986,1,10,1513.53
437,1986,1,17,1536.7
438,1986,1,24,1529.93
439,1986,1,31,1570.99


In [8]:
jan_data= clean_dow_data.loc[(clean_dow_data["Month"]==1)]
jan_data.head()

Unnamed: 0,Year,Month,Day,Value
435,1986,1,3,1549.2
436,1986,1,10,1513.53
437,1986,1,17,1536.7
438,1986,1,24,1529.93
439,1986,1,31,1570.99


In [9]:
jan_dow_group = jan_data.groupby(["Year","Month"])
min_jan = jan_dow_group.min()
final_jan = pd.merge(min_jan,jan_data, on=["Year", "Month", "Day"])
final_jan = final_jan[["Year", "Month", "Value_y"]]
final_jan= final_jan.rename(columns={"Value_y":"Value"})
final_jan.head()

Unnamed: 0,Year,Month,Value
0,1986,1,1549.2
1,1987,1,1927.31
2,1988,1,1911.31
3,1989,1,2194.29
4,1990,1,2773.25


In [10]:
dec_data= clean_dow_data.loc[(clean_dow_data["Month"]==12)]
dec_data.head()

Unnamed: 0,Year,Month,Day,Value
483,1986,12,5,1925.06
484,1986,12,12,1912.26
485,1986,12,19,1928.85
486,1986,12,26,1930.4
535,1987,12,4,1766.74


In [11]:
dec_dow_group = dec_data.groupby(["Year","Month"])
max_dec = dec_dow_group.max()
final_dec = pd.merge(max_dec,dec_data, on=["Year", "Month", "Day"])
final_dec = final_dec[["Year", "Month", "Value_y"]]
final_dec= final_dec.rename(columns={"Value_y":"Value"})
final_dec.head()

Unnamed: 0,Year,Month,Value
0,1986,12,1930.4
1,1987,12,1938.83
2,1988,12,2168.57
3,1989,12,2753.2
4,1990,12,2629.21


In [12]:
dfs=[final_jan,final_dec]
combined_dow_df= pd.concat(dfs)
combined_dow_df

Unnamed: 0,Year,Month,Value
0,1986,1,1549.20
1,1987,1,1927.31
2,1988,1,1911.31
3,1989,1,2194.29
4,1990,1,2773.25
...,...,...,...
26,2012,12,12938.11
27,2013,12,16478.41
28,2014,12,18053.71
29,2015,12,17425.03


In [13]:
combined_dow_df=combined_dow_df.sort_values(by=['Year','Month'],ascending=False).reset_index(drop=True)
start_value=0
combined_dow_df["Year Change"]=''
for index, row in combined_dow_df.iterrows():
    change =  start_value - row['Value']
    start_value = row['Value']
    combined_dow_df.loc[index, 'Year Change'] = change

combined_dow_df=combined_dow_df.loc[combined_dow_df["Month"]==1].reset_index(drop=True)
combined_dow_df=combined_dow_df[["Year","Year Change"]]
combined_dow_df

Unnamed: 0,Year,Year Change
0,2016,3416.15
1,2015,-407.96
2,2014,1583.72
3,2013,3043.2
4,2012,578.19
5,2011,542.8
6,2010,959.32
7,2009,1294.2
8,2008,-4284.63
9,2007,967.86


In [14]:
combined_dow_df.to_csv("outputs/clean_dow.csv", index=False)

In [31]:
clean_dow_data2.head(6)

Unnamed: 0,Year,Month,Day,Value
435,1986,1,3,1549.2
436,1986,1,10,1513.53
437,1986,1,17,1536.7
438,1986,1,24,1529.93
439,1986,1,31,1570.99
440,1986,2,7,1613.42


In [32]:
dow_2008 = clean_dow_data.loc[(clean_dow_data["Year"]==2008)]
dow_2007_dec = clean_dow_data.loc[(clean_dow_data["Year"]==2007)&
                                  (clean_dow_data["Month"]==12)]

dataframes2 = [dow_2008,dow_2007_dec]                              
dow_2008 = pd.concat(dataframes2)
dow_2008

Unnamed: 0,Year,Month,Day,Value
1582,2008,1,4,12800.18
1583,2008,1,11,12606.3
1584,2008,1,18,12099.3
1585,2008,1,25,12207.17
1586,2008,2,1,12743.19
1587,2008,2,8,12182.13
1588,2008,2,15,12348.21
1589,2008,2,22,12381.02
1590,2008,2,29,12266.39
1591,2008,3,7,11893.69


In [34]:
dow_2008_groupby = dow_2008.groupby(["Year","Month"])

dow_2008_groupby = dow_2008_groupby.min()
dow_2008_groupby

Unnamed: 0_level_0,Unnamed: 1_level_0,Day,Value
Year,Month,Unnamed: 2_level_1,Unnamed: 3_level_1
2007,12,7,13339.85
2008,1,4,12099.3
2008,2,1,12182.13
2008,3,7,11893.69
2008,4,4,12325.42
2008,5,2,12479.63
2008,6,6,11346.51
2008,7,3,11100.54
2008,8,1,11326.32
2008,9,5,11143.13


In [47]:
dow_2008_2 = pd.merge(dow_2008,dow_2008_groupby, on=["Year", "Month", "Day"])
dow_2008_2 = dow_2008_2[['Year','Month','Value_y']] 
dow_2008_2 = dow_2008_2.sort_values(by=["Year","Month"],ascending=True)
dow_2008_2 = dow_2008_2.rename(columns={"Value_y":"Value"})
dow_2008_2

Unnamed: 0,Year,Month,Value
12,2007,12,13339.85
0,2008,1,12099.3
1,2008,2,12182.13
2,2008,3,11893.69
3,2008,4,12325.42
4,2008,5,12479.63
5,2008,6,11346.51
6,2008,7,11100.54
7,2008,8,11326.32
8,2008,9,11143.13


In [48]:
dow_2008_2['Monthly Change'] = ""

dow_2008_2['Value'] = dow_2008_2['Value'].astype(float)



# use iterrows to iterate through pandas dataframe
start_value = 0

for index, row in dow_2008_2.iterrows():
    
    change =  row['Value'] - start_value
    start_value = row['Value']
    dow_2008_2.loc[index, 'Monthly Change'] = change
    
dow_2008_2 = dow_2008_2.loc[(dow_2008_2["Year"]==2008)]

dow_2008_2

Unnamed: 0,Year,Month,Value,Monthly Change
0,2008,1,12099.3,-1240.55
1,2008,2,12182.13,82.83
2,2008,3,11893.69,-288.44
3,2008,4,12325.42,431.73
4,2008,5,12479.63,154.21
5,2008,6,11346.51,-1133.12
6,2008,7,11100.54,-245.97
7,2008,8,11326.32,225.78
8,2008,9,11143.13,-183.19
9,2008,10,8378.95,-2764.18


In [49]:
dow_2008_2.to_csv("outputs/dow_2008.csv",index=False)