In [16]:
#dependencies
import datetime as dt
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

In [17]:
#read the Dow Jones Industrial Average history CSV into a dataframe and preview it
file_dji = './Resources/DJI.csv'
dji_df = pd.read_csv(filepath_or_buffer=file_dji)
dji_df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2000-01-03,11501.849609,11522.009766,11305.69043,11357.509766,11357.509766,169750000
1,2000-01-04,11349.75,11350.05957,10986.450195,10997.929688,10997.929688,178420000
2,2000-01-05,10989.370117,11215.099609,10938.669922,11122.650391,11122.650391,203190000
3,2000-01-06,11113.370117,11313.450195,11098.450195,11253.259766,11253.259766,176550000
4,2000-01-07,11247.05957,11528.139648,11239.919922,11522.55957,11522.55957,184900000


In [20]:
#transform data as-needed (safe to re-run on an already-transformed dji_df)
#on a fresh load 'Date' is still a column; after the first run it has been
#consumed into the 'DATE' index, so only build the index while it is present
if 'Date' in dji_df.columns:
    dji_df = dji_df.assign(DATE=pd.to_datetime(dji_df['Date'])).set_index('DATE')

#keep only the closing price; errors='ignore' skips columns that are already
#gone instead of aborting at the first missing one
dji_df = dji_df.drop(
    columns=['Date', 'Open', 'High', 'Low', 'Adj Close', 'Volume'],
    errors='ignore',
)

#rename is a no-op when 'Close' has already been renamed
dji_df = dji_df.rename(columns={'Close':"Close_DJIA"})
dji_df.head()

Unnamed: 0_level_0,Close_DJIA
DATE,Unnamed: 1_level_1
2000-01-03,11357.509766
2000-01-04,10997.929688
2000-01-05,11122.650391
2000-01-06,11253.259766
2000-01-07,11522.55957


In [106]:
#line up DJIA closes with the SARS death counts over the SARS reporting window
#NOTE: depends on dji_df (In [20]) and sars_grouped_df (built further down in
#In [45]); both of those cells must have been run before this one

#dji_df is indexed by datetimes while sars_grouped_df is indexed by the date
#strings read from the CSV, so convert explicitly instead of relying on pandas
#to coerce mismatched index types during the merge
sars_deaths = sars_grouped_df[['Number of deaths']].copy()
sars_deaths.index = pd.to_datetime(sars_deaths.index)

#restrict the market data to the first..last SARS report date (inclusive)
test_df = dji_df.loc[sars_deaths.index[0]:sars_deaths.index[-1]]

#outer join keeps SARS report dates with no trading session (Market is NaN there)
test2_df = pd.merge(test_df, sars_deaths, left_index=True, right_index=True, how='outer')

#rename by label rather than by position so a change in column order or count
#cannot silently mislabel the data
test2_df = test2_df.rename(columns={'Close_DJIA': 'Market', 'Number of deaths': 'Death'})
test2_df

Unnamed: 0,Market,Death
2003-03-17,8141.919922,4
2003-03-18,8194.230469,4
2003-03-19,8265.450195,9
2003-03-20,8286.599609,10
2003-03-21,8521.969727,10
...,...,...
2003-07-07,9216.790039,812
2003-07-08,9223.089844,812
2003-07-09,9156.209961,812
2003-07-10,9036.040039,812


In [108]:
#row-over-row change in the Market column (first row has no predecessor, so it is NaN)
test2_df['Market'] - test2_df['Market'].shift(periods=1)

2003-03-17           NaN
2003-03-18     52.310547
2003-03-19     71.219726
2003-03-20     21.149414
2003-03-21    235.370118
                 ...    
2003-07-07           NaN
2003-07-08      6.299805
2003-07-09    -66.879883
2003-07-10   -120.169922
2003-07-11     83.549805
Name: Market, Length: 96, dtype: float64

In [87]:
#show the first and last five rows of the grouped SARS data as plain text
print(sars_grouped_df.head(), sars_grouped_df.tail(), sep='\n')

            Number of deaths
Date                        
2003-03-17                 4
2003-03-18                 4
2003-03-19                 9
2003-03-20                10
2003-03-21                10
            Number of deaths
Date                        
2003-07-07               812
2003-07-08               812
2003-07-09               812
2003-07-10               812
2003-07-11               813


In [49]:
#read the NASDAQ Composite history CSV into a dataframe and preview it
file_nasdaq = './Resources/NASDAQCOM.csv'
nasdaq_df = pd.read_csv(filepath_or_buffer=file_nasdaq)
nasdaq_df.head(n=5)

Unnamed: 0,DATE,NASDAQCOM
0,2000-01-03,4131.15
1,2000-01-04,3901.69
2,2000-01-05,3877.54
3,2000-01-06,3727.13
4,2000-01-07,3882.62


In [51]:
#transform data as-needed (safe to re-run)
#rename is a no-op once the columns already carry their new names
nasdaq_df = nasdaq_df.rename(columns={'NASDAQCOM': 'Close_NASDAQCOMPOSITE','DATE':'Date'})

#after the first run 'Date' is already the index, so only move it once;
#parse it to datetimes so this frame shares a DatetimeIndex with dji_df
if 'Date' in nasdaq_df.columns:
    nasdaq_df = nasdaq_df.assign(Date=pd.to_datetime(nasdaq_df['Date'])).set_index('Date')
nasdaq_df.head()

Unnamed: 0_level_0,Close_NASDAQCOMPOSITE
Date,Unnamed: 1_level_1
2000-01-03,4131.15
2000-01-04,3901.69
2000-01-05,3877.54
2000-01-06,3727.13
2000-01-07,3882.62


In [44]:
#read the cleaned 2003 SARS dataset CSV into a dataframe and preview it
file_sars = './Resources/sars_2003_complete_dataset_clean.csv'
sars_df = pd.read_csv(filepath_or_buffer=file_sars)
sars_df.head(n=5)

Unnamed: 0,Date,Country,Cumulative number of case(s),Number of deaths,Number recovered
0,2003-03-17,Germany,1,0,0
1,2003-03-17,Canada,8,2,0
2,2003-03-17,Singapore,20,0,0
3,2003-03-17,"Hong Kong SAR, China",95,1,0
4,2003-03-17,Switzerland,2,0,0


In [45]:
#transform data as-needed (safe to re-run)
#errors='ignore' lets the cell run again after the columns are already dropped
sars_df = sars_df.drop(
    columns=['Cumulative number of case(s)', 'Number recovered'],
    errors='ignore',
)

#total deaths across all countries per report date; select the numeric column
#explicitly because an unrestricted .sum() also aggregates the text 'Country'
#column on pandas >= 2.0
sars_grouped_df = sars_df.groupby('Date')[['Number of deaths']].sum()
sars_grouped_df.head()

Unnamed: 0_level_0,Number of deaths
Date,Unnamed: 1_level_1
2003-03-17,4
2003-03-18,4
2003-03-19,9
2003-03-20,10
2003-03-21,10


In [46]:
#read the 2009 H1N1 pandemic CSV (latin1-encoded) into a dataframe and preview it
file_h1n1 = './Resources/Pandemic (H1N1) 2009.csv'
h1n1_df = pd.read_csv(filepath_or_buffer=file_h1n1, encoding='latin1')
h1n1_df.head(n=5)

Unnamed: 0,Country,Cases,Deaths,Update Time
0,Algeria,5,0.0,7/6/2009 9:00
1,Antigua and Barbuda,2,0.0,7/6/2009 9:00
2,Argentina,2485,60.0,7/6/2009 9:00
3,Australia,5298,10.0,7/6/2009 9:00
4,Austria,19,0.0,7/6/2009 9:00


In [47]:
#transform data as-needed
#parse the update timestamp, then keep just the calendar date as the grouping key
h1n1_df['DateTime'] = pd.to_datetime(h1n1_df['Update Time'])
h1n1_df['Date'] = h1n1_df['DateTime'].dt.date

#total cases/deaths across all countries per date; select the numeric columns
#explicitly because an unrestricted .sum() also tries to aggregate the text and
#datetime columns on pandas >= 2.0
h1n1_groupby_df = h1n1_df.groupby('Date')[['Cases', 'Deaths']].sum()
h1n1_groupby_df.head()

Unnamed: 0_level_0,Cases,Deaths
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-05-23,24043,172.0
2009-05-25,25029,182.0
2009-05-26,25904,184.0
2009-05-27,26792,190.0
2009-05-29,31011,198.0


In [7]:
#read the cleaned 2014-2016 Ebola dataset CSV into a dataframe and preview it
file_ebola = './Resources/ebola_2014_2016_clean.csv'
ebola_df = pd.read_csv(filepath_or_buffer=file_ebola)
ebola_df.head(n=5)

Unnamed: 0,Country,Date,No. of suspected cases,No. of probable cases,No. of confirmed cases,"No. of confirmed, probable and suspected cases",No. of suspected deaths,No. of probable deaths,No. of confirmed deaths,"No. of confirmed, probable and suspected deaths"
0,Guinea,2014-08-29,25.0,141.0,482.0,648.0,2.0,141.0,287.0,430.0
1,Nigeria,2014-08-29,3.0,1.0,15.0,19.0,0.0,1.0,6.0,7.0
2,Sierra Leone,2014-08-29,54.0,37.0,935.0,1026.0,8.0,34.0,380.0,422.0
3,Liberia,2014-08-29,382.0,674.0,322.0,1378.0,168.0,301.0,225.0,694.0
4,Sierra Leone,2014-09-05,78.0,37.0,1146.0,1261.0,11.0,37.0,443.0,491.0


In [None]:
#transform data as-needed

In [48]:
#read the cleaned Covid-19 dataset CSV into a dataframe and preview it
file_covid = './Resources/covid_19_clean_complete.csv'
covid_df = pd.read_csv(filepath_or_buffer=file_covid)
covid_df.head(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,Anhui,Mainland China,31.8257,117.2264,1/22/20,1,0,0
1,Beijing,Mainland China,40.1824,116.4142,1/22/20,14,0,0
2,Chongqing,Mainland China,30.0572,107.874,1/22/20,6,0,0
3,Fujian,Mainland China,26.0789,117.9874,1/22/20,1,0,0
4,Gansu,Mainland China,36.0611,103.8343,1/22/20,0,0,0


In [None]:
#transform data as-needed

In [None]:
#connect to AWS cloud database
#take the connection details (user:password@host:port/dbname) from the
#DB_CONNECTION_STRING environment variable so credentials are never saved in
#the notebook; when it is unset this falls back to the original empty placeholder
connection_string = os.environ.get('DB_CONNECTION_STRING', '')
engine = create_engine(f'postgresql://{connection_string}')

In [None]:
#check tables
#Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
#the inspector API is available on both older and current releases
inspect(engine).get_table_names()

In [None]:
#use pandas to load market data df into database
#TEMPLATE: <dataframe> and <name> are unfilled placeholders, so this line is not
#valid Python yet - substitute the dataframe to upload and the target table name.
#if_exists = 'replace' is passed, so choose the table name with care.
#NOTE(review): presumably meant for dji_df and/or nasdaq_df - confirm before filling in
<dataframe>.to_sql(name = '<name>', con = engine, if_exists = 'replace')

In [None]:
#use pandas to load SARS data df into database

In [None]:
#use pandas to load H1N1 data df into database

In [None]:
#use pandas to load Ebola data df into database

In [None]:
#use pandas to load Covid-19 data df into database

In [None]:
#query tables to validate load
#read_sql_query is a pandas module-level function, not a DataFrame method, so it
#is called on pd and returns a new dataframe holding the query result.
#'<query>' is still a placeholder - replace it with a real SELECT statement
pd.read_sql_query('<query>', con = engine).head()