In [1]:
# Import Dependencies and setup
import pandas as pd
from sqlalchemy import create_engine
import time
import requests
import json

##### Extraction

In [2]:
# Extract CSV into pandas df
df_ca = pd.read_csv('../resources/california-history.csv')
df_ca.head()

Unnamed: 0,date,state,death,deathConfirmed,deathIncrease,deathProbable,hospitalized,hospitalizedCumulative,hospitalizedCurrently,hospitalizedIncrease,...,totalTestResults,totalTestResultsIncrease,totalTestsAntibody,totalTestsAntigen,totalTestsPeopleAntibody,totalTestsPeopleAntigen,totalTestsPeopleViral,totalTestsPeopleViralIncrease,totalTestsViral,totalTestsViralIncrease
0,2021-03-07,CA,54124.0,,258,,,,4291.0,0,...,49646014,133186,,,,,,0,49646014,133186
1,2021-03-06,CA,53866.0,,418,,,,4513.0,0,...,49512828,218325,,,,,,0,49512828,218325
2,2021-03-05,CA,53448.0,,400,,,,4714.0,0,...,49294503,146818,,,,,,0,49294503,146818
3,2021-03-04,CA,53048.0,,273,,,,4967.0,0,...,49147685,119637,,,,,,0,49147685,119637
4,2021-03-03,CA,52775.0,,278,,,,5110.0,0,...,49028048,130858,,,,,,0,49028048,130858


##### Drop Columns from Data

In [3]:
df_ca = df_ca[['date', 'state', 'deathIncrease', 'inIcuCurrently', 'positiveCasesViral', 'positiveIncrease', \
               'totalTestResults', 'totalTestResultsIncrease']]
df_ca.head()

Unnamed: 0,date,state,deathIncrease,inIcuCurrently,positiveCasesViral,positiveIncrease,totalTestResults,totalTestResultsIncrease
0,2021-03-07,CA,258,1159.0,3501394,3816,49646014,133186
1,2021-03-06,CA,418,1221.0,3497578,4452,49512828,218325
2,2021-03-05,CA,400,1236.0,3493126,4659,49294503,146818
3,2021-03-04,CA,273,1327.0,3488467,3504,49147685,119637
4,2021-03-03,CA,278,1403.0,3484963,3352,49028048,130858


##### Rename Columns

In [ ]:
clean_df_ca = df_ca.rename(columns={'deathIncrease' : 'deaths', 'hospitalizedIncrease' : 'daily hospitalization', \
                                   'inIcuCurrently' : 'Icu hospitalized', 'positiveCasesViral' : 'positive cases viral', \
                                    'positiveIncrease' : 'positive increase', 'totalTestResults' : 'test results total', \
                                    'totalTestResultsIncrease' : 'test increase'})
clean_df_ca.head()

Unnamed: 0,date,state,deaths,Icu hospitalized,positive cases viral,positive increase,test results total,test increase
0,2021-03-07,CA,258,1159.0,3501394,3816,49646014,133186
1,2021-03-06,CA,418,1221.0,3497578,4452,49512828,218325
2,2021-03-05,CA,400,1236.0,3493126,4659,49294503,146818
3,2021-03-04,CA,273,1327.0,3488467,3504,49147685,119637
4,2021-03-03,CA,278,1403.0,3484963,3352,49028048,130858


##### Replace NaN or Missing Values with 0

In [6]:
clean_df_ca.fillna(0)

Unnamed: 0,date,state,deaths,Icu hospitalized,positiveCasesViral,positive increase,test results total,test increase
0,2021-03-07,CA,258,1159.0,3501394,3816,49646014,133186
1,2021-03-06,CA,418,1221.0,3497578,4452,49512828,218325
2,2021-03-05,CA,400,1236.0,3493126,4659,49294503,146818
3,2021-03-04,CA,273,1327.0,3488467,3504,49147685,119637
4,2021-03-03,CA,278,1403.0,3484963,3352,49028048,130858
...,...,...,...,...,...,...,...,...
364,2020-03-08,CA,0,0.0,88,19,550,19
365,2020-03-07,CA,0,0.0,69,9,531,9
366,2020-03-06,CA,0,0.0,60,7,522,7
367,2020-03-05,CA,0,0.0,53,0,515,0
