In [41]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px

## Combine and Tidy Data Sources

In [42]:
# Market index history (file is named after the ^DJI ticker).
djia = pd.read_csv("data/^DJI.csv")

# The three COVID-19 time-series tables, loaded in a fixed order:
# confirmed cases, deaths, recoveries.
confirmed, deaths, recovered = map(
    pd.read_csv,
    (
        "data/time_series_19-covid-Confirmed.csv",
        "data/time_series_19-covid-Deaths.csv",
        "data/time_series_19-covid-Recovered.csv",
    ),
)

In [43]:
# Keep only rows dated on or after 1 December 2019.
# NOTE(review): 'Date' is compared against a string here, so this presumably
# relies on the column holding ISO-formatted (YYYY-MM-DD) text -- confirm.
on_or_after_start = djia['Date'].ge('2019-12-01')
djia = djia.loc[on_or_after_start]

In [44]:
# Identifier columns shared by the three wide-format case tables; every other
# column is treated as a per-date count column.
id_cols = ['Province/State', 'Country/Region', 'Lat', 'Long']


def _melt_case_table(wide, value_name, id_vars):
    """Reshape one wide case table into long form.

    Parameters
    ----------
    wide : pandas.DataFrame
        Table with the ``id_vars`` columns plus one column per date.
    value_name : str
        Name of the column that will hold the melted counts.
    id_vars : list of str
        Columns that identify a row and are kept as-is.

    Returns
    -------
    pandas.DataFrame
        One row per (id_vars..., 'Date') with the count in ``value_name``.
    """
    # Derive the date columns from *this* frame rather than from another
    # table, so a file with a different set of dates neither raises a
    # KeyError nor silently loses columns.
    date_cols = [col for col in wide.columns if col not in id_vars]
    return wide.melt(id_vars=id_vars,
                     value_vars=date_cols,
                     var_name='Date',
                     value_name=value_name)


# tidy 3 dfs
tidy_confirmed = _melt_case_table(confirmed, 'Confirmed', id_cols)
tidy_deaths = _melt_case_table(deaths, 'Deaths', id_cols)
tidy_recovered = _melt_case_table(recovered, 'Recovered', id_cols)

# merge 3 dfs
# The join key is the identifier columns plus 'Date'. pd.merge defaults to an
# inner join, so only (location, date) rows present in all three tables are
# kept.
melt_ids = id_cols + ['Date']
df = (tidy_confirmed
      .merge(tidy_deaths, on=melt_ids)
      .merge(tidy_recovered, on=melt_ids))

# tidy merged df: one row per (location, date, Type) with the value in 'Count'
tidy_df = df.melt(id_vars=melt_ids,
                  value_vars=['Confirmed', 'Deaths', 'Recovered'],
                  var_name='Type',
                  value_name='Count')

# format date col: column headers such as '1/22/20' become real timestamps
tidy_df['Date'] = pd.to_datetime(tidy_df['Date'], format='%m/%d/%y')

## Plot

In [45]:
# Collapse province-level rows into one row per (country, type, date).
# Only 'Count' is summed: selecting it explicitly keeps the string
# 'Province/State' column (and Lat/Long) out of the reduction regardless of
# the pandas version's numeric_only default, and lets this cell be re-run
# after those columns are already gone.
tidy_df = (
    tidy_df
    .groupby(['Country/Region', 'Type', 'Date'])['Count']
    .sum()
    .reset_index()
)

In [46]:
# Peek at five random rows; the fixed seed makes the displayed sample
# reproducible across kernel restarts.
tidy_df.sample(5, random_state=42)

Unnamed: 0,Country/Region,Type,Date,Count
13060,Togo,Deaths,2020-03-04,0
2452,Chile,Recovered,2020-02-05,0
1671,Bosnia and Herzegovina,Confirmed,2020-02-06,0
8030,Mexico,Confirmed,2020-02-17,0
5554,Iran,Confirmed,2020-02-25,95


In [49]:
# Label every row as 'US' or 'Non-US' based on its country.
# Built in a single assignment: calling .replace(..., inplace=True) on
# tidy_df['US'] mutates a selected column object, which warns on recent
# pandas and leaves the frame unchanged when Copy-on-Write is enabled.
tidy_df['US'] = (
    tidy_df['Country/Region']
    .eq('US')
    .map({True: 'US', False: 'Non-US'})
)

In [59]:
# Total counts per US/Non-US group, date and type. Summing only 'Count'
# keeps the string 'Country/Region' column out of the reduction (it would
# otherwise be dropped or string-concatenated depending on pandas version).
tidy_df = (
    tidy_df
    .groupby(['US', 'Date', 'Type'])['Count']
    .sum()
    .reset_index()
)

# Confirmed counts over time, one colour per group. The figure is the cell's
# last expression so the notebook renders it.
px.scatter(tidy_df[tidy_df['Type'] == 'Confirmed'],
           x='Date',
           y='Count',
           color='US',
           title='Confirmed cases over time: US vs. Non-US')