In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

In [16]:
# Both US time-series CSVs live in the same directory of the CSSEGISandData/COVID-19 repo.
_csse_time_series_dir = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
)
confirmed_cases_file_link = _csse_time_series_dir + "time_series_covid19_confirmed_US.csv"
death_cases_file_link = _csse_time_series_dir + "time_series_covid19_deaths_US.csv"

In [17]:
# Download each time-series CSV from its URL and report its (rows, columns) shape.
confirmed_df = pd.read_csv(filepath_or_buffer=confirmed_cases_file_link)
print(confirmed_df.shape)

deaths_df = pd.read_csv(filepath_or_buffer=death_cases_file_link)
print(deaths_df.shape)

(3261, 179)
(3261, 180)


In [18]:
# Preview the first five rows of the confirmed-cases table.
confirmed_df.head(5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,6/28/20,6/29/20,6/30/20,7/1/20,7/2/20,7/3/20,7/4/20,7/5/20,7/6/20,7/7/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,247,253,257,267,280,280,280,280,301,303
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,30,30,30,30,31,31,31,31,31,31
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,7189,7250,7465,7537,7608,7683,7787,7916,8585,8714
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,81,81,81,90,92,98,111,111,112,116


In [19]:
# Preview the first five rows of the deaths table.
deaths_df.head(5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,6/28/20,6/29/20,6/30/20,7/1/20,7/2/20,7/3/20,7/4/20,7/5/20,7/6/20,7/7/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,5,5,5,5,5,5,5,5,5,5
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,2,2,2,2,2,2,2,2,2,2
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,153,153,153,153,153,154,155,155,155,157
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,6,6,6,6,6,6,6,6,6,6


In [20]:
# Rows of the confirmed-cases table whose Province_State is exactly "Virgin Islands".
confirmed_df.loc[confirmed_df["Province_State"].eq("Virgin Islands")]

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,6/28/20,6/29/20,6/30/20,7/1/20,7/2/20,7/3/20,7/4/20,7/5/20,7/6/20,7/7/20
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,81,81,81,90,92,98,111,111,112,116


In [21]:
# Rows of the deaths table whose Province_State is exactly 'Puerto Rico'.
deaths_df.loc[deaths_df['Province_State'].eq('Puerto Rico')]

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,6/28/20,6/29/20,6/30/20,7/1/20,7/2/20,7/3/20,7/4/20,7/5/20,7/6/20,7/7/20
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,153,153,153,153,153,154,155,155,155,157


In [22]:
# Blank out missing values (e.g. the NaN entries visible in Admin2 above).
# fillna('') is the direct way to do this; the previous replace(np.nan, '', regex=True)
# passed regex=True with a non-string pattern, which pandas documents as unsupported
# and which had no effect on the result.
# NOTE(review): any numeric column that contains NaN becomes object dtype after this
# step, exactly as it did before — confirm downstream cells expect that.
confirmed_df = confirmed_df.fillna('')
deaths_df = deaths_df.fillna('')

## Exploratory Analysis

In [23]:
# Display the column labels of the confirmed-cases table.
confirmed_df.columns

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_',
       ...
       '6/28/20', '6/29/20', '6/30/20', '7/1/20', '7/2/20', '7/3/20', '7/4/20',
       '7/5/20', '7/6/20', '7/7/20'],
      dtype='object', length=179)

In [24]:
# Strip the location/identifier columns. The original list named 'Lat' and 'Long_'
# twice; drop() already returns a new frame, so no explicit copy is needed.
confirmed_df_data = confirmed_df.drop(
    columns=['Lat', 'Long_', 'Country_Region', 'iso2', 'iso3', 'code3',
             'FIPS', 'Admin2', 'Province_State']
)
# Total only the per-day columns (labels shaped like '7/7/20'). Previously the sum
# ran over the full confirmed_df, ignoring confirmed_df_data, so identifier and text
# columns were aggregated too and ended up as bogus points in the plotted series.
confirmed_df_summary = confirmed_df_data.filter(regex=r'^\d{1,2}/\d{1,2}/\d{2}$').sum()

In [25]:
# Line-and-marker trace: summary index on the x axis, summed values on the y axis.
confirmed_trace = go.Scatter(
    x=confirmed_df_summary.index,
    y=confirmed_df_summary.values,
    mode='lines+markers',
)
fig_1 = go.Figure(data=confirmed_trace)
fig_1.update_layout(
    title='Total Coronavirus Confirmed Cases (USA)',
    yaxis_title='Confirmed Cases',
    xaxis_tickangle=315,  # slant the x tick labels
)
fig_1.show()

In [26]:
# Display the column labels of the deaths table.
deaths_df.columns

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_',
       ...
       '6/28/20', '6/29/20', '6/30/20', '7/1/20', '7/2/20', '7/3/20', '7/4/20',
       '7/5/20', '7/6/20', '7/7/20'],
      dtype='object', length=180)

In [27]:
# Strip the location/identifier columns; drop() already returns a new frame, so no
# explicit copy is needed.
deaths_df_data = deaths_df.drop(
    columns=['Lat', 'Long_', 'Country_Region', 'UID', 'iso2', 'iso3', 'code3',
             'FIPS', 'Admin2', 'Province_State']
)
# Total only the per-day columns (labels shaped like '7/7/20'). Previously the sum
# ran over the full deaths_df, ignoring deaths_df_data, so every non-date column was
# aggregated as well and ended up as a bogus point in the plotted series.
# NOTE(review): the deaths table has one more column than the confirmed table
# (180 vs 179); the date-label filter keeps that extra column out of the totals too.
deaths_df_summary = deaths_df_data.filter(regex=r'^\d{1,2}/\d{1,2}/\d{2}$').sum()

In [28]:
# Line-and-marker trace: summary index on the x axis, summed values on the y axis.
deaths_trace = go.Scatter(
    x=deaths_df_summary.index,
    y=deaths_df_summary.values,
    mode='lines+markers',
)
fig_2 = go.Figure(data=deaths_trace)
fig_2.update_layout(
    title='Total Coronavirus Deaths Cases (USA)',
    yaxis_title='Deaths Cases',
    xaxis_tickangle=315,  # slant the x tick labels
)
fig_2.show()