# Libraries

In [123]:
import pandas as pd
import numpy as np
from plotly.offline import iplot,plot
import plotly.graph_objects as go
import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

# The dataset gets updated dynamically.

In [93]:
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv'
df = pd.read_csv(url, error_bad_lines=False)
df.head()

Unnamed: 0,date,cases,deaths
0,2020-01-21,1,0
1,2020-01-22,1,0
2,2020-01-23,1,0
3,2020-01-24,2,0
4,2020-01-25,3,0


In [94]:
df.tail()

Unnamed: 0,date,cases,deaths
125,2020-05-25,1670101,98190
126,2020-05-26,1688990,98937
127,2020-05-27,1707699,100422
128,2020-05-28,1730158,101622
129,2020-05-29,1754724,102812


# Data cleaning

In [95]:
df.isnull().sum()

date      0
cases     0
deaths    0
dtype: int64

The dataset has no missing values.

Change format to M/D/Y

In [151]:
df_usa_max = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv')
df_usa_max = df_world[df_world['location'] == 'United States']

In [153]:
df_usa_max.head()

Unnamed: 0,iso_code,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,total_deaths_per_million,...,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_100k
19431,USA,United States,2019-12-31,0,0,0,0,0.0,0.0,0.0,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19432,USA,United States,2020-01-01,0,0,0,0,0.0,0.0,0.0,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19433,USA,United States,2020-01-02,0,0,0,0,0.0,0.0,0.0,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19434,USA,United States,2020-01-03,0,0,0,0,0.0,0.0,0.0,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19435,USA,United States,2020-01-04,0,0,0,0,0.0,0.0,0.0,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77


Unnamed: 0,iso_code,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,total_deaths_per_million,...,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_100k
19578,USA,United States,2020-05-26,1662302,19064,98220,500,5022.02,57.595,296.735,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19579,USA,United States,2020-05-27,1681212,18910,98916,696,5079.15,57.129,298.837,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19580,USA,United States,2020-05-28,1699933,18721,100442,1526,5135.708,56.558,303.448,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19581,USA,United States,2020-05-29,1721750,21817,101617,1175,5201.62,65.912,306.998,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77
19582,USA,United States,2020-05-30,1747087,25337,102836,1219,5278.166,76.546,310.68,...,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77


# Deaths cases in USA

In [137]:
trace = go.Scatter(
    x = df['date'],
    y = df['cases'],
    name = 'Covid-19 cases',
    
    )

layout = go.Layout(
    title = 'Total Covid-19 cases in USA',
    title_x = 0.5,
    showlegend = True,
    xaxis = dict(title = 'Dates'),
    yaxis = dict(title = 'Covid-19 cases'),
    
    )

data = [trace]

fig = go.Figure(data = data,layout = layout)
iplot(fig)

In [97]:
trace = go.Scatter(
    x = df['date'],
    y = df['deaths'],
    name = 'Covid-19 deaths',
    marker = dict(color = '#f14542')
    )
layout = go.Layout(
    title = 'Covid-19 deaths in USA',
    title_x = 0.5,
    xaxis = dict(title = 'Dates'),
    yaxis = dict(title = 'Deaths'),
    showlegend = True
    )

data = [trace]

fig = go.Figure(data = data,layout = layout)
iplot(fig)

In [98]:
trace1 = go.Scatter(
    x = df['date'],
    y = df['cases'],
    name = 'Covid-19 cases',
    marker = dict(color = '#1971DE')
    )

trace2 = go.Scatter(
    x = df['date'],
    y = df['deaths'],
    name = 'Covid-19 deaths',
    marker = dict(color = '#f14542')
    )

layout = go.Layout(
    title = 'Covid-19 cases in USA',
    title_x = 0.5,
    showlegend = True,
    xaxis = dict(title = 'Dates'),
    yaxis = dict(title = 'Covid-19 cases'),
    
    )

data = [trace1,trace2]

fig = go.Figure(data = data,layout = layout)
print("Total Cases in USA",df['cases'].iat[-1])
print("Total deaths in USA",df['deaths'].iat[-1])
iplot(fig)

Total Cases in USA 1754724
Total deaths in USA 102812


In [99]:
df['deaths/cases'] = df['deaths']/df['cases']

In [100]:
df['deaths/cases']

0      0.000000
1      0.000000
2      0.000000
3      0.000000
4      0.000000
         ...   
125    0.058793
126    0.058578
127    0.058805
128    0.058736
129    0.058592
Name: deaths/cases, Length: 130, dtype: float64

In [101]:
trace = go.Scatter(
    x = df['date'],
    y = df['deaths/cases'],
    name = 'death/cases',
    marker = dict(color = '#f14542')
    )
layout = go.Layout(
    title = 'Ratio of death per case',
    title_x = 0.5,
    xaxis = dict(title = 'Dates'),
    yaxis = dict(title = 'Deaths'),
    showlegend = True
    )

data = [trace]

fig = go.Figure(data = data,layout = layout)

today_ratio = round(df['deaths/cases'].iat[-1]*100,2)


print(" As per today around {0} {1} people die due to covid-19 in USA".format(today_ratio,'%'))

iplot(fig)

 As per today around 5.86 % people die due to covid-19 in USA


In [156]:
df_jan = df[(df['date'] > '2020-01-21') & (df['date'] <= '2020-01-31')]
df_feb = df[(df['date'] > '2020-02-01') & (df['date'] <= '2020-02-29')]
df_mar = df[(df['date'] > '2020-03-01') & (df['date'] <= '2020-03-31')]
df_apr = df[(df['date'] > '2020-04-01') & (df['date'] <= '2020-04-30')]
df_may = df[(df['date'] > '2020-05-01') & (df['date'] <= '2020-05-29')]

In [157]:
trace1 = go.Scatter(
    x = df_may['date'],
    y = df_may['cases'],
    name = 'Covid-19 cases',
    marker = dict(color = '#1971DE'),
    mode = 'markers',
    )

trace2 = go.Scatter(
    x = df_may['date'],
    y = df_may['deaths'],
    name = 'Covid-19 deaths',
    marker = dict(color = '#f14542'),
    mode = 'markers',
    )

layout = go.Layout(
    title = 'Covid-19 cases for MAY 2020 in USA',
    title_x = 0.5,
    showlegend = True,
    xaxis = dict(title = 'Dates'),
    yaxis = dict(title = 'Covid-19 cases'),
    
    )

data = [trace1,trace2]

fig = go.Figure(data = data,layout = layout)
print("Total Cases in USA",df_may['cases'].iat[-1])
print("Total deaths in USA",df_may['deaths'].iat[-1])
iplot(fig)

Total Cases in USA 1754724
Total deaths in USA 102812


In [158]:
trace1 = go.Scatter(
    x = df_mar['date'],
    y = df_mar['cases'],
    name = 'Covid-19 cases',
    marker = dict(color = '#1971DE'),
    mode = 'markers',
    )

trace2 = go.Scatter(
    x = df_mar['date'],
    y = df_mar['deaths'],
    name = 'Covid-19 deaths',
    marker = dict(color = '#f14542'),
    mode = 'markers',
    )

layout = go.Layout(
    title = 'Covid-19 cases for March 2020 in USA',
    title_x = 0.5,
    showlegend = True,
    xaxis = dict(title = 'Dates'),
    yaxis = dict(title = 'Covid-19 cases'),
    
    )

data = [trace1,trace2]

fig = go.Figure(data = data,layout = layout)
print("Total Cases in USA till March 2020 -->",df_mar['cases'].iat[-1])
print("Total death in USA till March 2020 -->",df_mar['deaths'].iat[-1])
iplot(fig)

Total Cases in USA till March 2020 --> 188413
Total death in USA till March 2020 --> 4290


In [162]:
trace = go.Bar(
    x = ['January','February','March','April','May'],
    y = [df_jan['cases'].sum(),df_feb['cases'].sum(),df_mar['cases'].sum(),df_apr['cases'].sum(),df_may['cases'].sum()]
    )

layout = go.Layout(
    title = 'Total Covid-19 cases monthly in USA',
    title_x = 0.5,
    xaxis = dict(title = 'Month'),
    yaxis = dict(title = 'Total covid-19 cases')
    )
data = [trace]

fig = go.Figure(data = data,layout = layout)
print('Total covid-19 cases in January-->',df_jan['cases'].sum())
print('Total covid-19 cases in February-->',df_feb['cases'].sum())
print('Total covid-19 cases in March-->',df_mar['cases'].sum())
print('Total covid-19 cases in April-->',df_apr['cases'].sum())
print('Total covid-19 cases in May-->',df_may['cases'].sum())
iplot(fig)

Total covid-19 cases in January--> 40
Total covid-19 cases in February--> 728
Total covid-19 cases in March--> 1094806
Total covid-19 cases in April--> 19388944
Total covid-19 cases in May--> 40752719


In [160]:
trace = go.Bar(x = ['January','February','March'],y = ['5','6','8'])   # Trace the data

data = [trace]  # Make a list

layout = {"title":"sales for first three months",   # Make a dictionary
           "xaxis":{"title":"Month","tickangle":-20},
           "yaxis":{"title":"Sales"}
         }

In [191]:
jan_newcases = df_usa_max[(df_usa_max['date'] > '2020-01-01') & (df_usa_max['date'] <= '2020-01-31')]['new_cases'].sum()
feb_newcases = df_usa_max[(df_usa_max['date'] > '2020-02-01') & (df_usa_max['date'] <= '2020-02-29')]['new_cases'].sum()
mar_newcases = df_usa_max[(df_usa_max['date'] > '2020-03-01') & (df_usa_max['date'] <= '2020-03-31')]['new_cases'].sum()
apr_newcases = df_usa_max[(df_usa_max['date'] > '2020-04-01') & (df_usa_max['date'] <= '2020-04-30')]['new_cases'].sum()
may_newcases = df_usa_max[(df_usa_max['date'] > '2020-05-01') & (df_usa_max['date'] <= '2020-05-29')]['new_cases'].sum()

jan_newdeaths = df_usa_max[(df_usa_max['date'] > '2020-01-01') & (df_usa_max['date'] <= '2020-01-31')]['new_deaths'].sum()
feb_newdeaths = df_usa_max[(df_usa_max['date'] > '2020-02-01') & (df_usa_max['date'] <= '2020-02-29')]['new_deaths'].sum()
mar_newdeaths = df_usa_max[(df_usa_max['date'] > '2020-03-01') & (df_usa_max['date'] <= '2020-03-31')]['new_deaths'].sum()
apr_newdeaths = df_usa_max[(df_usa_max['date'] > '2020-04-01') & (df_usa_max['date'] <= '2020-04-30')]['new_deaths'].sum()
may_newdeaths = df_usa_max[(df_usa_max['date'] > '2020-05-01') & (df_usa_max['date'] <= '2020-05-29')]['new_deaths'].sum()


In [171]:
trace1 = go.Scatter(
    x = df_usa_max['date'],
    y = df_usa_max['new_cases'],
    name = 'Covid-19 new cases',
    marker = dict(color = '#1927DE '),
   
    )

trace2 = go.Scatter(
    x = df_usa_max['date'],
    y = df_usa_max['new_deaths'],
    name = 'Covid-19 new death',
    marker = dict(color = '#f14542'),
   
    )

layout = go.Layout(
    title = 'Covid-19 new cases and new deaths',
    title_x = 0.5,
    xaxis = dict(title = 'Dates'),
    yaxis = dict(title = 'New cases'),
    showlegend = True,
    
    )

data = [trace1,trace2]

fig = go.Figure(data = data,layout = layout)

today_newcase = round(df_usa_max['new_cases'].iat[-1]*100,2)


#print(" Covid-19 new cases today are --> {0}".format(today_newcase))

iplot(fig)

In [186]:
labels = ['Total population of USA','Americans Died in COVID-19','Americans Died in World War 2']
values = [df_usa_max['population'].iat[-1],df_usa_max['total_deaths'].sum(),'407000']

trace = go.Pie(labels = labels, values = values)     # To show percentage on pie chart remove text info
data = [trace]

layout = go.Layout(
    title = 'Percentage of Americans dies in various incidents',
    title_x = 0.5
    )
fig = go.Figure(data = data,layout = layout)

iplot(fig)

Last three month new cases

In [193]:
trace1 = go.Bar(
    x = ['March','April','May'],
    y = [mar_newcases,apr_newcases,may_newcases]
    )

trace2 = go.Bar(
    x = ['March','April','May'],
    y = [mar_newdeaths,apr_newdeaths,may_newdeaths]
    )

layout = go.Layout(
    title = 'Covid-19 new cases last three months in USA',
    title_x = 0.5,
    xaxis = dict(title = 'Month'),
    yaxis = dict(title = 'Total covid-19 cases')
    )
data = [trace1,trace2]

fig = go.Figure(data = data,layout = layout)

iplot(fig)

In [188]:
feb_newcases

59