In [16]:
import numpy as np
import pandas as pd
import plotly.offline as py
from plotly import tools
py.init_notebook_mode(connected=True)
import plotly.express as px
import plotly.graph_objs as go

pd.set_option('display.max_rows', 500)

In [17]:
df = pd.read_csv("../input/novel-corona-virus-2019-dataset/covid_19_data.csv", parse_dates=['Last Update'])
df.rename(columns={'Country/Region':'Country'}, inplace=True)
df = df.drop(columns = ['SNo', "Last Update"]) 
countries = pd.read_csv("../input/countries-of-the-world-iso-codes-and-population/countries_by_population_2019.csv")
countries_iso = pd.read_csv("../input/countries-of-the-world-iso-codes-and-population/country_codes_2020.csv")

In [15]:
df[df.Country == "India"].sort_values(['ObservationDate'], ascending = False)

KeyError: 'ObservationDate'

In [4]:
df['Province/State'] = df['Province/State'].fillna('NULL')
df.sort_values(['Country', 'Province/State', 'ObservationDate'], ascending = [True,True,False], inplace = True)
df = df.drop_duplicates(['Country', 'Province/State'], keep = "first")
df = df.drop(columns = "ObservationDate")


df.sample(5)

Unnamed: 0,Province/State,Country,Confirmed,Deaths,Recovered
4383,"Pierce County, WA",US,4.0,0.0,0.0
6041,Grand Princess,US,20.0,0.0,0.0
6072,,Paraguay,8.0,0.0,0.0
4451,"Berkshire County, MA",US,1.0,0.0,0.0
6053,,Venezuela,17.0,0.0,0.0


In [5]:
df_country = df.groupby(['Country'], as_index=False)['Confirmed', 'Deaths'].sum()

df_country.sample(5)

Unnamed: 0,Country,Confirmed,Deaths
35,Costa Rica,35.0,0.0
87,Latvia,34.0,0.0
101,Mexico,53.0,0.0
16,Belgium,1058.0,5.0
96,Maldives,13.0,0.0


In [6]:
cols_to_drop = ['Rank', 'pop2018','GrowthRate', 'area', 'Density']
countries = countries.drop(columns = cols_to_drop)

countries = countries.merge(countries_iso[['name', 'cca3']], on = ['name'], how = "left")

cols_to_rename = {'name': 'Country', 'pop2019': 'Population', 'cca3': 'ISO'}
countries = countries.rename(columns = cols_to_rename)

countries.head()

Unnamed: 0,Country,Population,ISO
0,China,1433783.686,CHN
1,India,1366417.754,IND
2,United States,329064.917,USA
3,Indonesia,270625.568,IDN
4,Pakistan,216565.318,PAK


In [7]:
countries_to_rename = {'US': 'United States', 'Mainland China': 'China', 'UK': 'United Kingdom'}
df_country['Country'] = df_country['Country'].replace(countries_to_rename)

df_country = df_country.merge(countries[['Country', 'Population', 'ISO']], on = "Country", how = "left")
df_country = df_country.dropna()

df_country.sample(5)

Unnamed: 0,Country,Confirmed,Deaths,Population,ISO
45,Equatorial Guinea,1.0,0.0,1355.986,GNQ
149,Sudan,1.0,1.0,42813.238,SDN
52,French Guiana,11.0,0.0,290.832,GUF
19,Bolivia,11.0,0.0,11513.1,BOL
67,Honduras,6.0,0.0,9746.117,HND


In [8]:
df_country['Population'] = round((df_country['Population']/1000),2)
df_country = df_country.rename(columns = {'Population': 'Population (million)'})
df_country['Cases per Million'] = round((df_country['Confirmed']/df_country['Population (million)']),2)
df_country['Deaths per Million'] = round((df_country['Deaths']/df_country['Population (million)']),2)

df_country = df_country[df_country['Population (million)'] > 1]

df_country.sample(5)

Unnamed: 0,Country,Confirmed,Deaths,Population (million),ISO,Cases per Million,Deaths per Million
154,Tanzania,1.0,0.0,58.01,TZA,0.02,0.0
87,Latvia,34.0,0.0,1.91,LVA,17.8,0.0
145,South Korea,8236.0,75.0,51.23,KOR,160.77,1.46
78,Ivory Coast,1.0,0.0,25.72,CIV,0.04,0.0
131,Rwanda,5.0,0.0,12.63,RWA,0.4,0.0


In [9]:
df_country = df_country.sort_values(['Cases per Million'], ascending = False).reset_index(drop=True)
df_country.drop(columns = ['ISO', 'Deaths', 'Deaths per Million']).head(20)

Unnamed: 0,Country,Confirmed,Population (million),Cases per Million
0,Italy,27980.0,60.55,462.1
1,Switzerland,2200.0,8.59,256.11
2,Norway,1333.0,5.38,247.77
3,Spain,9942.0,46.74,212.71
4,Denmark,1194.0,5.77,206.93
5,Iran,14991.0,82.91,180.81
6,South Korea,8236.0,51.23,160.77
7,Qatar,439.0,2.83,155.12
8,Estonia,205.0,1.33,154.14
9,Netherlands,2549.0,17.1,149.06


In [10]:
fig = px.choropleth(df_country, locations="ISO",color="Cases per Million",hover_name="Country",color_continuous_scale=px.colors.sequential.YlOrRd)

layout = go.Layout(title=go.layout.Title(text="Corona Cases per million Citizens",x=0.5),font=dict(size=14),width = 750,height = 350,margin=dict(l=0,r=0,b=0,t=30))

fig.update_layout(layout)

fig.show()

In [11]:
df_country = df_country.sort_values(['Deaths per Million'], ascending = False).reset_index(drop=True)
df_country.drop(columns = ['ISO', 'Confirmed', 'Cases per Million']).head(20)

Unnamed: 0,Country,Deaths,Population (million),Deaths per Million
0,Italy,2158.0,60.55,35.64
1,Iran,853.0,82.91,10.29
2,Spain,342.0,46.74,7.32
3,France,181.0,65.13,2.78
4,Netherlands,44.0,17.1,2.57
5,China,3213.0,1433.78,2.24
6,Switzerland,14.0,8.59,1.63
7,South Korea,75.0,51.23,1.46
8,United Kingdom,70.0,67.53,1.04
9,Bahrain,1.0,1.64,0.61


In [12]:
fig = px.choropleth(df_country, locations="ISO",color="Deaths per Million",hover_name="Country",color_continuous_scale=px.colors.sequential.YlOrRd)

layout = go.Layout(title=go.layout.Title( text="Corona deaths per million Citizens", x=0.5),font=dict(size=14),width = 750,height = 350,margin=dict(l=0,r=0,b=0,t=30))

fig.update_layout(layout)

fig.show()