# Read file

In [1]:


import pandas as pd
# Location of the input CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where the file exists at exactly this location.
DATA_PATH = "D:/covid_19_clean_complete.csv"

# Raw dataset as read from disk; every later cell works from this frame.
csv_df = pd.read_csv(DATA_PATH)

In [2]:
from datetime import datetime,date

# The raw 'Date' column holds month/day/two-digit-year strings; parse it into a
# proper datetime column named 'date' that the later cells group and filter on.
formatter_string = "%m/%d/%y"
csv_df['date'] = pd.to_datetime(csv_df['Date'], format=formatter_string)

# Latest day present in the data.
data_dates = csv_df['date'].unique()
max_data_date = max(data_dates)
print("Maximum date is {0}".format(max_data_date))

# Shared chart title: fixed prefix plus the latest date rendered as YYYY-MM-DD.
title = 'COVID-2019'
d = pd.to_datetime(str(max_data_date)).strftime('%Y-%m-%d')
chart_title = ' as of '.join([title, d])

Maximum date is 2020-03-19T00:00:00.000000000


In [11]:
# Preview the first rows of the dataset.
# NOTE(review): this cell carries execution count 11 although it sits between
# cells 2 and 3, and its saved output already lists the 'Confirmed_log' and
# 'color' columns that are only created in the map-animation cell further down.
# On a fresh Restart & Run All those two columns will not appear here.
csv_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,date,Confirmed_log,color
0,,Thailand,15.0,101.0,1/22/20,2,0,0,2020-01-22,1.098612,fuschia
1,,Japan,36.0,138.0,1/22/20,2,0,0,2020-01-22,1.098612,fuschia
2,,Singapore,1.2833,103.8333,1/22/20,0,0,0,2020-01-22,0.0,fuschia
3,,Nepal,28.1667,84.25,1/22/20,0,0,0,2020-01-22,0.0,fuschia
4,,Malaysia,2.5,112.5,1/22/20,0,0,0,2020-01-22,0.0,fuschia


# Plot cases over time

In [3]:
import plotly.offline as py
import plotly.graph_objs as go

# Enable inline rendering of plotly figures in the notebook.
# `py` is already the plotly.offline package, so call its public function
# directly instead of reaching through the nested `offline` submodule.
py.init_notebook_mode(connected=True)

# Worldwide totals per day: sum every location's counts for each date.
cases_df = csv_df[['date', 'Confirmed', 'Deaths', 'Recovered']].groupby('date').sum()

# One line per case category, all sharing the date index as the x axis.
data = [
    go.Scatter(
        x=cases_df.index,
        y=cases_df[case_type],
        mode="markers+lines",
        name=case_type
    )
    for case_type in ('Confirmed', 'Deaths', 'Recovered')
]

py.iplot({
    "data": data,
    "layout": go.Layout(title=chart_title)
})

# Get latest data

In [4]:
# Snapshot of the most recent day in the dataset.
df = csv_df[csv_df['date'] == max_data_date]

# Per-country totals for that day, largest confirmed count first.
by_country = (
    df[['Country/Region', 'Confirmed', 'Deaths', 'Recovered']]
    .groupby('Country/Region')
    .sum()
    .sort_values(by='Confirmed', ascending=False)
)

# Active cases are whatever is confirmed but neither dead nor recovered.
by_country['Active'] = by_country['Confirmed'] - (by_country['Deaths'] + by_country['Recovered'])

# Sunburst chart

In [5]:
# Inner ring: one sector per country, sized by its confirmed count and
# attached to the chart title as the common parent.
countries = by_country.index.to_list()
ids = list(countries)
labels = list(countries)
parents = [title] * len(countries)
values = by_country.Confirmed.to_list()

# Outer ring: every column other than 'Confirmed' becomes a child sector of
# its country.
classifications = by_country.columns.drop('Confirmed')

# Append in place (rather than rebuilding each list with `+` on every
# iteration) and read each cell with a single scalar lookup.
for cty in by_country.index:
    for c in classifications:
        ids.append(cty + '_' + c)
        parents.append(cty)
        labels.append(c)
        values.append(by_country.at[cty, c])

trace = go.Sunburst(
    ids=ids,
    labels=labels,
    parents=parents,
    values=values,
    # Each country's value is treated as the total of its child sectors.
    branchvalues="total",
    outsidetextfont={"size": 20, "color": "#377eb8"},
    marker={"line": {"width": 2}}
)

layout = go.Layout(
    title=chart_title + "<br>(click on country)",
    margin=go.layout.Margin(t=100, l=0, r=0, b=0),
    sunburstcolorway=["#636efa", "#ef553b", "#00cc96"]
)

fig = go.Figure([trace], layout)

py.iplot(fig)

# Plot cases by location with animation

In [14]:
import plotly.express as px
import numpy as np

# Rows without a province are labelled with their country so every marker has
# a hover name. The result is assigned back to the column: calling
# fillna(inplace=True) on a column selected from the frame is chained
# assignment, which pandas warns about and which does not update the frame
# when Copy-on-Write is enabled.
csv_df['Province/State'] = csv_df['Province/State'].fillna(csv_df['Country/Region'])

# Marker size uses log(1 + Confirmed) so large outbreaks do not dwarf the rest
# and zero counts map to size 0.
csv_df['Confirmed_log'] = np.log1p(csv_df['Confirmed'])
# NOTE(review): this column is not passed to scatter_mapbox below.
csv_df['color'] = 'fuschia'

# Sort chronologically so the animation frames (one per 'Date' string) play in
# date order.
a_df = csv_df.sort_values(by=['date'])
fig = px.scatter_mapbox(
    a_df,
    animation_frame='Date',
    animation_group="Country/Region",
    lat="Lat",
    lon="Long",
    hover_name="Province/State",
    hover_data=["Province/State", "Country/Region", "Confirmed", "Deaths", "Recovered"],
    size="Confirmed_log",
    color_discrete_sequence=['hsla(360, 100%, 50%, 0.75)'],
    zoom=0.5
)
fig.update_layout(
    mapbox_style="open-street-map",
    title=chart_title,
    width=900,
    height=700
)
fig.show()

# List of top 20 countries

In [7]:
# For each case category, the full list of countries ordered from the highest
# to the lowest count on the latest day.
top_country = {
    case_type: by_country.sort_values(by=case_type, ascending=False).index.to_list()
    for case_type in ('Confirmed', 'Deaths', 'Recovered')
}

# Individual rankings kept under their own names as well.
top_country_confirmed = top_country['Confirmed']
top_country_deaths = top_country['Deaths']
top_country_recovered = top_country['Recovered']

# Show the 20 countries with the most confirmed cases.
print(top_country['Confirmed'][:20])

['China', 'Italy', 'Iran', 'Spain', 'Germany', 'US', 'France', 'Korea, South', 'Switzerland', 'United Kingdom', 'Netherlands', 'Austria', 'Belgium', 'Norway', 'Sweden', 'Denmark', 'Japan', 'Malaysia', 'Canada', 'Portugal']


# By country

In [8]:
# Daily totals per country (all provinces summed), indexed by
# (Country/Region, date).
country_cases_df = (
    csv_df[['Country/Region', 'date', 'Confirmed', 'Deaths', 'Recovered']]
    .groupby(['Country/Region', 'date'])
    .sum()
)

countries = np.sort(csv_df['Country/Region'].unique())
category = 'Confirmed'
top_n = 5

# Only the leading countries are drawn initially; the rest stay in the legend
# and can be toggled on by clicking.
leaders = top_country[category][:top_n]

data = []
for name in countries:
    per_day = country_cases_df.loc[name]
    data.append(
        go.Scatter(
            x=per_day.index.to_list(),
            y=per_day[category],
            mode="markers+lines",
            name=name,
            text=name,
            visible=None if name in leaders else 'legendonly'
        )
    )

py.iplot({
    "data": data,
    "layout": go.Layout(title='<B>{}</B><BR><I>Top {} countries with {} shown. Click legend to show others</I>'.format(chart_title,top_n,category))
})

# By country aligned by Confirmed growth

In [9]:
# Day-over-day relative change in confirmed cases, computed within each
# country. Grouping by the country level keeps the first day of one country
# from being compared with the last day of the country before it in the index.
# The first day of every country has no predecessor and is filled with 0.
country_cases_df['Confirmed_pct'] = (
    country_cases_df
    .groupby(level='Country/Region')['Confirmed']
    .pct_change()
    .fillna(0)
)

# Keep only the days on which a country's confirmed count increased. The
# explicit copy makes this an independent frame, so adding a column below is
# not a write to a slice and no warning needs to be silenced.
growth_days = country_cases_df.loc[country_cases_df['Confirmed_pct'] > 0].copy()

# Number each country's growth days 0, 1, 2, ... in date order; this counter
# replaces the calendar date as the x axis so the curves line up.
growth_days['ID'] = growth_days.groupby(level='Country/Region').cumcount()
country_cases_df_growth = growth_days.reset_index().set_index(['Country/Region', 'ID'])

data = []
category = 'Confirmed'
top_n = 5
leaders = top_country[category][:top_n]

# A country with no growth day has no rows to plot; skip it instead of
# failing on the lookup.
countries_with_growth = set(country_cases_df_growth.index.get_level_values('Country/Region'))

for c in countries:
    if c not in countries_with_growth:
        continue
    country = country_cases_df_growth.loc[c]

    trace = go.Scatter(
        x=country.index.to_list(),
        y=country[category],
        mode="markers+lines",
        name=c,
        # Hover text shows the real calendar date behind each aligned point.
        text=country['date'],
        # Leading countries are drawn; the rest start hidden in the legend.
        visible=None if c in leaders else 'legendonly'
    )
    data.append(trace)

py.iplot({
    "data": data,
    "layout": go.Layout(title='<B>{}</B><BR><I>Shifted to align increase<BR>Top {} countries with {} shown. Click legend to show others</I>'.format(chart_title,top_n,category))
})

# Deaths by country, aligned by Confirmed growth

In [10]:
# Same aligned view as the previous chart, but plotting deaths. The x axis is
# still the per-country growth-day counter from country_cases_df_growth.
category = 'Deaths'
top_n = 5

# Countries with the most deaths are drawn; the rest start hidden in the legend.
leaders = top_country[category][:top_n]

data = []
for name in countries:
    aligned = country_cases_df_growth.loc[name]
    data.append(
        go.Scatter(
            x=aligned.index.to_list(),
            y=aligned[category],
            mode="markers+lines",
            name=name,
            text=aligned.date,
            visible=None if name in leaders else 'legendonly'
        )
    )

py.iplot({
    "data": data,
    "layout": go.Layout(title='<B>{}</B><BR><I>Shifted to align increase<BR>Top {} countries with {} shown. Click legend to show others</I>'.format(chart_title,top_n,category))
})