In [None]:
# Objective: show worldwide COVID-19 numbers, with more detail for China and Europe.

In [107]:
import pandas as pd
import numpy as np

# Load the raw case data; the path is relative to the notebook's working directory.
covid_global_df = pd.read_csv('./data/covid_19_data.csv')

# Rename column Country/Region to Country
covid_global_df = covid_global_df.rename(columns={'Country/Region': 'Country'})

# Replace the literal string 'None' with NaN.
# The result is assigned back to the column on purpose: calling
# replace(..., inplace=True) on `covid_global_df['Province/State']` operates on
# a column selection, which raises a chained-assignment warning on recent
# pandas and leaves the frame untouched when copy-on-write is enabled.
covid_global_df['Province/State'] = covid_global_df['Province/State'].replace(
    to_replace='None', value=np.nan
)

covid_global_df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


# Current numbers worldwide

In [108]:
# Plotly imports: `go` builds figures trace by trace, `px` is the high-level
# express API used for the animated maps further down.
import plotly as py
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable Plotly rendering inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook (so it needs internet access) -- confirm.
init_notebook_mode(connected=True) 

In [109]:
# Worldwide totals per observation date, counting only rows with at least one
# confirmed case.
# numeric_only=True is required: without it, pandas >= 2.0 also tries to "sum"
# the string columns (Province/State, Country, Last Update) instead of silently
# dropping them as older versions did. The numeric result is unchanged (SNo is
# still summed along with the case counts, although that sum has no meaning).
# NOTE(review): ObservationDate is grouped as an MM/DD/YYYY string, so groups
# are ordered lexicographically; this is chronological only within one year.
df_date = (
    covid_global_df[covid_global_df['Confirmed'] > 0]
    .groupby('ObservationDate')
    .sum(numeric_only=True)
    .reset_index()
)

df_date.head()

Unnamed: 0,ObservationDate,SNo,Confirmed,Deaths,Recovered
0,01/22/2020,566,555.0,17.0,28.0
1,01/23/2020,2158,653.0,18.0,30.0
2,01/24/2020,4188,941.0,26.0,36.0
3,01/25/2020,6490,1438.0,42.0,39.0
4,01/26/2020,9071,2118.0,56.0,52.0


In [110]:
# Overlaid area chart of the worldwide totals: one trace per metric, each
# filled down to the x-axis ('tozeroy'). Traces are added in the listed order.
fig = go.Figure()

for metric_column, legend_label in (
    ('Confirmed', 'Confirmed Cases'),
    ('Recovered', 'Recovered'),
    ('Deaths', 'Deaths'),
):
    metric_trace = go.Scatter(
        x=df_date['ObservationDate'],
        y=df_date[metric_column],
        fill='tozeroy',
        name=legend_label,
    )
    fig.add_trace(metric_trace)

fig.update_layout(title_text='Number of confirmed cases, recoveries, and deaths worldwide')

fig.show()

In [111]:
# Build `cases_now_df`: the most recent row per country, with one summed
# national row for each country that is reported per Province/State.

# Countries whose province-level rows are collapsed into a single national row.
# NOTE(review): any other country that only has Province/State rows is removed
# by the final isnull() filter and will be missing from the maps -- confirm
# whether more countries belong in this list.
countries_split_by_province = ['Mainland China', 'US']

# Keep only observations with at least one confirmed case.
cases_confirmed_df = covid_global_df[covid_global_df['Confirmed'] > 0]

# Sort newest first. ObservationDate is an MM/DD/YYYY string, so sorting the
# raw strings would rank 12/31/2020 above 01/01/2021; sort on the parsed date
# via a temporary helper column and drop it again afterwards.
newest_first_df = (
    cases_confirmed_df
    .assign(_observed_on=pd.to_datetime(cases_confirmed_df['ObservationDate'], format='%m/%d/%Y'))
    .sort_values('_observed_on', ascending=False)
    .drop(columns='_observed_on')
)

# Keep only the most recent row for every (Province/State, Country) pair.
latest_per_region_df = newest_first_df.drop_duplicates(
    subset=['Province/State', 'Country'], keep='first'
)

# Sum the latest province rows into one row per listed country.
# numeric_only=True keeps pandas >= 2.0 from trying to add up string columns.
national_totals_df = (
    latest_per_region_df[latest_per_region_df['Country'].isin(countries_split_by_province)]
    .groupby('Country')
    .sum(numeric_only=True)
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# The summed rows have no Province/State column, so it is NaN after the concat.
cases_now_df = pd.concat(
    [latest_per_region_df, national_totals_df], sort=False, ignore_index=True
)

# Keep country-level rows only (Province/State missing); this retains the
# summed national rows and drops the individual province rows.
cases_now_df = cases_now_df[cases_now_df['Province/State'].isnull()]
cases_now_df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,3391,03/04/2020,,Ukraine,2020-03-03T15:33:02,1.0,0.0,0.0
1,3284,03/04/2020,,Netherlands,2020-03-04T12:33:03,38.0,0.0,0.0
2,3291,03/04/2020,,Iceland,2020-03-04T19:33:03,26.0,0.0,0.0
3,3290,03/04/2020,,United Arab Emirates,2020-03-03T23:43:02,27.0,0.0,5.0
4,3289,03/04/2020,,India,2020-03-04T12:33:03,28.0,0.0,3.0


In [112]:
# World map of the latest confirmed case count per country.

# Derive the "as of" date for the title from the data instead of the literal
# placeholder "X". ObservationDate holds MM/DD/YYYY strings and can be missing
# on some rows, so parse it before taking the maximum and fall back to a
# neutral label if no date is available at all.
latest_observation = pd.to_datetime(cases_now_df['ObservationDate'], format='%m/%d/%Y').max()
if pd.isna(latest_observation):
    as_of_label = 'unknown date'
else:
    as_of_label = latest_observation.strftime('%m/%d/%Y')

fig = go.Figure(data=go.Choropleth(
    locations=cases_now_df['Country'],
    locationmode='country names',
    z=cases_now_df['Confirmed'],
    # NOTE(review): per the Plotly reference, autocolorscale=True selects the
    # default palette, so colorscale='Blues' is presumably ignored. Both
    # settings are kept as they were; decide which one is intended.
    colorscale='Blues',
    autocolorscale=True,
    reversescale=False,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title='Confirmed cases',
))

fig.update_layout(
    title_text=f'Confirmed Corona Cases as of {as_of_label}',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    # Data-source credit, positioned in paper (figure-relative) coordinates.
    annotations=[dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        # The run of spaces before "Kaggle" reproduces the original string exactly.
        text=('Source: <a href="https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset">'
              '            Kaggle: Novel Corona Virus 2019 Dataset</a>'),
        showarrow=False
    )]
)

fig.show()

In [113]:
# World map of the latest confirmed case count per country, leaving out
# Mainland China so the colour range is not dominated by its totals.
cases_now_without_china_df = cases_now_df[cases_now_df['Country'] != 'Mainland China']

# Derive the "as of" date for the title from the data instead of the literal
# placeholder "X". ObservationDate holds MM/DD/YYYY strings and can be missing
# on some rows, so parse it before taking the maximum and fall back to a
# neutral label if no date is available at all.
latest_observation = pd.to_datetime(
    cases_now_without_china_df['ObservationDate'], format='%m/%d/%Y'
).max()
if pd.isna(latest_observation):
    as_of_label = 'unknown date'
else:
    as_of_label = latest_observation.strftime('%m/%d/%Y')

fig = go.Figure(data=go.Choropleth(
    locations=cases_now_without_china_df['Country'],
    locationmode='country names',
    z=cases_now_without_china_df['Confirmed'],
    # NOTE(review): per the Plotly reference, autocolorscale=True selects the
    # default palette, so colorscale='Blues' is presumably ignored. Both
    # settings are kept as they were; decide which one is intended.
    colorscale='Blues',
    autocolorscale=True,
    reversescale=False,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title='Confirmed cases',
))

fig.update_layout(
    title_text=f'Confirmed Corona Cases as of {as_of_label} (without China)',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    # Data-source credit, positioned in paper (figure-relative) coordinates.
    annotations=[dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        # The run of spaces before "Kaggle" reproduces the original string exactly.
        text=('Source: <a href="https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset">'
              '            Kaggle: Novel Corona Virus 2019 Dataset</a>'),
        showarrow=False
    )]
)

fig.show()

# Spread of Covid-19

In [114]:
# Totals per (observation date, country), counting only rows with at least one
# confirmed case; province-level rows are summed into their country.
# numeric_only=True is required: without it, pandas >= 2.0 also tries to "sum"
# the remaining string columns (Province/State, Last Update) instead of
# silently dropping them as older versions did. The numeric result is unchanged.
# NOTE(review): ObservationDate is grouped as an MM/DD/YYYY string, so the row
# order is lexicographic; this is chronological only within one calendar year.
df_country_date = (
    covid_global_df[covid_global_df['Confirmed'] > 0]
    .groupby(['ObservationDate', 'Country'])
    .sum(numeric_only=True)
    .reset_index()
)
df_country_date

Unnamed: 0,ObservationDate,Country,SNo,Confirmed,Deaths,Recovered
0,01/22/2020,Japan,36,2.0,0.0,0.0
1,01/22/2020,Macau,21,1.0,0.0,0.0
2,01/22/2020,Mainland China,373,547.0,17.0,28.0
3,01/22/2020,South Korea,38,1.0,0.0,0.0
4,01/22/2020,Taiwan,29,1.0,0.0,0.0
...,...,...,...,...,...,...
1449,03/04/2020,UK,3273,85.0,0.0,8.0
1450,03/04/2020,US,107274,153.0,11.0,8.0
1451,03/04/2020,Ukraine,3391,1.0,0.0,0.0
1452,03/04/2020,United Arab Emirates,3290,27.0,0.0,5.0


In [48]:
# Animated world map: one frame per ObservationDate, countries coloured by
# their confirmed case count.
fig = px.choropleth(
    df_country_date,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    color_continuous_scale='RdBu_R',
    animation_frame='ObservationDate',
)

# Flat world projection without frame or coastlines.
world_geo_layout = dict(
    showframe=False,
    showcoastlines=False,
    projection_type='equirectangular',
)
fig.update_layout(title_text='Spread of Covid-19 worldwide', geo=world_geo_layout)
fig.show()

In [115]:
# Same animated map as for the whole world, but with Mainland China removed.
is_not_mainland_china = df_country_date['Country'].ne('Mainland China')
df_country_date_without_china = df_country_date[is_not_mainland_china]

fig = px.choropleth(
    df_country_date_without_china,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    color_continuous_scale='RdBu_R',
    hover_name='Country',
    animation_frame='ObservationDate',
)

# Flat world projection without frame or coastlines.
world_geo_layout = dict(
    showframe=False,
    showcoastlines=False,
    projection_type='equirectangular',
)
fig.update_layout(
    title_text='Spread of confirmed cases of Covid-19 in the world (without China)',
    geo=world_geo_layout,
)
fig.show()

## Spread within the EU

In [15]:
# Country names treated as EU member states when filtering the data.
eu_memberstates = {
    'Austria', 'Belgium', 'Bulgaria',
    'Croatia', 'Cyprus', 'Czech Republic',
    'Denmark', 'Estonia', 'Finland',
    'France', 'Germany', 'Greece',
    'Hungary', 'Ireland', 'Italy',
    'Latvia', 'Lithuania', 'Luxembourg',
    'Malta', 'Netherlands', 'Poland',
    'Portugal', 'Romania', 'Slovakia',
    'Slovenia', 'Spain', 'Sweden',
}

# Restrict the per-date country totals to the member states listed above.
is_eu_member = df_country_date['Country'].isin(eu_memberstates)
df_country_date_europe = df_country_date[is_eu_member]

# Animated map: one frame per ObservationDate, coloured by confirmed cases.
fig = px.choropleth(
    df_country_date_europe,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    color_continuous_scale='RdBu_R',
    hover_name='Country',
    animation_frame='ObservationDate',
)

# Zoom to Europe, with coastlines drawn and a Hammer projection.
europe_geo_layout = dict(
    scope='europe',
    showframe=False,
    showcoastlines=True,
    projection_type='hammer',
)
fig.update_layout(
    title_text='Spread of confirmed cases of Covid-19 in the EU',
    geo=europe_geo_layout,
)
fig.show()

## Spread within China

In [118]:
# Totals per (observation date, province) for Mainland China, counting only
# rows with at least one confirmed case.
is_mainland_china = covid_global_df['Country'] == 'Mainland China'
has_confirmed_cases = covid_global_df['Confirmed'] > 0

# numeric_only=True is required: without it, pandas >= 2.0 also tries to "sum"
# the remaining string columns (Country, Last Update) instead of silently
# dropping them as older versions did. The numeric result is unchanged.
df_country_date_china = (
    covid_global_df[is_mainland_china & has_confirmed_cases]
    .groupby(['ObservationDate', 'Province/State'])
    .sum(numeric_only=True)
    .reset_index()
)
df_country_date_china

Unnamed: 0,ObservationDate,Province/State,SNo,Confirmed,Deaths,Recovered
0,01/22/2020,Anhui,1,1.0,0.0,0.0
1,01/22/2020,Beijing,2,14.0,0.0,0.0
2,01/22/2020,Chongqing,3,6.0,0.0,0.0
3,01/22/2020,Fujian,4,1.0,0.0,0.0
4,01/22/2020,Guangdong,6,26.0,0.0,0.0
...,...,...,...,...,...,...
1310,03/04/2020,Tianjin,3265,136.0,3.0,124.0
1311,03/04/2020,Tibet,3367,1.0,0.0,1.0
1312,03/04/2020,Xinjiang,3274,76.0,3.0,69.0
1313,03/04/2020,Yunnan,3262,174.0,2.0,169.0


# Forecast

In [None]:
# TODO: write the forecasting code

# Comparison with SARS

In [None]:
# TODO: compare with similar epidemics (Ebola, SARS)

In [None]:
# Follow these instructions to make Plotly work in JupyterLab:
#
#     https://plot.ly/python/getting-started/#jupyterlab-support-python-35