In [1]:
# Import libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from ipywidgets import widgets, interactive

In [2]:
# Load the raw observations from CSV and preview the first five rows.
# Dataset: https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset
df = pd.read_csv(filepath_or_buffer='NewData/covid_19_data.csv')
df.head(n=5)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [3]:
# Info on the dataset: prints the index range, each column's non-null count
# and dtype, and the frame's memory usage (useful to spot missing values).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1942 entries, 0 to 1941
Data columns (total 8 columns):
SNo                1942 non-null int64
ObservationDate    1942 non-null object
Province/State     1413 non-null object
Country/Region     1942 non-null object
Last Update        1942 non-null object
Confirmed          1942 non-null float64
Deaths             1942 non-null float64
Recovered          1942 non-null float64
dtypes: float64(3), int64(1), object(4)
memory usage: 121.5+ KB


In [4]:
# Clean the dataset

# Drop the columns that are not used in the analysis
df = df.drop(columns=["SNo", "Last Update"])

# Rename 'Mainland China' to 'China' (assignment instead of inplace mutation)
df = df.replace('Mainland China', 'China')

# Parse the observation date into real datetimes.
# The preview above shows MM/DD/YYYY strings; a unit-less
# astype("datetime64") is rejected by recent pandas versions, so use
# pd.to_datetime with an explicit format instead.
df['Date'] = pd.to_datetime(df['ObservationDate'], format='%m/%d/%Y')

# Sum the counts per (date, country), pivot countries into columns
# (one column per metric/country pair) and replace NA by 0 for
# countries that have no report on a given date.
df = (
    df.groupby(by=['Date', 'Country/Region'])
      .agg({'Confirmed': 'sum',
            'Deaths': 'sum',
            'Recovered': 'sum'})
      .unstack()
      .reset_index()
      .fillna(0)
)

# Display cleaned dataset
df.head()

Unnamed: 0_level_0,Date,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,...,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered
Country/Region,Unnamed: 1_level_1,Australia,Belgium,Brazil,Cambodia,Canada,China,Colombia,Egypt,Finland,...,South Korea,Spain,Sri Lanka,Sweden,Taiwan,Thailand,UK,US,United Arab Emirates,Vietnam
0,2020-01-22,0.0,0.0,0.0,0.0,0.0,547.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-23,0.0,0.0,0.0,0.0,0.0,639.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-24,0.0,0.0,0.0,0.0,0.0,916.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-25,4.0,0.0,0.0,0.0,0.0,1399.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-26,4.0,0.0,0.0,0.0,1.0,2062.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0


In [5]:
# Extract only the most recent data on all infected people:
# take the last date row of the 'Confirmed' columns and transpose it so
# that countries become the index and the single column holds the counts.
df_i = df['Confirmed'].tail(1).transpose()

# Remove countries with 0 infected.
# `axis` must be passed by keyword: the positional form `.any(1)` is no
# longer accepted by recent pandas versions.
df_i = df_i.loc[(df_i != 0).any(axis=1)]


# Sort by number of infected people (ascending)
infected_sorted = df_i.iloc[:, 0].sort_values()

In [6]:
#Function for generating historical graph for a specific country (or "All" countries)
def corona_plot(country):
    """Show a stacked area chart of deaths, recovered and still-infected counts.

    Parameters
    ----------
    country : str
        A country label present in the second column level of ``df``, or
        ``'All'`` to sum the figures over every country.

    Returns
    -------
    The value returned by ``fig_hist.show()``; the chart itself is rendered
    as a side effect of that call.
    """
    # Pick the per-date series: world-wide sums or a single country's columns
    if country == 'All':
        recovered = df['Recovered'].sum(axis=1)
        deaths = df['Deaths'].sum(axis=1)
        total_confirmed = df['Confirmed'].sum(axis=1)
    else:
        recovered = df[('Recovered', country)]
        deaths = df[('Deaths', country)]
        total_confirmed = df[('Confirmed', country)]

    # Currently infected = confirmed cases minus those recovered or deceased
    confirmed = total_confirmed - recovered - deaths

    # Resolve the chart title once; two selections get a dedicated wording
    special_titles = {
        'All': f'Disease development in the whole world',
        'Others': f'Disease development for unspecified locations',
    }
    chart_title = special_titles.get(country, f'Disease development in {country}')

    # Stack the three series bottom-up: deaths, recovered, infected
    layers = (
        (deaths, "Deaths", 'red'),
        (recovered, "Recovered", 'green'),
        (confirmed, "Infected", 'orange'),
    )

    fig_hist = go.Figure()
    for values, label, colour in layers:
        fig_hist.add_trace(go.Scatter(
            x=df['Date'],
            y=values,
            fill='tonexty',
            name=label,
            mode='lines', line_color=colour,
            stackgroup='one'))

    # Axis labels and title
    fig_hist.update_layout(
        xaxis={'title_text': "Date"},
        yaxis={'title_text': "Number of people"},
        title_text=chart_title)

    return fig_hist.show()

In [7]:
#Create world map
# Upper bound of the colour scale: the third-highest infected count
# (presumably so the one or two largest outbreaks do not wash out every
# other country). `.iloc[0]` selects by position explicitly; a plain `[0]`
# on this country-labelled Series relies on the deprecated integer-key
# positional fallback.
color_scale_max = infected_sorted.tail(3).iloc[0]

world_map = px.choropleth(
                    df_i, 
                    title="Interactive world map",
                    locations=df_i.index, 
                    locationmode = "country names",
                    hover_name=df_i.index, 
                    color=df_i,
                    color_continuous_scale=px.colors.sequential.Oranges,
                    labels={ # replaces default labels by column name
                    "color": "# Infected people",  "locations": "Country"
                    },
                    
                    range_color=[0, color_scale_max])

# Wrap the figure in a widget so that hover callbacks can be attached to it
fig_map  = go.FigureWidget(world_map)


#Define interaction between world map and dropdown menu
def hover_fn(trace, points, state):
    """Select the hovered country in the dropdown menu.

    Intended to be registered as a hover callback on the map traces.
    ``points.point_inds`` holds positions into ``df_i.index``; the first one
    determines the country written to ``country_menu.value``. ``trace`` and
    ``state`` are accepted for the callback signature but not used.

    Does nothing when the event carries no point indices.
    """
    hovered = points.point_inds
    if not hovered:
        # No point attached to this event: indexing [0] would raise IndexError
        return
    country_menu.value = df_i.index[hovered[0]]


# Register the hover callback on every trace of the map widget
for trace in fig_map.data:
    trace.on_hover(hover_fn)

# Dropdown menu: 'All' followed by every country kept in df_i
country_menu = widgets.Dropdown(
    description='Country:',
    value='All',
    options=['All', *df_i.index],
)

# Display the world map and, below it, the historical graph driven by the dropdown
display(fig_map, interactive(corona_plot, country=country_menu))

FigureWidget({
    'data': [{'coloraxis': 'coloraxis',
              'geo': 'geo',
              'hoverlabel':…

interactive(children=(Dropdown(description='Country:', options=('All', 'Australia', 'Belgium', 'Cambodia', 'Ca…