In [1]:
# Import libraries
import folium
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import plotly.express as px
from ipywidgets import widgets, interactive
import plotly.graph_objs as go
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [2]:
%%javascript
// Replace OutputArea's _should_scroll hook with a function that always returns false,
// whatever line count it is given.
// NOTE(review): presumably this stops long outputs from being collapsed into a scroll
// box; it relies on a global `IPython` object being available in the front end — confirm.
IPython.OutputArea.prototype._should_scroll = function(lines){return false;}

<IPython.core.display.Javascript object>

In [3]:
# Load the raw observations from the local copy of the Kaggle dataset
# (https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset)
# and preview the first rows.
covid_csv = 'NewData/covid_19_data.csv'
df = pd.read_csv(covid_csv)
df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [4]:
# Summarise the raw frame: column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1942 entries, 0 to 1941
Data columns (total 8 columns):
SNo                1942 non-null int64
ObservationDate    1942 non-null object
Province/State     1413 non-null object
Country/Region     1942 non-null object
Last Update        1942 non-null object
Confirmed          1942 non-null float64
Deaths             1942 non-null float64
Recovered          1942 non-null float64
dtypes: float64(3), int64(1), object(4)
memory usage: 121.5+ KB


In [5]:
# Clean the dataset
#
# Turns the raw per-province observations into one row per date with
# two-level columns: (metric, country), where metric is one of
# 'Confirmed', 'Deaths' or 'Recovered'.
df = (
    df
    # Drop the serial-number and last-update columns; neither is used below
    .drop(columns=["SNo", "Last Update"])
    # Rename 'Mainland China' to 'China'
    .replace('Mainland China', 'China')
    # Transform date to date type
    .assign(Date=lambda frame: pd.to_datetime(frame['ObservationDate']))
    # Sum the provinces of each country per day
    .groupby(by=['Date', 'Country/Region'])
    .agg({'Confirmed': 'sum',
          'Deaths': 'sum',
          'Recovered': 'sum'})
    # Move countries into the columns; a country with no row on a date becomes NaN -> 0
    .unstack()
    .reset_index()
    .fillna(0)
    # "Country/Region" is the *name of the column level* after unstack(), not a
    # column label, so it has to be renamed through rename_axis (and the result
    # kept) for the rename to have any effect.
    .rename_axis(columns={"Country/Region": "Country"})
)

# Display cleaned dataset
df.head()

Unnamed: 0_level_0,Date,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,Confirmed,...,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered,Recovered
Country/Region,Unnamed: 1_level_1,Australia,Belgium,Brazil,Cambodia,Canada,China,Colombia,Egypt,Finland,...,South Korea,Spain,Sri Lanka,Sweden,Taiwan,Thailand,UK,US,United Arab Emirates,Vietnam
0,2020-01-22,0.0,0.0,0.0,0.0,0.0,547.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-23,0.0,0.0,0.0,0.0,0.0,639.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-24,0.0,0.0,0.0,0.0,0.0,916.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-25,4.0,0.0,0.0,0.0,0.0,1399.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-26,4.0,0.0,0.0,0.0,1.0,2062.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0


In [6]:
# Extract only the most recent data on all infected people and remove countries with 0 infected.
# df_i is a one-column DataFrame indexed by country (the transposed last row of 'Confirmed').
df_i = df['Confirmed'].tail(1).transpose()
# The axis is passed by keyword: DataFrame.any() no longer accepts it positionally
# in pandas 2.x, so `.any(1)` raises a TypeError there.
df_i = df_i.loc[(df_i != 0).any(axis=1)]

# Sort by number of infected people (ascending, so the largest counts come last)
infected_sorted = df_i.iloc[:, 0].sort_values()

In [7]:
#Function for generating historical graph for a specific country (or "All" countries)
def corona_plot(country):
    """Draw a stacked area chart of case counts over time on a new figure.

    Reads the module-level ``df`` (a 'Date' column plus columns keyed by
    (metric, country)) and fills three bands: deaths (red, from 0), recovered
    (green, stacked on top of deaths) and the remainder up to the confirmed
    total (orange, labelled "Infected").

    Parameters
    ----------
    country : str
        Country key of the second column level of ``df``, or 'All' to sum
        every country.

    Raises
    ------
    KeyError
        If ``country`` is neither 'All' nor a country present in ``df``.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    t = df["Date"]

    #Find number of infected (confirmed), recovered and deaths
    if country == 'All':
        confirmed = df['Confirmed'].sum(axis=1)
        recovered = df['Recovered'].sum(axis=1)
        deaths = df['Deaths'].sum(axis=1)
        title = 'Total cases in the world'
    else:
        confirmed = df[('Confirmed', country)]
        recovered = df[('Recovered', country)]
        deaths = df[('Deaths', country)]
        title = 'Number of cases in ' + country
    recovered_and_deaths = recovered + deaths

    # Create stacked graph (drawing order must match the legend labels below)
    ax.fill_between(t, recovered_and_deaths, confirmed, color='orange')
    ax.fill_between(t, recovered_and_deaths, deaths, color='green')
    ax.fill_between(t, deaths, 0, color='red')

    # Format number of ticks on x-axis: aim for about `numberOfDates` labelled ticks.
    # The interval is clamped to 1 because DayLocator rejects an interval of 0,
    # which is what the division yields when df has fewer rows than numberOfDates.
    numberOfDates = 6
    dayIntervalNumber = max(1, len(df) // numberOfDates)
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=dayIntervalNumber))  # every n days
    ax.xaxis.set_minor_locator(mdates.DayLocator())  # every day
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

    #Format text on x-axis
    fig.autofmt_xdate()

    #Axis labels, title and legend
    ax.set_ylabel('Number of people', fontsize=12)
    ax.set_xlabel('Date', fontsize=12)
    ax.set_title(title, fontsize=15)
    ax.legend(["Infected", 'Recovered', 'Deaths'], loc='upper left')

In [8]:
#Create world map
# One region per row of df_i (index = country name), shaded by the confirmed count.
# NOTE(review): `color=df_i` passes the whole one-column DataFrame as the colour
# values; kept as-is — confirm it is still accepted by the installed plotly version.
world_map = px.choropleth(
                    df_i,
                    title="Interactive world map",
                    width=800, height=500,
                    locations=df_i.index,
                    locationmode = "country names",
                    hover_name=df_i.index,
                    color=df_i,
                    color_continuous_scale=px.colors.sequential.Oranges,
                    labels={ # replaces default labels by column name
                    "color": "# Infected people",  "locations": "Country"
                    },
                    # Upper end of the colour scale = third-largest count
                    # (infected_sorted is ascending, so it is the first of the last three).
                    # .iloc is required: the Series is labelled by country name, and
                    # integer keys through [] on such a Series are a deprecated
                    # positional fallback.
                    range_color=[0, infected_sorted.tail(3).iloc[0]])

# Wrap the figure in a widget so that hover callbacks can be attached to its traces
fig_map  = go.FigureWidget(world_map)


#Define interaction between world map and dropdown menu
def hover_fn(trace, points, state):
    """Select the hovered country in the dropdown menu.

    Parameters
    ----------
    trace : object
        The trace that triggered the event (unused).
    points : object
        Event payload; ``points.point_inds`` lists the positions of the
        hovered points, which index into ``df_i``.
    state : object
        Input-device state passed with the event (unused).

    Does nothing when the event carries no points. Otherwise writes the
    country name to the module-level ``country_menu`` widget.
    """
    # A hover event may arrive without any point; nothing to select in that case
    if not points.point_inds:
        return
    ind = points.point_inds[0]
    country = df_i.index[ind]
    country_menu.value = country


# Register the hover callback on every trace of the map widget
for map_trace in fig_map.data:
    map_trace.on_hover(hover_fn)

#Create dropdown menu: 'All' first, followed by every country shown on the map
country_menu = widgets.Dropdown(
    description='Country:',
    value='All',
    options=['All', *df_i.index],
)

#Display world map and historical graph (the graph is redrawn whenever the menu changes)
history_graph = interactive(corona_plot, country=country_menu)
display(fig_map, history_graph)

FigureWidget({
    'data': [{'coloraxis': 'coloraxis',
              'geo': 'geo',
              'hoverlabel':…

interactive(children=(Dropdown(description='Country:', options=('All', 'Australia', 'Belgium', 'Cambodia', 'Ca…