In [1]:
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import pandas_bokeh
# Route DataFrame.plot(...) through the pandas_bokeh backend and direct
# the resulting figures to the notebook output.
pd.options.plotting.backend = "pandas_bokeh"
pd.plotting.output_notebook()

In [3]:
# Read the grouped dataset from the local Data directory and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer="./Data/full_grouped.csv")
df.head(n=5)

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,WHO Region
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0,Eastern Mediterranean
1,2020-01-22,Albania,0,0,0,0,0,0,0,Europe
2,2020-01-22,Algeria,0,0,0,0,0,0,0,Africa
3,2020-01-22,Andorra,0,0,0,0,0,0,0,Europe
4,2020-01-22,Angola,0,0,0,0,0,0,0,Africa


In [4]:
# Print column names, non-null counts and dtypes of the freshly loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35156 entries, 0 to 35155
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Date            35156 non-null  object
 1   Country/Region  35156 non-null  object
 2   Confirmed       35156 non-null  int64 
 3   Deaths          35156 non-null  int64 
 4   Recovered       35156 non-null  int64 
 5   Active          35156 non-null  int64 
 6   New cases       35156 non-null  int64 
 7   New deaths      35156 non-null  int64 
 8   New recovered   35156 non-null  int64 
 9   WHO Region      35156 non-null  object
dtypes: int64(7), object(3)
memory usage: 2.7+ MB


In [5]:
# Build the working frame in one chain:
#   1. shorten "Country/Region" to "Country",
#   2. parse "Date" into datetime64 (the column keeps its position),
#   3. append the three "Total ..." sums and a zero-padded "Month" string ("01".."12").
#
# NOTE(review): each "Total X" is X plus the same day's "New x". If X is already a
# cumulative count, this counts the day's new cases twice -- confirm the intent.
df = (
    df.rename(columns={"Country/Region": "Country"})
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .assign(
        **{
            "Total Confirmed": lambda frame: frame["Confirmed"] + frame["New cases"],
            "Total Deaths": lambda frame: frame["Deaths"] + frame["New deaths"],
            "Total Recovered": lambda frame: frame["Recovered"] + frame["New recovered"],
            "Month": lambda frame: frame["Date"].dt.strftime("%m"),
        }
    )
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35156 entries, 0 to 35155
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Date             35156 non-null  datetime64[ns]
 1   Country          35156 non-null  object        
 2   Confirmed        35156 non-null  int64         
 3   Deaths           35156 non-null  int64         
 4   Recovered        35156 non-null  int64         
 5   Active           35156 non-null  int64         
 6   New cases        35156 non-null  int64         
 7   New deaths       35156 non-null  int64         
 8   New recovered    35156 non-null  int64         
 9   WHO Region       35156 non-null  object        
 10  Total Confirmed  35156 non-null  int64         
 11  Total Deaths     35156 non-null  int64         
 12  Total Recovered  35156 non-null  int64         
 13  Month            35156 non-null  object        
dtypes: datetime64[ns](1), int64(10), object(3)

In [6]:
# Preview the frame after the rename and the derived columns were added.
df.head()

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,WHO Region,Total Confirmed,Total Deaths,Total Recovered,Month
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0,Eastern Mediterranean,0,0,0,1
1,2020-01-22,Albania,0,0,0,0,0,0,0,Europe,0,0,0,1
2,2020-01-22,Algeria,0,0,0,0,0,0,0,Africa,0,0,0,1
3,2020-01-22,Andorra,0,0,0,0,0,0,0,Europe,0,0,0,1
4,2020-01-22,Angola,0,0,0,0,0,0,0,Africa,0,0,0,1


In [7]:
# Count missing values per column.
df.isna().sum()

Date               0
Country            0
Confirmed          0
Deaths             0
Recovered          0
Active             0
New cases          0
New deaths         0
New recovered      0
WHO Region         0
Total Confirmed    0
Total Deaths       0
Total Recovered    0
Month              0
dtype: int64

In [8]:
# Average every numeric column per calendar month.
# numeric_only=True is required because the frame still carries text columns
# ("Country", "WHO Region"): pandas >= 2.0 raises a TypeError when asked to
# average them, whereas older releases silently dropped them. It also leaves
# the datetime "Date" column out of the result.
monthly_cases = df.groupby("Month", as_index=False).mean(numeric_only=True)
monthly_cases.head()

Unnamed: 0,Month,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Total Confirmed,Total Deaths,Total Recovered
0,1,20.606417,0.475401,0.448128,19.682888,5.011765,0.104813,0.102139,25.618182,0.580214,0.550267
1,2,306.737415,8.64374,67.060483,231.033192,13.899871,0.502121,7.02471,320.637286,9.14586,74.085193
2,3,1553.012765,68.587373,456.162153,1028.26324,135.598413,7.16612,23.41901,1688.611178,75.753493,479.581163
3,4,11299.761497,768.487344,2842.071301,7689.202852,430.014795,33.908378,145.372906,11729.776292,802.395722,2987.444207
4,5,25011.281525,1657.685699,9121.41418,14232.181646,503.888563,23.961014,275.31016,25515.170088,1681.646714,9396.72434


In [9]:
# Line chart of the mean "Active" value per month. The figure is only built
# here (show_figure=False) so it can be placed in a layout by a later cell.
# NOTE(review): the title states Jan-May; confirm it matches the months
# actually present in monthly_cases.
montly_active_cases = monthly_cases.plot.line(
    # data mapping
    x="Month",
    y="Active",
    # labelling
    title="Active Cases from month Jan 20 to May 20",
    xlabel="Month",
    ylabel="Active Cases",
    # line and marker appearance
    line_color="red",
    line_dash="dotted",
    plot_data_points=True,
    plot_data_points_size=5,
    marker="circle",
    # behaviour
    zooming=False,
    show_figure=False,
)

In [10]:
# Mean of every numeric column per (Country, Month), largest mean "Confirmed" first.
# numeric_only=True keeps the remaining text column ("WHO Region") out of the
# aggregation; pandas >= 2.0 raises a TypeError instead of silently dropping it.
country_monthly_cases = (
    df.groupby(["Country", "Month"])
    .mean(numeric_only=True)
    .sort_values("Confirmed", ascending=False)
)

# Bar chart of the ten (Country, Month) pairs with the highest mean "Confirmed".
# Built without displaying it (show_figure=False) so a later cell can lay it out.
monthly_cases_plot = country_monthly_cases.head(10).plot(
    kind="bar",
    y="Confirmed",
    xlabel="Confirmed Cases in Month",
    ylabel="Confirmed Cases",
    zooming=False,
    show_figure=False,
)

In [11]:
# Mean of every numeric column per country.
# numeric_only=True keeps the text columns ("WHO Region", "Month") out of the
# aggregation; pandas >= 2.0 raises a TypeError instead of silently dropping them.
country_cases = df.groupby(["Country"], as_index=False).mean(numeric_only=True)

# Recovery rate = mean "Total Recovered" / mean "Total Confirmed" for each country.
country_cases["Recovery_rate"] = (
    country_cases["Total Recovered"] / country_cases["Total Confirmed"]
)
country_cases.head()

Unnamed: 0,Country,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Total Confirmed,Total Deaths,Total Recovered,Recovery_rate
0,Afghanistan,10299.946809,261.159574,4245.957447,5792.829787,192.888298,6.75,134.031915,10492.835106,267.909574,4379.989362,0.417427
1,Albania,1046.287234,30.361702,632.324468,383.601064,25.957447,0.765957,14.601064,1072.244681,31.12766,646.925532,0.603338
2,Algeria,6275.292553,414.744681,4020.728723,1839.819149,148.792553,6.18617,100.196809,6424.085106,420.930851,4120.925532,0.641481
3,Andorra,502.148936,28.845745,367.414894,105.888298,4.824468,0.276596,4.271277,506.973404,29.12234,371.68617,0.733147
4,Angola,120.542553,5.734043,34.962766,79.845745,5.053191,0.218085,1.287234,125.595745,5.952128,36.25,0.288624


In [12]:
# Line chart of "Recovery_rate" with one point per country. The figure is
# only built here (show_figure=False); a later cell sizes and displays it.
recovery_rate_plot = country_cases.plot.line(
    # data mapping
    x="Country",
    y="Recovery_rate",
    # labelling
    title="Average Recovery Rate for each country",
    xlabel="Countries",
    ylabel="Recovery Rate",
    # marker appearance
    plot_data_points=True,
    plot_data_points_size=10,
    # behaviour
    zooming=False,
    show_figure=False,
)

In [13]:
# Widen the recovery-rate chart so it can span the full top row of the layout.
recovery_rate_plot.plot_width = 900

# Arrange the figures: recovery rate on top, the two monthly charts side by
# side underneath, then render the combined layout.
layout = pandas_bokeh.column(
    recovery_rate_plot,
    pandas_bokeh.row(montly_active_cases, monthly_cases_plot),
)
pandas_bokeh.show(layout)

In [14]:
# Mean of every numeric column per (WHO Region, Month).
# numeric_only=True keeps the remaining text column ("Country") out of the
# aggregation; pandas >= 2.0 raises a TypeError instead of silently dropping it.
WHO_region = df.groupby(["WHO Region", "Month"], as_index=False).mean(numeric_only=True)
WHO_region.head()

Unnamed: 0,WHO Region,Month,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Total Confirmed,Total Deaths,Total Recovered
0,Africa,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Africa,2,0.005029,0.0,0.0,0.005029,0.001437,0.0,0.0,0.006466,0.0,0.0
2,Africa,3,18.614247,0.430108,0.903226,17.280914,2.692204,0.071237,0.149866,21.306452,0.501344,1.053091
3,Africa,4,268.622917,11.761806,74.717361,182.14375,15.051389,0.597917,6.186111,283.674306,12.359722,80.903472
4,Africa,5,1235.401882,36.018817,494.81922,704.563844,51.787634,1.109543,24.709677,1287.189516,37.12836,519.528898


In [15]:
# One slice of the regional averages per month. "Month" holds zero-padded
# strings, so the comparison values are "01".."05" rather than integers.
WHO_Jan = WHO_region.loc[WHO_region["Month"].eq("01")]
WHO_Feb = WHO_region.loc[WHO_region["Month"].eq("02")]
WHO_Mar = WHO_region.loc[WHO_region["Month"].eq("03")]
WHO_Apr = WHO_region.loc[WHO_region["Month"].eq("04")]
WHO_May = WHO_region.loc[WHO_region["Month"].eq("05")]


In [16]:
def _plot_region_active(month_frame, title):
    """Build the mean-active-cases chart for one month's regional slice.

    Parameters
    ----------
    month_frame : pandas.DataFrame
        Rows of ``WHO_region`` for a single month; must contain the
        "WHO Region" and "Active" columns.
    title : str
        Title shown above the figure.

    Returns
    -------
    The figure object returned by ``DataFrame.plot``. It is not displayed
    here (``show_figure=False``) so the caller can place it in a grid.
    """
    return month_frame.plot(
        x="WHO Region",
        y="Active",
        xlabel="Countries in WHO Region",
        ylabel="Active Cases in Country",
        title=title,
        plot_data_points=True,
        plot_data_points_size=10,
        marker="circle",
        zooming=False,
        show_figure=False,
    )


# One figure per month, all sharing the same styling via the helper above.
WHO_Jan_plot = _plot_region_active(WHO_Jan, "Active Cases in January 2020")
WHO_Feb_plot = _plot_region_active(WHO_Feb, "Active Cases in February 2020")
# Fixed: this figure was previously built from WHO_Jan, so the "Mar" panel
# repeated January's data.
WHO_Mar_plot = _plot_region_active(WHO_Mar, "Active Cases in Mar 2020")
WHO_Apr_plot = _plot_region_active(WHO_Apr, "Active Cases in April 2020")
WHO_May_plot = _plot_region_active(WHO_May, "Active Cases in May 2020")

In [17]:
# Show the five monthly figures in a grid, two per row with May alone on the
# last row; every figure is drawn 500 wide.
pandas_bokeh.plot_grid(
    [
        [WHO_Jan_plot, WHO_Feb_plot],
        [WHO_Mar_plot, WHO_Apr_plot],
        [WHO_May_plot],
    ],
    plot_width=500,
)