In [19]:
import pandas as pd


# Read the confirmed cases data
data_raw = pd.read_csv(
        "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series"
        "/time_series_covid19_confirmed_global.csv")
data_raw

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/31/21,6/1/21,6/2/21,6/3/21,6/4/21,6/5/21,6/6/21,6/7/21,6/8/21,6/9/21
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,71838,72977,74026,75119,76628,77963,79224,80841,82326,84050
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,132315,132337,132351,132360,132372,132374,132379,132384,132397,132415
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,128913,129218,129640,129976,130361,130681,130958,131283,131647,132034
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,13727,13729,13744,13752,13758,13758,13758,13777,13781,13791
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,34551,34752,34960,35140,35307,35594,35772,35854,36004,36115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,,Vietnam,14.058324,108.277199,0,2,2,2,2,2,...,7432,7625,7870,8063,8364,8580,8791,9027,9222,9635
272,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,308350,308732,309036,309333,309644,309853,310026,310026,310544,310544
273,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,6742,6751,6759,6767,6773,6780,6787,6812,6823,6832
274,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,95263,95821,96563,97388,98376,99540,100278,100765,101996,103763


In [20]:
# Melt the dateframe and set index
def clean_data(df_raw):
    df_cleaned = df_raw.melt(id_vars=['Province/State','Country/Region','Lat','Long'],value_name='Cases',var_name='Date')
    df_cleaned = df_cleaned.set_index(['Country/Region','Province/State','Date'])
    return df_cleaned 

# Clean all datasets
data_cleaned = clean_data(data_raw)
data_cleaned

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Lat,Long,Cases
Country/Region,Province/State,Date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,,1/22/20,33.939110,67.709953,0
Albania,,1/22/20,41.153300,20.168300,0
Algeria,,1/22/20,28.033900,1.659600,0
Andorra,,1/22/20,42.506300,1.521800,0
Angola,,1/22/20,-11.202700,17.873900,0
...,...,...,...,...,...
Vietnam,,6/9/21,14.058324,108.277199,9635
West Bank and Gaza,,6/9/21,31.952200,35.233200,310544
Yemen,,6/9/21,15.552727,48.516388,6832
Zambia,,6/9/21,-13.133897,27.849332,103763


In [21]:
# Get countrywide data (rather than by region)
def country_data(df_cleaned):
    df = df_cleaned.groupby(['Country/Region','Date'])['Cases'].sum().reset_index()
    df = df.set_index(['Country/Region','Date'])
    df.index = df.index.set_levels([df.index.levels[0], pd.to_datetime(df.index.levels[1])])
    df = df.sort_values(['Country/Region','Date'],ascending=True)
    return df
  
country_data = country_data(data_cleaned)
country_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Cases
Country/Region,Date,Unnamed: 2_level_1
Afghanistan,2020-01-22,0
Afghanistan,2020-01-23,0
Afghanistan,2020-01-24,0
Afghanistan,2020-01-25,0
Afghanistan,2020-01-26,0
...,...,...
Zimbabwe,2021-06-05,39168
Zimbabwe,2021-06-06,39189
Zimbabwe,2021-06-07,39238
Zimbabwe,2021-06-08,39321


In [22]:
total_cases_country = country_data.max(level=0)['Cases'].reset_index().set_index('Country/Region')

total_cases_country.sort_values(by='Cases',ascending=False, inplace=True)

data_top_10_country = total_cases_country.head(10)

data_top_10_country

Unnamed: 0_level_0,Cases
Country/Region,Unnamed: 1_level_1
US,33414025
India,29089069
Brazil,17122877
France,5978650
Turkey,5306690
Russia,5096657
United Kingdom,4551694
Italy,4237790
Argentina,4038528
Germany,3715870


In [23]:
# Generate list of top 10 countries
country_list = data_top_10_country.index.tolist()

In [24]:
# Define the start as the first day more than 100 cases were reported
# Create dict with country names and the data.

top_10_growth = {}

for country in country_list:
    country_first_case = country_data.loc[country]['Cases'].reset_index().set_index('Date')
    country_growth = country_first_case[country_first_case.gt(99)].dropna().reset_index()
    top_10_growth[country] = country_growth

top_10_growth

{'US':           Date       Cases
 0   2020-03-04       107.0
 1   2020-03-05       184.0
 2   2020-03-06       237.0
 3   2020-03-07       403.0
 4   2020-03-08       519.0
 ..         ...         ...
 458 2021-06-05  33357205.0
 459 2021-06-06  33362600.0
 460 2021-06-07  33378096.0
 461 2021-06-08  33393246.0
 462 2021-06-09  33414025.0
 
 [463 rows x 2 columns],
 'India':           Date       Cases
 0   2020-03-14       102.0
 1   2020-03-15       113.0
 2   2020-03-16       119.0
 3   2020-03-17       142.0
 4   2020-03-18       156.0
 ..         ...         ...
 448 2021-06-05  28809339.0
 449 2021-06-06  28909975.0
 450 2021-06-07  28996473.0
 451 2021-06-08  29089069.0
 452 2021-06-09  29089069.0
 
 [453 rows x 2 columns],
 'Brazil':           Date       Cases
 0   2020-03-13       151.0
 1   2020-03-14       151.0
 2   2020-03-15       162.0
 3   2020-03-16       200.0
 4   2020-03-17       321.0
 ..         ...         ...
 449 2021-06-05  16907425.0
 450 2021-06-06  16947062

In [25]:
import plotly.graph_objects as go
import plotly.express as px


fig = go.Figure()

for country in country_list:
    country_first_case = country_data.loc[country]['Cases'].reset_index().set_index('Date')
    country_growth = country_first_case[country_first_case.gt(99)].dropna().reset_index()
    fig.add_trace(go.Scatter(x=country_growth.index, y=country_growth['Cases'], mode='lines', name=country))
    length = len(country_growth.index) - 1
    fig.add_annotation(
        x=length,
        y=country_growth.at[(length),'Cases'],
        text=country,
        showarrow=False,
        xshift=10)

fig.show()

In [26]:
fig.update_layout(
    title="COVID-19 cumulative number of cases",
    xaxis_title="Day after first 100 cases",
    yaxis_title="Number of cases",
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=12,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=True,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=12,
            color='rgb(82, 82, 82)',
        ),
    ),
    autosize=True,
    margin=dict(
        autoexpand=False,
        l=100,
        r=100,
        t=110,
    ),
    showlegend=False,
    plot_bgcolor='white'
)