In [22]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [23]:
# Load the case data; the path is relative to the notebook's working directory.
df = pd.read_csv("data/data20_q1/data.csv")
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,Country,Date,Confirmed,Recovered,Deaths,Active
0,Germany,2020-01-22,0,0,0,0
1,Germany,2020-01-23,0,0,0,0
2,Germany,2020-01-24,0,0,0,0
3,Germany,2020-01-25,0,0,0,0
4,Germany,2020-01-26,0,0,0,0


In [24]:
# Inspect column dtypes and non-null counts of the freshly loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6800 entries, 0 to 6799
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Country    6800 non-null   object
 1   Date       6800 non-null   object
 2   Confirmed  6800 non-null   int64 
 3   Recovered  6800 non-null   int64 
 4   Deaths     6800 non-null   int64 
 5   Active     6800 non-null   int64 
dtypes: int64(4), object(2)
memory usage: 318.9+ KB


In [25]:
# Parse the Date column into datetimes; later cells filter on it and use it
# as the x-axis of the time-series plots.
df["Date"] = pd.to_datetime(df["Date"])
# Re-check the dtypes to confirm the conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6800 entries, 0 to 6799
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Country    6800 non-null   object        
 1   Date       6800 non-null   datetime64[ns]
 2   Confirmed  6800 non-null   int64         
 3   Recovered  6800 non-null   int64         
 4   Deaths     6800 non-null   int64         
 5   Active     6800 non-null   int64         
dtypes: datetime64[ns](1), int64(4), object(1)
memory usage: 318.9+ KB


In [26]:
# Descriptive statistics (count, mean, spread, quartiles) for the dataset.
df.describe()

Unnamed: 0,Confirmed,Recovered,Deaths,Active
count,6800.0,6800.0,6800.0,6800.0
mean,9773762.0,1325311.0,146963.7,8301487.0
std,14889550.0,3718714.0,198627.7,14890440.0
min,0.0,0.0,0.0,0.0
25%,470080.5,0.0,28439.75,99547.25
50%,4261185.0,70568.5,87037.5,997678.5
75%,10822620.0,561501.8,139289.0,8578785.0
max,83237590.0,30974750.0,1002020.0,82235570.0


In [27]:
# Pairwise correlation between the four case-count columns.
# The columns are selected explicitly because `df` also holds the string
# "Country" column and the datetime "Date" column: recent pandas versions no
# longer drop non-numeric columns from corr() by default and would raise here.
df[["Confirmed", "Recovered", "Deaths", "Active"]].corr()

Unnamed: 0,Confirmed,Recovered,Deaths,Active
Confirmed,1.0,0.074962,0.953456,0.968501
Recovered,0.074962,1.0,0.067141,-0.175676
Deaths,0.953456,0.067141,1.0,0.923292
Active,0.968501,-0.175676,0.923292,1.0


In [28]:
# Restrict the analysis to rows dated on or before the cutoff.
cutoff_date = "2020-05-20"
period = df["Date"] <= cutoff_date
df_period = df[period]

In [29]:
# Sum the four case counts across all countries for each date.
data_by_date = (
    df_period
    .groupby("Date")[["Confirmed", "Recovered", "Deaths", "Active"]]
    .sum()
)
data_by_date

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Active
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-22,1,0,0,1
2020-01-23,1,0,0,1
2020-01-24,4,0,0,4
2020-01-25,5,0,0,5
2020-01-26,8,0,0,8
...,...,...,...,...
2020-05-16,2788725,954901,196352,1637472
2020-05-17,2824949,971935,198135,1654879
2020-05-18,2864072,997356,200003,1666713
2020-05-19,2903257,1017430,201823,1684004


## Trend of Recovered, Confirmed, Death, and Active COVID-19 Cases

In [30]:
# Overlay the four daily totals on one time axis.
# Each pair is (column in data_by_date, legend label).
trend_series = (
    ("Recovered", "Recovered Cases"),
    ("Confirmed", "Confirmed Cases"),
    ("Deaths", "Death Cases"),
    ("Active", "Active Cases"),
)

fig = go.Figure()
for trend_column, trend_label in trend_series:
    fig.add_trace(
        go.Scatter(
            x=data_by_date.index,
            y=data_by_date[trend_column],
            mode='lines+markers',
            name=trend_label,
        )
    )

fig.update_layout(
    title="Trend of different types of cases",
    title_x=0.5,
    width=1700,
    height=600,
    legend=dict(x=0, y=1, traceorder="normal"),
)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Number of Cases")
# to export static image, use this:
# fig.write_image("location/image_name.png")

fig.show()

## Trend of Recovered, Confirmed, Death, and Active COVID-19 Cases by Country

In [31]:
def get_df_by_country(country):
    """Return per-date case totals for a single country.

    Filters the module-level ``df_period`` frame to the rows whose "Country"
    equals ``country`` and sums the "Confirmed", "Recovered", "Deaths" and
    "Active" columns for each "Date".
    """
    case_columns = ["Confirmed", "Recovered", "Deaths", "Active"]
    is_country = df_period["Country"] == country
    country_rows = df_period.loc[is_country]
    per_date = country_rows.groupby(["Date"])
    return per_date[case_columns].sum()

In [32]:
# Build one per-date frame for each country compared in the plots below.
# The target names and the country names are listed in the same order.
(
    data_de_period,
    data_es_period,
    data_fr_period,
    data_in_period,
    data_it_period,
    data_ru_period,
    data_tr_period,
    data_us_period,
) = map(
    get_df_by_country,
    [
        "Germany",
        "Spain",
        "France",
        "India",
        "Italy",
        "Russia",
        "Turkey",
        "United States",
    ],
)

In [33]:
def plot_by_case(case_type):
    """Plot one case-count column over time, one line per country.

    Parameters
    ----------
    case_type : str
        Name of the column to plot from each per-country frame, e.g.
        "Confirmed", "Recovered", "Deaths" or "Active".

    Reads the module-level ``data_*_period`` frames, displays the figure with
    ``fig.show()`` and returns ``None``.
    """
    # (legend name, per-date frame) pairs, in legend order.
    country_frames = [
        ("Germany", data_de_period),
        ("Spain", data_es_period),
        ("France", data_fr_period),
        ("India", data_in_period),
        ("Italy", data_it_period),
        ("Russia", data_ru_period),
        ("Turkey", data_tr_period),
        ("United States", data_us_period),
    ]

    fig = go.Figure()
    for country_name, country_frame in country_frames:
        fig.add_trace(
            go.Scatter(x=country_frame.index, y=country_frame[case_type], name=country_name)
        )

    fig.update_layout(
        title="Trend of %s Cases" % case_type,
        title_x=0.5,
        width=1700,
        height=600,
        legend=dict(x=0, y=1, traceorder="normal"),
    )
    fig.update_xaxes(title_text="Date")
    # Interpolate the case type into the label: the previous plain string showed
    # the "{case_type}" placeholder literally. No unit suffix is given because
    # the values are plotted as raw counts, not scaled.
    fig.update_yaxes(title_text="Number of %s Cases" % case_type)

    fig.show()

In [34]:
# Render one per-country trend figure for each case type.
for case_type_name in ("Recovered", "Confirmed", "Deaths", "Active"):
    plot_by_case(case_type_name)

## Mortality and Recovery rate of COVID-19 cases

- Mortality Rate = (Number of Death Cases / Number of Confirmed Cases) × 100
- Recovery Rate = (Number of Recovered Cases / Number of Confirmed Cases) × 100

In [35]:
# Derive daily rates (as percentages of the confirmed total) and the number of
# closed cases, then summarise the two rates.
confirmed_total = data_by_date["Confirmed"]
recovered_total = data_by_date["Recovered"]
deaths_total = data_by_date["Deaths"]

data_by_date["Recovery Rate"] = (recovered_total / confirmed_total) * 100
data_by_date["Mortality Rate"] = (deaths_total / confirmed_total) * 100
data_by_date["Closed Cases"] = recovered_total + deaths_total

recovery_rate = data_by_date["Recovery Rate"]
mortality_rate = data_by_date["Mortality Rate"]

print("Average Recovery Rate", recovery_rate.mean())
print("Median Recovery Rate", recovery_rate.median())
print()
print("Average Mortality Rate", mortality_rate.mean())
print("Median Mortality Rate", mortality_rate.median())

Average Recovery Rate 15.991615964821746
Median Recovery Rate 14.021325875719302

Average Mortality Rate 4.4411743863277575
Median Mortality Rate 5.333785976766896


In [36]:
# Two stacked panels: recovery rate on top, mortality rate below.
# Each name is the data_by_date column, the panel title and the y-axis label.
rate_panels = ("Recovery Rate", "Mortality Rate")

fig = make_subplots(rows=2, cols=1, subplot_titles=rate_panels)

for panel_row, rate_name in enumerate(rate_panels, start=1):
    fig.add_trace(
        go.Scatter(x=data_by_date.index, y=data_by_date[rate_name], name=rate_name),
        row=panel_row,
        col=1,
    )

fig.update_layout(width=1700, height=960, legend=dict(x=0, y=1, traceorder="normal"))

for panel_row, rate_name in enumerate(rate_panels, start=1):
    fig.update_xaxes(title_text="Date", row=panel_row, col=1)
    fig.update_yaxes(title_text=rate_name, row=panel_row, col=1)

fig.show()

## Recovery and Mortality Rates of COVID-19 Cases by Country

In [37]:
def get_recovery_rate(data):
    """Return recovered cases as a percentage of confirmed cases.

    ``data`` must provide "Recovered" and "Confirmed" entries; the result is
    their element-wise ratio multiplied by 100.
    """
    recovered_share = data["Recovered"] / data["Confirmed"]
    return recovered_share * 100

def get_mortality_rate(data):
    """Return deaths as a percentage of confirmed cases.

    ``data`` must provide "Deaths" and "Confirmed" entries; the result is
    their element-wise ratio multiplied by 100.
    """
    death_share = data["Deaths"] / data["Confirmed"]
    return death_share * 100

In [38]:
# Recovery rate over time, one line per country.
# Each pair is (legend name, per-date frame for that country).
recovery_sources = (
    ("Germany", data_de_period),
    ("Spain", data_es_period),
    ("France", data_fr_period),
    ("India", data_in_period),
    ("Italy", data_it_period),
    ("Russia", data_ru_period),
    ("Turkey", data_tr_period),
    ("United States", data_us_period),
)

fig = go.Figure()
for recovery_country, recovery_frame in recovery_sources:
    fig.add_trace(
        go.Scatter(
            x=recovery_frame.index,
            y=get_recovery_rate(recovery_frame),
            name=recovery_country,
        )
    )

fig.update_layout(
    title="Recovery Rate by Country",
    title_x=0.5,
    width=1700,
    height=600,
    legend=dict(x=0, y=1, traceorder="normal"),
)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Recovery Rate")

fig.show()

In [39]:
# Mortality rate over time, one line per country.
# Each pair is (legend name, per-date frame for that country).
mortality_sources = (
    ("Germany", data_de_period),
    ("Spain", data_es_period),
    ("France", data_fr_period),
    ("India", data_in_period),
    ("Italy", data_it_period),
    ("Russia", data_ru_period),
    ("Turkey", data_tr_period),
    ("United States", data_us_period),
)

fig = go.Figure()
for mortality_country, mortality_frame in mortality_sources:
    fig.add_trace(
        go.Scatter(
            x=mortality_frame.index,
            y=get_mortality_rate(mortality_frame),
            name=mortality_country,
        )
    )

fig.update_layout(
    title="Mortality Rate by Country",
    title_x=0.5,
    width=1700,
    height=600,
    legend=dict(x=0, y=1, traceorder="normal"),
)
fig.update_xaxes(title_text="Date")
# The y-axis shows get_mortality_rate() values, so it is labelled accordingly
# (it previously read "Recovery Rate").
fig.update_yaxes(title_text="Mortality Rate")

fig.show()