In [1]:
import os
import pandas as pd
from IPython.display import Javascript
from IPython.core.display import display
from IPython.core.display import HTML
import plotly.express as px
import pycountry as pyc
import pycountry_convert as pc
import re
import plotly.graph_objects as go
import plotly.io as pio


pio.templates.default = "plotly_white"

In [2]:
os.listdir('Data')

['Country_wise_data.csv',
 'time_series_covid19_confirmed_global.csv',
 'time_series_covid19_deaths_global.csv',
 'time_series_covid19_recovered_global.csv']

In [3]:
df_cg= pd.read_csv('Data/time_series_covid19_confirmed_global.csv')
df_dg= pd.read_csv('Data/time_series_covid19_deaths_global.csv')
df_rg= pd.read_csv('Data/time_series_covid19_recovered_global.csv')
df_country_wise= pd.read_csv('Data/Country_wise_data.csv')

In [4]:
print(f'Worldwide Confirmed case file has {df_cg.shape[0]} rows and {df_cg.shape[1]} columns')
print(f'Worldwide deaths case file has {df_dg.shape[0]} rows and {df_dg.shape[1]} columns')
print(f'Worldwide recovered cases file has {df_rg.shape[0]} rows and {df_rg.shape[1]} columns')

Worldwide Confirmed case file has 267 rows and 275 columns
Worldwide deaths case file has 267 rows and 275 columns
Worldwide recovered cases file has 254 rows and 275 columns


In [5]:
def check_na(df):
    return df.isna().sum()[df.isna().sum()>0]

print(f'Worldwide Confirmed case file column "{check_na(df_cg).index[0]}" has {check_na(df_cg).values[0]} empty values')
print(f'Worldwide deaths case column "{check_na(df_dg).index[0]}" has {check_na(df_dg).values[0]} empty values')
print(f'Worldwide recovered cases file column "{check_na(df_rg).index[0]}" has {check_na(df_rg).values[0]} empty values')

Worldwide Confirmed case file column "Province/State" has 186 empty values
Worldwide deaths case column "Province/State" has 186 empty values
Worldwide recovered cases file column "Province/State" has 187 empty values


In [6]:
dates= df_cg.columns[4:]
dates_cg= df_cg.columns[4:]
info_cg=list(df_cg.columns[:4])
info_cg.append('total')
df_cg_temp= df_cg.copy()
df_cg_temp['total']= df_cg[dates_cg].sum(axis=1)+1

dates_dg= df_dg.columns[4:]
info_dg=list(df_dg.columns[:4])
info_dg.append('total')
df_dg_temp= df_dg.copy()
df_dg_temp['total']= df_dg[dates_dg].sum(axis=1)+1

dates_rg= df_rg.columns[4:]
info_rg=list(df_rg.columns[:4])
info_rg.append('total')
df_rg_temp= df_rg.copy()
df_rg_temp['total']= df_rg[dates_rg].sum(axis=1)+1

##  Question 1: what is the current  Situation of Covid 19 around the globe ?

In [7]:
df_cg_temp= df_cg_temp[info_cg].sort_values(by='total',ascending=False)
df_cg_temp[info_cg].head().style.background_gradient(subset='total',cmap='PuBu')

Unnamed: 0,Province/State,Country/Region,Lat,Long,total
243,,US,40.0,-100.0,746265153
30,,Brazil,-14.235,-51.9253,425648502
143,,India,20.593684,78.96288,407691001
208,,Russia,61.52401,105.318756,132516562
202,,Peru,-9.19,-75.0152,74471114


In [8]:
# parameters= df,
# path(col names representing the manner in which the layers will be made ex: world,continent,country
# ),values(single value reperesenting size)
# fig = px.treemap(
#     names = ['A','B','C'],
#     parents = ['','A','A'],
#     values=[10,12,4]
# )
# fig.show()

In [9]:
def change_error_countries(df):
    df['Country/Region']= df['Country/Region'].str.replace(r'(Korea, South)','Korea')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Congo \(Kinshasa\))','Congo')
    df['Country/Region']= df['Country/Region'].str.replace(r'(West Bank and Gaza)','Palestine')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Burma)','Myanmar')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Congo \(Brazzaville\))','Congo')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Diamond Princess)','London')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Taiwan\*)','Taiwan')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Timor-Leste)','Indonesia')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Laos)','Thailand')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Holy See)','Rome')
    df['Country/Region']= df['Country/Region'].str.replace(r'(Western Sahara)','Africa')
    df['Country/Region']= df['Country/Region'].str.replace(r'(MS Zaandam)','Italy')
    return df
df_cg_temp= change_error_countries(df_cg_temp)
df_dg_temp= change_error_countries(df_dg_temp)
df_rg_temp= change_error_countries(df_rg_temp)
df_cg= change_error_countries(df_cg)
df_dg= change_error_countries(df_dg)
df_rg= change_error_countries(df_rg)

In [10]:
def find_continent(country):
    continent=None
    alpha2= pyc.countries.search_fuzzy(f'{str(country)}')[0].alpha_2
    continent_code= pc.country_alpha2_to_continent_code(alpha2)
    continent= pc.convert_continent_code_to_continent_name(continent_code)

    return continent

df_cg_temp['Continent']= df_cg_temp['Country/Region'].apply(find_continent)
df_dg_temp['Continent']= df_dg_temp['Country/Region'].apply(find_continent)
df_rg_temp['Continent']= df_rg_temp['Country/Region'].apply(find_continent)

KeyboardInterrupt: 

<span style= 'color:brown'>In this section, I'll have a look at how COVID19 has been growing throughout the world from 22nd january 2020. I'll be using tree maps to show the share of COVID19 Cases worldwide and chloropleth maps with a time slider to show the daily impact of virus.<span>

In [None]:
fig= px.treemap(
df_cg_temp,
path=['Continent','Country/Region'],
values='total',
color='total',
color_continuous_scale='iceFire',
title='Current share of Worldwide Confirmed global COVID19 Cases'
)
fig.update_layout(
font=dict(
        family="Comic Sans MS",
        size=13,
        color="RebeccaPurple"
    ))
fig.show()

<span style= 'color:brown'>United States has the most number of Covid 19 confirmed cases, followed by India and Brazil.
It is worth praising as for Russia and Africa , the geographical territory is vast and they have kept the cases at bay.<span>

In [None]:
fig= px.treemap(
df_dg_temp,
path=['Continent','Country/Region'],
values='total',
color='total',
color_continuous_scale='reds',
title='Current share of Worldwide COVID19 Deaths'
)
fig.update_layout(
font=dict(
        family="Comic Sans MS",
        size=13,
        color="RebeccaPurple"
    ))
fig.show()

<span style='color:brown'>When it comes to Death United States is still leading the Band as a concequence of Donal Trump decision to open up the economy with restriction. 
Brazil follows.<span>

In [None]:
fig= px.treemap(
df_rg_temp,
path=['Continent','Country/Region'],
values='total',
color='total',
color_continuous_scale='greens',
title='Current share of Worldwide COVID19 Recovery'
)
fig.update_layout(
font=dict(
        family="Comic Sans MS",
        size=13,
        color="RebeccaPurple"
    ))
fig.show()

<span style='color:green'>When it comes to recovery of Confirmed cases India and Brazil are leading. It is worth praising for India where the recovery rate is extremely High.
As for Brazil the recovery rate is high but the death rate levels the number.<span>

<U><span style="color:red">**Confirmed Cases and Fatalities are cummulative sums of all the previous days. In order to understand the daily trend, I'll create a column for daily cases and deaths that will be the difference between the current value and the previous day's value.**</span><U>

In [None]:
print(df_cg[dates_cg].shape)
print(df_dg[dates_dg].shape)
print(df_rg[dates_rg].shape)

In [None]:
# df_cg= df_cg.sort_values(by='Country/Region')
# df_dg= df_dg.sort_values(by='Country/Region')
# df_rg= df_rg.sort_values(by='Country/Region')

In [None]:
recovery= df_cg[dates].copy()
recovery.index= df_cg['Country/Region']
recovery

In [None]:
deaths= df_dg[dates].copy()
deaths.index= df_dg['Country/Region']
deaths

In [None]:
confirmed= df_rg[dates].copy()
confirmed.index= df_rg['Country/Region']
confirmed

In [None]:
alpha= recovery.sum()
bravo= deaths.sum()
charlie= confirmed.sum()
df= pd.concat([alpha,bravo,charlie],axis=1).reset_index()

In [None]:
df.columns= ['Date','Recovery','deaths','confirmed']

In [None]:
df['Date']= pd.to_datetime(df['Date'])
df

In [None]:
lockdown_ended= str(pd.to_datetime('2020-05-30').date())

fig= go.Figure(
)
fig.add_trace(
go.Scatter(
    x= df['Date'],
    y= df['Recovery'],
    marker={'color':'green'},
    name='Recovery Rate'
)
)

fig.add_annotation(
    x=df['Date'].iloc[-1],
    y= df['Recovery'].iloc[-1],
    text= 'on {}, <b>{} </b> cases'.format(str(df['Date'].iloc[-1].date()),str(df['Recovery'].iloc[-1])),
    bgcolor='white',
    font={'color':'green'}
)


fig.add_trace(
go.Scatter(
    x= df['Date'],
    y= df['deaths'],
    name='Death Rate'

)
)
fig.add_annotation(
    x=df['Date'].iloc[-1],
    y= df['deaths'].iloc[-1],
    text= 'on {}, <b>{}</b> cases'.format(str(df['Date'].iloc[-1].date()),str(df['deaths'].iloc[-1])),
    bgcolor='white',
    font={'color':'red'}
    
)


fig.add_trace(
go.Scatter(
    x= df['Date'],
    y= df['confirmed'],
    marker={'color':'blue'},
    mode='lines',
    name='Confirmed case Rate'
)
)
fig.add_annotation(
    x=df['Date'].iloc[-1],
    y= df['confirmed'].iloc[-1],
    text= 'on {}, <b>{}</b> cases'.format(str(df['Date'].iloc[-1].date()),str(df['confirmed'].iloc[-1])),
    bgcolor='white'
)
fig.update_layout(
title=' Comparision of Confirmed cases, Recovery rate and deaths of Covid 19 ',
yaxis_title='Cases',
xaxis_title='Months',
xaxis=dict(tickmode='array',
      tickvals=['2020-03-22','2020-04-22','2020-05-22','2020-06-22','2020-07-22','2020-08-22',
               '2020-09-22','2020-10-22'],
      ticktext=['March','April','May','June','July','August','September','October']),
font=dict(
        family="Comic Sans MS",
        size=13,
        color="RebeccaPurple"
    )
)
fig.add_annotation(
    x= lockdown_ended,
    y= 8351,
    ay=-80,
    ax=1,
    text=' India began to reopen here'
)

fig.show()

India is also known for it's population and when it ended it's lockdown, it led to a major surge in Positive cases and which led to spike in recovery cases too.

In [None]:
recovery_tmp= recovery.copy()
r=recovery_tmp.reset_index()
recovery_tmp= r.groupby('Country/Region').sum().copy()
recent_date= str(pd.to_datetime(recovery_tmp.columns[-1]).date())
top6= recovery_tmp.sort_values(by=recovery_tmp.columns[-1])[-6:][recovery_tmp.columns[-1]]
dates= pd.to_datetime(recovery_tmp.columns)
fig= go.Figure(
    layout=dict(showlegend=False)
)
for i in range(len(recovery_tmp)):
    fig.add_trace(
        go.Scatter(
            x= dates,
            y= recovery_tmp.iloc[i].values,
            text= recovery_tmp.index[i],
            name=recovery_tmp.index[i]
        )
    )
for i in range(5):
    fig.add_annotation(
        x= recent_date,
        y= top6[-i],
        text= top6.index[-i],
        showarrow=False
    )
fig.add_annotation(
    x= lockdown_ended,
    y= 8351,
    ay=-80,
     ax=1,
    text=' India begins to reopen here'
)
fig.update_layout(
    title=' Comparision of India with recovery rate in different countries',
    xaxis_title=' Cases',
    yaxis_title=' Months'
)
font=dict(
        family="Comic Sans MS",
        size=13,
        color="RebeccaPurple"
    )

fig.show()


India Ended its major lockdown on 31st May 2020 and we can see a major spike in cases after that

In [None]:
deaths_tmp= deaths.sort_values(by='10/18/20',ascending=False)[:5].copy()
spike_day= str(pd.to_datetime('2020-06-14').date())
r=deaths_tmp.reset_index()
deaths_tmp= r.groupby('Country/Region').sum().copy()
recent_date= str(pd.to_datetime(deaths_tmp.columns[-1]).date())
top6= deaths_tmp.sort_values(by=deaths_tmp.columns[-1])[-6:][deaths_tmp.columns[-1]]
dates= pd.to_datetime(deaths_tmp.columns)
fig= go.Figure(layout=dict(showlegend=False))
for i in range(len(deaths_tmp)):
    fig.add_trace(
        go.Scatter(
            x= dates,
            y= deaths_tmp.iloc[i].values,
            text= deaths_tmp.index[i],
            name=deaths_tmp.index[i]
        )
    )
for i in range(5):
    fig.add_annotation(
        x= recent_date,
        y= top6[-i],
        text= top6.index[-i],
        showarrow=False
    )
fig.add_annotation(
    x= lockdown_ended,
    y= 5185,
    ay=-60,
     ax=1,
    text=' India begins to reopen here'
)

fig.add_annotation(
    arrowsize=2,
    arrowhead=1,
    x= spike_day,
    y= 12237,
    ay=-100,
     ax=-10,
    text='<b>The Surge in cases, approx(3k)</b>'
)
fig.update_layout(
    title=' Comparision of India with Death rate in different countries',
    xaxis_title=' Cases',
    yaxis_title=' Months'
)
font=dict(
        family="Comic Sans MS",
        size=13,
        color="RebeccaPurple"
    )

fig.show()



As India began to reopen it's economy on 31st may 2020, it led to a surge in cases and death rate after 2-3 weeks

In [None]:
confirmed_tmp= confirmed.sort_values(by='10/18/20',ascending=False)[:5].copy()
r=confirmed_tmp.reset_index()
confirmed_tmp= r.groupby('Country/Region').sum().copy()
recent_date= str(pd.to_datetime(confirmed_tmp.columns[-1]).date())
top6= confirmed_tmp.sort_values(by=confirmed_tmp.columns[-1])[-6:][confirmed_tmp.columns[-1]]
dates= pd.to_datetime(confirmed_tmp.columns)
fig= go.Figure(layout=dict(showlegend=False))
for i in range(len(confirmed_tmp)):
    fig.add_trace(
        go.Scatter(
            x= dates,
            y= confirmed_tmp.iloc[i].values,
            text= confirmed_tmp.index[i],
            name=confirmed_tmp.index[i]
        )
    )
for i in range(5):
    fig.add_annotation(
        x= recent_date,
        y= top6[-i],
        text= top6.index[-i],
        showarrow=False
    )

fig.update_layout(
    title=' Comparision of s cases arriving in different countries',
    xaxis_title=' Cases',
    yaxis_title=' Months'
)
fig.add_annotation(
    x= lockdown_ended,
    y= 8351,
    ay=-60,
     ax=1,
    text=' India begins to reopen here'
)
font=dict(
        family="Comic Sans MS",
        size=13,
        color="RebeccaPurple"
    )

fig.show()



In [None]:
data = go.Scattergeo(
        lon = df_cg['Long'],
        lat = df_cg['Lat'],
        mode = 'markers',
        marker = dict(symbol = 'star',size=5,colorscale = 'Reds'),
        text = df_cg['10/13/20'],
        )
fig = go.Figure(data = [data])
fig.show()

In [None]:
df_country_wise.head()