<center><h1 style='color:red'>COVID-19</h1></center>

A novel strain of coronavirus — SARS-CoV-2 — was first detected in December 2019 in Wuhan, a city in China’s Hubei province with a population of 11 million, after an outbreak of pneumonia without an obvious cause. The virus has now spread to over 200 countries and territories across the globe, and was characterised as a pandemic by the World Health Organization (WHO).

**Let's hope for the best, and make sure to take care of yourself, your family members and your loved ones!**

**Disclaimer**

> Data is scraped from the JHU GitHub repository. Any change in the data there will also be reflected in this notebook.

### **Real time Analysis** 
- Results and plots get updated every time the dataset gets updated!

# Libraries

In [None]:
# to load json files
import json
# datetime operations
from datetime import timedelta
# to get web contents
from urllib.request import urlopen

# for numerical analysis
import numpy as np
# to store and process data in dataframe
import pandas as pd

# basic visualization package
import matplotlib.pyplot as plt
# advanced ploting
import seaborn as sns
#color patch
import matplotlib.patches as mpatches

# hide warnings
import warnings
warnings.filterwarnings('ignore')

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# later releases dropped the old names, so use whichever one this
# installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Dataset

In [None]:
# Full data
# =========
# Load covid_19_clean_complete.csv, print its (rows, columns) shape as a quick
# sanity check, and show the first rows as the cell output.
full_table = pd.read_csv(
    filepath_or_buffer='../input/corona-virus-report/covid_19_clean_complete.csv'
)
print(full_table.shape)
full_table.head(n=5)

In [None]:
# Grouped by day, country
# =======================
# Read full_grouped.csv and convert its 'Date' column from text to datetime
# so the `.dt` accessor can be used on it later in the notebook.
full_grouped = (
    pd.read_csv('../input/corona-virus-report/full_grouped.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
full_grouped.head()

In [None]:
# Day wise
# ========
# Read day_wise.csv and parse 'Date' into datetime values; the time-series
# charts below use this column as their x axis.
day_wise = (
    pd.read_csv('../input/corona-virus-report/day_wise.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
day_wise.head()

In [None]:
# Country wise
# ============
# Read country_wise_latest.csv, turn blank strings into NaN and then replace
# every NaN (in any column) with 0.
country_wise = (
    pd.read_csv('../input/corona-virus-report/country_wise_latest.csv')
    .replace('', np.nan)
    .fillna(0)
)
country_wise.head()

In [None]:
# Worldometer data
# ================
# Read worldometer_data.csv, turn blank strings into NaN and then replace
# every NaN (in any column) with 0. The shape is printed as a sanity check.
worldometer_data = (
    pd.read_csv('../input/corona-virus-report/worldometer_data.csv')
    .replace('', np.nan)
    .fillna(0)
)
print(worldometer_data.shape)
worldometer_data.head()

# Latest Score Board

In [None]:
# Latest worldwide totals: take the most recent row of day_wise and draw one
# bar per status (Active / Deaths / Recovered), tallest first.
temp = day_wise[['Date','Deaths', 'Recovered', 'Active']].tail(1)
dt = temp.Date.dt.strftime("%d-%m-%y").values
temp = (
    temp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Recovered'])
        .sort_values(by='value', ascending=False)
)

# Pick each bar's colour from its status name. A positional colour list would
# silently attach to different statuses whenever the sort order above changes.
status_colors = {'Recovered': 'green', 'Active': 'blue', 'Deaths': 'red'}
bars = plt.bar(temp.variable, temp.value,
               color=temp.variable.map(status_colors).tolist(), alpha=0.7)

# hide the tick marks and the y tick labels; the bars are labelled directly
plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=True)

# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)

# label each bar with its height expressed in thousands
for bar in bars:
    plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height()-5, str(int(bar.get_height()/1000)) + 'K',
                 ha='center', fontsize=15)

plt.xticks(temp.variable.values,fontsize=15)
plt.title('Total Cases till {}'.format(dt[0]), fontsize=17);

# Cases over the time

In [None]:
# Switch the global Matplotlib style for the charts that follow.
plt.style.use('fivethirtyeight')

# One bar per row of day_wise, positioned by its Date, on a log y axis.
# The bars are per date (day_wise.csv), so the title says "by Day".
fig, ax = plt.subplots()
ax.bar(day_wise.Date, day_wise.Confirmed, width=0.7, alpha=0.7)
ax.set_yscale("log")
ax.set_title('Confirmed Cases by Day');

In [None]:
# Active cases (left) and new cases (right), one bar per row of day_wise on a
# log y axis. The bars are per date, so the titles say "by Day".
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

ax1.bar(day_wise.Date, day_wise.Active, color='#ff2e63', width=0.7, alpha=0.7)
ax1.set_yscale("log")
ax1.set_title('Active Cases by Day')

ax2.bar(day_wise.Date, day_wise['New cases'], color='#fe9801', width=0.7, alpha=0.7)
ax2.set_yscale("log")
ax2.set_title('New Cases by Day');

In [None]:
# Deaths (left) and recovered (right) on a shared log y axis so the two
# panels can be compared directly. One bar per row of day_wise, i.e. per
# date, so the titles say "by Day".
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), sharey=True)

ax1.bar(day_wise.Date, day_wise.Deaths, color='red', width=0.7, alpha=0.7)
ax1.set_yscale("log")
ax1.set_title('Deaths by Day')

ax2.bar(day_wise.Date, day_wise.Recovered, color='green', width=0.7, alpha=0.7)
ax2.set_yscale("log")
ax2.set_title('Recovered by Day');

In [None]:
# New deaths (left) and new recoveries (right) on a shared log y axis.
# One bar per row of day_wise, i.e. per date, so the titles say "by Day".
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), sharey=True)

ax1.bar(day_wise.Date, day_wise['New deaths'], color='red', width=0.7, alpha=0.7)
ax1.set_yscale("log")
ax1.set_title('New Deaths by Day')

ax2.bar(day_wise.Date, day_wise['New recovered'], color='green', width=0.7, alpha=0.7)
ax2.set_yscale("log")
ax2.set_title('New Recovered by Day');

In [None]:
# Bar chart of the 'Deaths / 100 Recovered' column against Date, log y axis.
fig, ax = plt.subplots()
ax.bar(day_wise['Date'], day_wise['Deaths / 100 Recovered'], width=0.7, alpha=0.7)
ax.set_yscale("log")
ax.set_title('Number of Deaths / 100 Recovered');

In [None]:
# Bar chart of the 'Recovered / 100 Cases' column against Date, log y axis.
fig, ax = plt.subplots()
ax.bar(
    day_wise['Date'],
    day_wise['Recovered / 100 Cases'],
    width=0.7,
    alpha=0.7,
    color='#ff2e63',
)
ax.set_yscale("log")
ax.set_title('Number of Recovered / 100 Cases');

In [None]:
# Bar chart of the 'No. of countries' column against Date, log y axis.
fig, ax = plt.subplots()
ax.bar(
    day_wise['Date'],
    day_wise['No. of countries'],
    width=0.7,
    alpha=0.7,
    color='#fe9801',
)
ax.set_yscale("log")
ax.set_title('Number of Countries Affected');

# Top 15 Countries

In [None]:
# Back to Matplotlib's default style for the country rankings.
plt.style.use('default')

# Keep the 15 countries with the most confirmed cases, then order them
# ascending so the largest bar ends up at the top of the horizontal chart.
top_confirmed = (
    country_wise[['Country/Region', 'Confirmed']]
    .sort_values(by='Confirmed', ascending=False)[0:15]
    .sort_values(by='Confirmed')
)

ax = plt.gca()
country_bars = ax.barh(top_confirmed['Country/Region'], top_confirmed['Confirmed'], color=['gray'])
country_bars[-1].set_color('#D63412')  # highlight the leading country
ax.tick_params(labelbottom=False, bottom=False, top=False)
ax.set_title('Confirmed Cases in Top15 Countries')
ax.ticklabel_format(useOffset=False, style='plain', axis='x')

# drop the box around the plot
for edge in ax.spines.values():
    edge.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, total in enumerate(top_confirmed['Confirmed']):
    ax.text(total + 3, row - .25, f'{int(total / 1000)}K', color='black', fontsize=10)

In [None]:
# Keep the 15 countries with the most active cases, then order them ascending
# so the largest bar ends up at the top of the horizontal chart.
top_active = (
    country_wise[['Country/Region', 'Active']]
    .sort_values(by='Active', ascending=False)[0:15]
    .sort_values(by='Active')
)

ax = plt.gca()
country_bars = ax.barh(top_active['Country/Region'], top_active['Active'], color=['gray'])
country_bars[-1].set_color('#D63412')  # highlight the leading country
ax.tick_params(labelbottom=False, bottom=False, top=False)
ax.set_title('Active Cases in Top15 Countries')
ax.ticklabel_format(useOffset=False, style='plain', axis='x')

# drop the box around the plot
for edge in ax.spines.values():
    edge.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, total in enumerate(top_active['Active']):
    ax.text(total + 3, row - .25, f'{int(total / 1000)}K', color='black', fontsize=10)

In [None]:
# Keep the 15 countries with the most deaths, then order them ascending so
# the largest bar ends up at the top of the horizontal chart.
top_deaths = (
    country_wise[['Country/Region', 'Deaths']]
    .sort_values(by='Deaths', ascending=False)[0:15]
    .sort_values(by='Deaths')
)

ax = plt.gca()
country_bars = ax.barh(top_deaths['Country/Region'], top_deaths['Deaths'], color=['gray'])
country_bars[-1].set_color('#D63412')  # highlight the leading country
ax.tick_params(labelbottom=False, bottom=False, top=False)
ax.set_title('Deaths in Top15 Countries')
ax.ticklabel_format(useOffset=False, style='plain', axis='x')

# drop the box around the plot
for edge in ax.spines.values():
    edge.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, total in enumerate(top_deaths['Deaths']):
    ax.text(total + 3, row - .25, f'{int(total / 1000)}K', color='black', fontsize=10)

In [None]:
# Keep the 15 countries with the most recoveries, then order them ascending
# so the largest bar ends up at the top of the horizontal chart.
top_recovered = (
    country_wise[['Country/Region', 'Recovered']]
    .sort_values(by='Recovered', ascending=False)[0:15]
    .sort_values(by='Recovered')
)

ax = plt.gca()
country_bars = ax.barh(top_recovered['Country/Region'], top_recovered['Recovered'], color=['lightgray'])
country_bars[-1].set_color('darkgreen')  # highlight the leading country
ax.tick_params(labelbottom=False, bottom=False, top=False)
ax.set_title('Recovered Cases in Top15 Countries')
ax.ticklabel_format(useOffset=False, style='plain', axis='x')

# drop the box around the plot
for edge in ax.spines.values():
    edge.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, total in enumerate(top_recovered['Recovered']):
    ax.text(total + 3, row - .25, f'{int(total / 1000)}K', color='black', fontsize=10)

In [None]:
# Bar colour for each WHO region; the same mapping drives the legend.
# 'Western Pacific' is included so countries from that region get a real
# colour instead of NaN (which Matplotlib rejects as a colour).
dic = {'Americas':'red','South-East Asia':'green','Europe':'blue','Africa':'yellow',
       'Eastern Mediterranean':'gray','Western Pacific':'purple'}
# Colour for any region value that is still missing from `dic`.
fallback_color = 'lightgray'

# Keep the 15 countries with the most new cases, ordered ascending so the
# largest bar ends up at the top of the horizontal chart.
temp = (
    country_wise[['Country/Region', 'New cases', 'WHO Region']]
    .sort_values(by='New cases', ascending=False)[0:15]
    .sort_values(by='New cases')
)
region_colors = temp['WHO Region'].map(dic)
bar_colors = region_colors.fillna(fallback_color).tolist()

bars = plt.barh(temp['Country/Region'], temp['New cases'], color=bar_colors, alpha=0.8)
plt.tick_params(labelbottom=False, bottom=False, top=False)
plt.title('New cases in Top15 Countries')
plt.gca().ticklabel_format(useOffset=False, style='plain', axis='x')

# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)

# label each bar with its value: thousands as 'NK', smaller values as-is
# (so a country with a few hundred cases is not shown as '0K')
for i, v in enumerate(temp['New cases']):
    label = str(int(v/1000))+'K' if v >= 1000 else str(int(v))
    plt.gca().text(v + 3, i - .25, label, color='black', fontsize=10)

# legend: one colour patch per WHO region, plus 'Other' only when needed
patchList = [mpatches.Patch(color=color, label=region) for region, color in dic.items()]
if region_colors.isna().any():
    patchList.append(mpatches.Patch(color=fallback_color, label='Other'))
plt.legend(handles=patchList, loc='best');

In [None]:
# Colour for any WHO region that has no entry in `dic` (defined in the
# previous cell); without it the colour would be NaN, which Matplotlib
# rejects.
fallback_color = 'lightgray'

# Keep the 15 countries with the most new recoveries, ordered ascending so
# the largest bar ends up at the top of the horizontal chart.
temp = (
    country_wise[['Country/Region', 'New recovered', 'WHO Region']]
    .sort_values(by='New recovered', ascending=False)[0:15]
    .sort_values(by='New recovered')
)
region_colors = temp['WHO Region'].map(dic)
bar_colors = region_colors.fillna(fallback_color).tolist()

bars = plt.barh(temp['Country/Region'], temp['New recovered'], color=bar_colors, alpha=0.8)
plt.tick_params(labelbottom=False, bottom=False, top=False)
plt.title('New recovered cases in Top15 Countries')
plt.gca().ticklabel_format(useOffset=False, style='plain', axis='x')

# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)

# label each bar with its value: thousands as 'NK', smaller values as-is
# (so a country with a few hundred recoveries is not shown as '0K')
for i, v in enumerate(temp['New recovered']):
    label = str(int(v/1000))+'K' if v >= 1000 else str(int(v))
    plt.gca().text(v + 3, i - .25, label, color='black', fontsize=10)

# legend: one colour patch per WHO region, plus 'Other' only when needed
patchList = [mpatches.Patch(color=color, label=region) for region, color in dic.items()]
if region_colors.isna().any():
    patchList.append(mpatches.Patch(color=fallback_color, label='Other'))
plt.legend(handles=patchList, loc='best');

# Let's start with Interactive Visualization with the help of plotly

In [None]:
import plotly.express as px

### Interactive Maps

In [None]:
def plot_map(df, col):
    """Show a world choropleth shaded by the values in column `col`.

    Only rows where `col` is greater than zero are drawn. `df` must have a
    "Country/Region" column holding country names, which is used both to
    locate each country and as the hover title. The figure is displayed;
    nothing is returned.
    """
    positive_rows = df.loc[df[col] > 0]
    choropleth_options = dict(
        locations="Country/Region",
        locationmode='country names',
        color=col,
        hover_name="Country/Region",
        title=col,
        hover_data=[col],
        color_continuous_scale="Reds",
    )
    px.choropleth(positive_rows, **choropleth_options).show()

In [None]:
# World map shaded by each country's confirmed count.
plot_map(df=country_wise, col='Confirmed')

In [None]:
# World map shaded by each country's death count.
plot_map(df=country_wise, col='Deaths')

**Let's see how cases increase over time!**

In [None]:
# Same kind of choropleth as the maps above; the difference is that the
# colours are updated for every animation frame (one frame per date).

# Colour by log(Confirmed). Zero counts are masked to NaN first: np.log(0)
# would give -inf (and a divide-by-zero warning), while NaN simply leaves the
# country uncoloured for that frame.
confirmed = full_grouped["Confirmed"]
fig = px.choropleth(full_grouped, locations="Country/Region",
                    color=np.log(confirmed.where(confirmed > 0)),
                    locationmode='country names', hover_name="Country/Region",
                    animation_frame=full_grouped["Date"].dt.strftime('%Y-%m-%d'),
                    title='Cases over time', color_continuous_scale=px.colors.sequential.Reds)
# the colour bar would show log values, so hide it
fig.update(layout_coloraxis_showscale=False)
fig.show()

### Interactive plot

We have already plotted the number of cases using matplotlib, so now I will plot the same thing using plotly.

In [None]:
def plot_daywise(col, hue):
    """Show an interactive bar chart of `day_wise[col]` against Date.

    `col` is the day_wise column to plot and is also used as the chart title;
    `hue` is the single bar colour. Both axis titles are blanked. The figure
    is displayed; nothing is returned.
    """
    layout_options = dict(title=col, xaxis_title="", yaxis_title="")
    chart = px.bar(day_wise, x="Date", y=col, width=700, color_discrete_sequence=[hue])
    chart.update_layout(**layout_options)
    chart.show()

In [None]:
# New cases per date, dark grey bars.
plot_daywise(col='New cases', hue='#333333')

**We have plotted new cases over time; try plotting deaths and active cases interactively with plotly on your own.**

In [None]:
# New deaths per date, red bars.
plot_daywise(col='New deaths', hue='#D63412')

We can also plot line graph instead of bar graph

In [None]:
def plot_daywise_line(col, hue):
    """Show an interactive line chart of `day_wise[col]` against Date.

    `col` is the day_wise column to plot and is also used as the chart title;
    `hue` is the line colour. Both axis titles are blanked. The figure is
    displayed; nothing is returned.
    """
    layout_options = dict(title=col, xaxis_title="", yaxis_title="")
    chart = px.line(day_wise, x="Date", y=col, width=700, color_discrete_sequence=[hue])
    chart.update_layout(**layout_options)
    chart.show()

In [None]:
# New deaths per date as a red line.
plot_daywise_line(col='New deaths', hue='#D63412')

Wow, this looks nice!

**Let's compare the number of deaths vs confirmed cases using a scatter plot in plotly**

In [None]:
# Scatter of the 20 countries with the most deaths: confirmed cases on x,
# deaths on y (both log-scaled), bubble size by confirmed cases, and each
# point labelled with its country name.
top_by_deaths = country_wise.sort_values('Deaths', ascending=False).head(20)

fig = px.scatter(
    top_by_deaths,
    x='Confirmed',
    y='Deaths',
    color='Country/Region',
    size='Confirmed',
    height=700,
    text='Country/Region',
    log_x=True,
    log_y=True,
    title='Deaths vs Confirmed (Scale is in log10)',
)
fig.update_traces(textposition='top center')
# the text labels already name the countries, so the legend is hidden
fig.update_layout(showlegend=False, xaxis_rangeslider_visible=True)
fig.show()

### Top 10 countries case-report

In [None]:
# First five columns of country_wise for the ten countries with the most
# confirmed cases, indexed by country name. `temp` is read again by the
# pie-chart cell that follows.
temp = (
    country_wise.iloc[:, 0:5]
    .sort_values(by='Confirmed', ascending=False)
    .set_index('Country/Region')
    .head(10)
)
temp.style.background_gradient(cmap='Greens')

In [None]:
# Pie chart of confirmed cases for the rows of `temp` (built in the previous
# cell); the wedges carry no labels, so the legend lists the index values.
plt.figure(figsize=(10, 5))
ax = plt.gca()
ax.set_title('Confirmed cases in Pie-Chart', size=20)
ax.pie(temp['Confirmed'])
ax.legend(temp.index, loc='best', fontsize=8)
plt.show()

#### Region wise dataset

In [None]:
# Daily report for 07-13-2020 read straight from the CSSEGISandData/COVID-19
# GitHub repository (needs network access). The date is fixed in the URL.
latest_data = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_daily_reports/07-13-2020.csv'
)
latest_data.head()

### Top20 Region Wise report

In [None]:
# Top 20 rows of the daily report by confirmed cases.
# Rows are selected by position with .head(20). A label slice such as
# .loc[:10] on the sorted frame would instead stop at whichever row carries
# index label 10, so the number of rows returned would depend on the data.
(
    latest_data
    .sort_values(by='Confirmed', ascending=False)
    .loc[:, ['Country_Region','Province_State','Confirmed','Deaths','Recovered','Active']]
    .head(20)
    .reset_index(drop=True)
    .style.background_gradient(cmap='Greens')
)

In [None]:
# Confirmed cases summed per Province_State for rows whose Country_Region is
# 'US'; keep the ten largest totals.
is_us = latest_data['Country_Region'] == 'US'
us_top = (
    latest_data.loc[is_us, ['Province_State', 'Confirmed']]
    .groupby(['Province_State']).sum()
    .sort_values(by='Confirmed', ascending=False)
    .head(10)
)

# The wedges carry no labels, so the legend lists the Province_State names.
plt.figure(figsize=(10, 5))
ax = plt.gca()
ax.set_title('Confirmed cases in USA', size=20)
ax.pie(us_top['Confirmed'])
ax.legend(us_top.index, loc='best', fontsize=8)
plt.show()

In [None]:
# Confirmed cases summed per Province_State for rows whose Country_Region is
# 'India'; keep the ten largest totals.
is_india = latest_data['Country_Region'] == 'India'
india_top = (
    latest_data.loc[is_india, ['Province_State', 'Confirmed']]
    .groupby(['Province_State']).sum()
    .sort_values(by='Confirmed', ascending=False)
    .head(10)
)

# The wedges carry no labels, so the legend lists the Province_State names.
plt.figure(figsize=(10, 5))
ax = plt.gca()
ax.set_title('Confirmed cases in INDIA', size=20)
ax.pie(india_top['Confirmed'])
ax.legend(india_top.index, loc='best', fontsize=8)
plt.show()

In [None]:
# Confirmed cases summed per Province_State for rows whose Country_Region is
# 'China'; keep the ten largest totals.
is_china = latest_data['Country_Region'] == 'China'
china_top = (
    latest_data.loc[is_china, ['Province_State', 'Confirmed']]
    .groupby(['Province_State']).sum()
    .sort_values(by='Confirmed', ascending=False)
    .head(10)
)

# The wedges carry no labels, so the legend lists the Province_State names.
plt.figure(figsize=(10, 5))
ax = plt.gca()
ax.set_title('Confirmed cases in China', size=20)
ax.pie(china_top['Confirmed'])
ax.legend(china_top.index, loc='best', fontsize=8)
plt.show()

### Some statistics

In [None]:
# Ten countries with the most confirmed cases, plus two derived ratios and a
# constant column 'var' = 1 that the next cells draw as a full-height
# outline behind each ratio bar. `temp` is read by the two cells that follow.
temp = (
    country_wise.iloc[:, 0:5]
    .sort_values(by='Confirmed', ascending=False)
    .set_index('Country/Region')
    .head(10)
    .assign(**{
        'Death ratio': lambda top: top.Deaths / top.Confirmed,
        'recover ratio': lambda top: top.Recovered / top.Confirmed,
        'var': 1,
    })
)

In [None]:
# Order the countries by recover ratio, highest first.
temp = temp.sort_values(by='recover ratio', ascending=False)

plt.figure(figsize=(15, 5))
ax = plt.gca()
# white outlined bars of height 1 ('var') act as the 100% frame ...
ax.bar(temp.index, temp['var'], color='white', edgecolor='black', alpha=0.7)
# ... and the green bars fill it up to each country's ratio
bars = ax.bar(temp.index, temp['recover ratio'], color='green', alpha=0.6)

for edge in ax.spines.values():
    edge.set_visible(False)
ax.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=True)

# print the ratio as a percentage just above each green bar
for bar in bars:
    ratio = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2, ratio+0.01, str(round(ratio*100,1)) + '%',
            ha='center', color='black', fontsize=11)
ax.set_title('recover ratio in Top 10 countries');

In [None]:
# Order the countries by death ratio, highest first.
temp = temp.sort_values(by='Death ratio', ascending=False)

plt.figure(figsize=(15, 2.5))
ax = plt.gca()
# white outlined bars of height 1 ('var') act as the 100% frame ...
ax.bar(temp.index, temp['var'], color='white', edgecolor='black', alpha=0.7)
# ... and the red bars fill it up to each country's ratio
bars = ax.bar(temp.index, temp['Death ratio'], color='red', alpha=0.6)

for edge in ax.spines.values():
    edge.set_visible(False)
ax.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=True)

# print the ratio as a percentage just above each red bar
for bar in bars:
    ratio = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2, ratio+0.02, str(round(ratio*100,2)) + '%',
            ha='center', color='black', fontsize=11)
ax.set_title('Death ratio in Top 10 countries');

<center><h1 style='color:green'>Don't forget to Upvote, Thanks and Be Safe!</h1></center>