
Coronavirus disease 2019 (COVID-19) is an infectious disease caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), a virus closely related to the SARS virus.

The virus first emerged in Wuhan, Hubei, China and is the cause of the 2019–20 coronavirus pandemic.

While the majority of cases result in mild symptoms, some progress to pneumonia and multi-organ failure.

The infection is spread from one person to others via respiratory droplets produced from the airways, often during coughing. Hand washing, maintaining distance from other people, and not touching one's face with unwashed hands are measures recommended to prevent the disease. It is also recommended to cover one's nose and mouth with a tissue or a bent elbow when coughing.

Masks are recommended for those who suspect they have the virus and for those who are taking care of someone with a suspected infection, but they are not recommended for the general public.

There is no vaccine or specific antiviral treatment. Management involves treatment of symptoms, supportive care, and experimental measures.

Source: https://en.wikipedia.org/wiki/Coronavirus_disease_2019

In this notebook, we use various visualization techniques to draw inferences about the pandemic.



**Please visit this [link](https://www.iban.com/country-codes) to look up the ISO2 and ISO3 country codes used in this notebook, to avoid any confusion.**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory
# and print each one as it is discovered.
input_paths = (
    os.path.join(root, fname)
    for root, _, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for input_path in input_paths:
    print(input_path)

# Any results you write to the current directory are saved as output.

**Loading appropriate Packages and Data Preprocessing.**

In [None]:
!pip install pycountry-convert
!pip install country_converter --upgrade


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 
import pycountry
import pycountry_convert as pc
import plotly.graph_objects as go
from Bio import SeqIO
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
from Bio import Phylo
from Bio.SeqUtils import GC
from functools import lru_cache
import warnings
warnings.filterwarnings('ignore')
from Bio.SeqUtils import GC_skew,molecular_weight
import pylab
import country_converter as coco
from plotly.subplots import make_subplots


In [None]:
import pandas as pd
# World-wide COVID-19 table; the 'Date' column is parsed to datetime on load.
covid_clean = pd.read_csv(
    "../input/corona-virus-report/covid_19_clean_complete.csv",
    parse_dates=['Date'],
)

# Two US tables (presumably national and per-state daily figures, going by the
# file names), loaded without any conversion.
us_covid19_daily = pd.read_csv(
    "../input/covid19-in-usa/us_covid19_daily.csv"
)
us_states_covid19_daily = pd.read_csv(
    "../input/covid19-in-usa/us_states_covid19_daily.csv"
)


In [None]:
# Active = confirmed cases that are neither dead nor recovered.
covid_clean['Active'] = (
    covid_clean['Confirmed']
    .sub(covid_clean['Deaths'])
    .sub(covid_clean['Recovered'])
)

# Use one spelling for China.
covid_clean['Country/Region'] = covid_clean['Country/Region'].replace({'Mainland China': 'China'})

# Rows without a province get the placeholder 'unknown' so the string
# matching below never sees NaN.
covid_clean['Province/State'] = covid_clean['Province/State'].fillna('unknown')

# Rows whose province names one of the two cruise ships.
on_grand_princess = covid_clean['Province/State'].str.contains('Grand Princess')
on_diamond_princess = covid_clean['Province/State'].str.contains('Diamond Princess cruise ship')
covid_ship = covid_clean[on_grand_princess | on_diamond_princess]


In [None]:

@lru_cache(maxsize=None)
def do_fuzzy_search(country):
    """Return the alpha-2 code of the best pycountry fuzzy match for `country`.

    Results are memoised, so each distinct name is searched only once.

    Args:
        country: Country name to look up.

    Returns:
        The `alpha_2` attribute of the first fuzzy-search hit, or `np.nan`
        when the search fails for any reason.
    """
    try:
        matches = pycountry.countries.search_fuzzy(country)
        return matches[0].alpha_2
    except Exception:
        # Best-effort lookup: any failure maps to NaN. `Exception` rather than
        # a bare `except` so Ctrl-C / SystemExit are not swallowed, matching
        # the other lookup helpers in this notebook.
        return np.nan

# Resolve every country name to its alpha-2 code (NaN where the lookup fails).
covid_clean['country_code'] = covid_clean["Country/Region"].apply(do_fuzzy_search)

In [None]:
# Hand-picked alpha-2 codes for country names that must not rely on the fuzzy
# search result.
_COUNTRY_CODE_OVERRIDES = {
    'Taipei and environs': 'TW',
    'Iran (Islamic Republic of)': 'IR',
    'occupied Palestinian territory': 'PS',
    'Channel Islands': 'GB',
    'Taiwan*': 'TW',
    'Korea, South': 'KR',
    'Congo (Kinshasa)': 'CD',
    # Brazzaville is the capital of the Republic of the Congo, whose code is
    # CG; CD is the Democratic Republic of the Congo (Kinshasa).
    'Congo (Brazzaville)': 'CG',
}

# One vectorised pass: take the override where one exists, otherwise keep the
# code that is already in the column.
covid_clean["country_code"] = (
    covid_clean['Country/Region']
    .map(_COUNTRY_CODE_OVERRIDES)
    .fillna(covid_clean['country_code'])
)

In [None]:
@lru_cache(maxsize=None)
def do_continent_search(alpha2):
    """Map an alpha-2 country code to a continent code (memoised).

    Returns whatever `pc.country_alpha2_to_continent_code` yields for
    `alpha2`, or `np.nan` if that call raises.
    """
    try:
        continent_code = pc.country_alpha2_to_continent_code(alpha2)
    except Exception:
        return np.nan
    else:
        return continent_code
# Continent code per row, derived from the alpha-2 country code.
covid_clean['continent'] = covid_clean["country_code"].apply(do_continent_search)
# Rows with country code 'VA' are forced to continent 'EU'.
is_va = covid_clean['country_code'] == 'VA'
covid_clean.loc[is_va, 'continent'] = "EU"



In [None]:
@lru_cache(maxsize=None)
def country(alpha2):
    """Convert an alpha-2 country code to a short country name (memoised).

    Args:
        alpha2: Country code passed to `coco.convert` as `names`.

    Returns:
        The `name_short` conversion from country_converter; `np.nan` when the
        code is not found or the conversion raises.
    """
    try:
        # `np.nan`, not `np.NaN`: the capitalised alias was removed in
        # NumPy 2.0, and the resulting AttributeError would be swallowed by
        # the handler below, turning every lookup into NaN.
        return coco.convert(names=alpha2, to='name_short', not_found=np.nan)
    except Exception:
        return np.nan


In [None]:
# Replace the raw country names with the short names derived from the codes.
covid_clean['Country/Region'] = covid_clean['country_code'].apply(country)

**Clean dataset with ISO2 country names and continent names**

**Visualizations**

In [None]:
# Rows of the most recent reporting date, summed per country; keep the five
# countries with the most confirmed cases.
latest_data = covid_clean[covid_clean["Date"] == covid_clean["Date"].max()].reset_index()
# numeric_only=True: the frame still carries the datetime 'Date' column and
# text columns, which pandas >= 2.0 no longer drops silently when summing.
country_latest_data = (
    latest_data.groupby('Country/Region')
    .sum(numeric_only=True)
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
    .head(5)
)

# (column, bar colour); the column name doubles as the legend label, which
# also removes the old 'Revovered' typo.
bar_specs = [
    ('Confirmed', 'rgb(55, 83, 109)'),
    ('Active', 'lightsalmon'),
    ('Recovered', 'green'),
    ('Deaths', 'crimson'),
]
fig = go.Figure(data=[
    go.Bar(name=column, x=country_latest_data["Country/Region"],
           y=country_latest_data[column], marker_color=bar_color)
    for column, bar_color in bar_specs
])
fig.update_layout(barmode='group', title_text='Top 5 countries ')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Latest-date totals per country, restricted to countries with more than
# 1000 confirmed cases.
latest_data = covid_clean[covid_clean["Date"] == covid_clean["Date"].max()].reset_index()
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
country_latest_data = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
countries_confirmed_cases = (
    country_latest_data[country_latest_data['Confirmed'] > 1000]
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
)

# (column, bar colour); the legend label is the column name ('Recovered' was
# previously misspelled 'Revovered').
bar_specs = [
    ('Confirmed', 'rgb(55, 83, 109)'),
    ('Active', 'lightsalmon'),
    ('Recovered', 'green'),
    ('Deaths', 'crimson'),
]
fig = go.Figure(data=[
    go.Bar(name=column, x=countries_confirmed_cases["Country/Region"],
           y=countries_confirmed_cases[column], marker_color=bar_color)
    for column, bar_color in bar_specs
])
fig.update_layout(barmode='group', title_text='Countries(confirmed>1000)')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Latest-date totals per country for the 100 < confirmed <= 1000 bucket.
latest_data = covid_clean[covid_clean["Date"] == covid_clean["Date"].max()].reset_index()
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
country_latest_data = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
# The upper bound is inclusive: the chart for larger outbreaks uses
# `> 1000`, so a country with exactly 1000 cases would otherwise be in no chart.
in_bucket = (country_latest_data['Confirmed'] <= 1000) & (country_latest_data['Confirmed'] > 100)
countries_confirmed_cases = (
    country_latest_data[in_bucket]
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
)

# (column, bar colour); the legend label is the column name ('Recovered' was
# previously misspelled 'Revovered').
bar_specs = [
    ('Confirmed', 'rgb(55, 83, 109)'),
    ('Active', 'lightsalmon'),
    ('Recovered', 'green'),
    ('Deaths', 'crimson'),
]
fig = go.Figure(data=[
    go.Bar(name=column, x=countries_confirmed_cases["Country/Region"],
           y=countries_confirmed_cases[column], marker_color=bar_color)
    for column, bar_color in bar_specs
])
fig.update_layout(barmode='group', title_text='Deaths vs Confirmed Cases(confirmed between 1000 and 100)')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Latest-date totals per country for the 10 < confirmed <= 100 bucket.
latest_data = covid_clean[covid_clean["Date"] == covid_clean["Date"].max()].reset_index()
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
country_latest_data = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
# The upper bound is inclusive: the next larger bucket starts at `> 100`, so
# a country with exactly 100 cases would otherwise be in no chart.
in_bucket = (country_latest_data['Confirmed'] <= 100) & (country_latest_data['Confirmed'] > 10)
countries_confirmed_cases = (
    country_latest_data[in_bucket]
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
)

# (column, bar colour); stacked, so Active + Recovered + Deaths add up to the
# confirmed total. The legend label is the column name.
bar_specs = [
    ('Active', 'rgb(55, 83, 109)'),
    ('Recovered', 'green'),
    ('Deaths', 'crimson'),
]
fig = go.Figure(data=[
    go.Bar(name=column, x=countries_confirmed_cases["Country/Region"],
           y=countries_confirmed_cases[column], marker_color=bar_color)
    for column, bar_color in bar_specs
])
fig.update_layout(barmode='stack', title_text='Countries(confirmed between 100 and 10)')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Latest-date totals per country for the smallest bucket (confirmed <= 10).
latest_data = covid_clean[covid_clean["Date"] == covid_clean["Date"].max()].reset_index()
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
country_latest_data = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
# Inclusive bound: the next larger bucket starts at `> 10`, so a country with
# exactly 10 cases would otherwise be in no chart.
countries_confirmed_cases = (
    country_latest_data[country_latest_data['Confirmed'] <= 10]
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
)

# (column, bar colour); the legend label is the column name.
bar_specs = [
    ('Active', 'rgb(55, 83, 109)'),
    ('Recovered', 'green'),
    ('Deaths', 'crimson'),
]
fig = go.Figure(data=[
    go.Bar(name=column, x=countries_confirmed_cases["Country/Region"],
           y=countries_confirmed_cases[column], marker_color=bar_color)
    for column, bar_color in bar_specs
])
fig.update_layout(barmode='stack', title_text='Countries(confirmed <= 10)')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# One small line chart of confirmed cases over time per selected country,
# laid out in a two-column grid.
# numeric_only=True keeps text columns out of the sums (needed on pandas >= 2.0).
temp = covid_clean.groupby(['Country/Region', 'Date']).sum(numeric_only=True).reset_index()
country_df = covid_clean.groupby('Country/Region').sum(numeric_only=True).reset_index()
# NOTE(review): this threshold is applied to 'Confirmed' summed over every
# date. If 'Confirmed' is cumulative (as the latest-date charts suggest), this
# selects more countries than "latest total > 1000" would - confirm intent.
ls = country_df[country_df['Confirmed'] > 1000]['Country/Region'].unique()
temp = temp[temp['Country/Region'].isin(list(ls))]
a = temp.groupby('Country/Region')

# Ceil-divide instead of reshape(-1, 2): reshape raises when the number of
# countries is odd. At least one row so an empty selection still builds a figure.
n_cols = 2
n_rows = max(1, -(-len(ls) // n_cols))
fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(ls))

for counter, country_name in enumerate(ls):
    # Fill the grid row by row; plotly rows/cols are 1-based.
    grid_row, grid_col = divmod(counter, n_cols)
    b = a.get_group(country_name)
    fig.add_trace(
        go.Scatter(x=b['Date'], y=b['Confirmed'], mode='lines', name=country_name),
        row=grid_row + 1, col=grid_col + 1,
    )
fig.update_layout(height=2000, width=800, title_text="Countries Cases>1000", showlegend=False)
fig.layout.template = 'plotly_dark'
fig.show()


In [None]:
# Latest-date totals per continent, largest confirmed count first.
latest_data = covid_clean[covid_clean["Date"] == covid_clean["Date"].max()].reset_index()
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
continent_latest_data = (
    latest_data.groupby('continent')
    .sum(numeric_only=True)
    .reset_index()
    .sort_values(by='Confirmed', ascending=False)
)

# (column, bar colour); the legend label is the column name ('Recovered' was
# previously misspelled 'Revovered').
bar_specs = [
    ('Active', 'rgb(55, 83, 109)'),
    ('Recovered', 'green'),
    ('Deaths', 'crimson'),
]
fig = go.Figure(data=[
    go.Bar(name=column, x=continent_latest_data["continent"],
           y=continent_latest_data[column], marker_color=bar_color)
    for column, bar_color in bar_specs
])
fig.update_layout(barmode='group', title_text='continent')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Daily world totals of recovered, dead and active cases in long format
# (one row per date and case type), drawn as a stacked area chart.
case_columns = ['Recovered', 'Deaths', 'Active']
# Select the columns with a list: selecting groupby columns with a bare tuple
# (`['Recovered', 'Deaths', 'Active']` without the inner brackets) was removed
# in pandas 2.0.
temp = covid_clean.groupby('Date')[case_columns].sum().reset_index()
temp = temp.melt(id_vars="Date", value_vars=case_columns,
                 var_name='Case', value_name='Count')

fig = px.area(temp, x="Date", y="Count", color='Case')
fig.update_layout(barmode='stack', title_text='Confirmed Cases')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
from datetime import datetime
# Daily totals for the 'Grand Princess' rows, keeping only dates after
# 2020-03-03.
# numeric_only=True keeps the text columns out of the sum (needed on pandas >= 2.0).
princess = (
    covid_ship[covid_ship['Province/State'] == 'Grand Princess']
    .groupby('Date')
    .sum(numeric_only=True)
    .reset_index()
)
princess = princess[princess['Date'] > '2020-03-03']

In [None]:
# Confirmed (line + markers) against recovered (line only) for the Grand Princess.
fig = go.Figure()
for column, draw_mode in (('Confirmed', 'lines+markers'), ('Recovered', 'lines')):
    fig.add_trace(go.Scatter(x=princess['Date'], y=princess[column],
                             mode=draw_mode, name=column))
fig.update_layout(barmode='stack',
                  title_text='Grand Princess Confirmed vs Recovered',
                  template='plotly_dark')
fig.show()

In [None]:
# World totals per date, one line each for confirmed, recovered and deaths.
# numeric_only=True keeps text columns out of the sum (needed on pandas >= 2.0).
temp = covid_clean.groupby('Date').sum(numeric_only=True).reset_index()
fig = go.Figure()
for column in ('Confirmed', 'Recovered', 'Deaths'):
    fig.add_trace(go.Scatter(x=temp['Date'], y=temp[column],
                             mode='lines', name=column))
fig.update_layout(barmode='stack', title_text='Covid Patterns')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Active against recovered over time, using the per-date totals in `temp`.
fig = go.Figure()
for column in ('Active', 'Recovered'):
    fig.add_trace(go.Scatter(x=temp['Date'], y=temp[column],
                             mode='lines', name=column))
fig.update_layout(barmode='stack', title_text='Covid Patterns', template='plotly_dark')
fig.show()

In [None]:
# Line plot of the raw (un-aggregated) rows: confirmed count against date.
fig = px.line(covid_clean, x="Date", y="Confirmed")
fig.update_layout(title_text='Date vs Confirmed Cases', template='plotly_dark')
fig.show()

In [None]:
# Confirmed cases per date and country, one coloured line per country.
confirmed_by_country = (
    covid_clean.groupby(['Date', 'Country/Region'])['Confirmed']
    .sum()
    .reset_index()
)
fig = px.line(confirmed_by_country, x="Date", y="Confirmed", color="Country/Region")
fig.update_layout(title_text='Country-Wise Covid Distribution', template='plotly_dark')
fig.show()

In [None]:
# Per-continent totals, taken from the most recent date only. Summing
# 'Confirmed' etc. over every date would count each day's running total again
# (the rest of the notebook treats these columns as cumulative: daily figures
# are obtained by differencing them, and totals are read from the latest date).
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
continent_data = (
    covid_clean[covid_clean['Date'] == covid_clean['Date'].max()]
    .groupby('continent')
    .sum(numeric_only=True)
)
continent_data[["Confirmed", "Deaths", "Recovered", "Active"]].style.background_gradient(cmap='Reds')

In [None]:
# Confirmed cases per date and continent, one coloured line per continent.
confirmed_by_continent = (
    covid_clean.groupby(['Date', 'continent'])['Confirmed']
    .sum()
    .reset_index()
)
fig = px.line(confirmed_by_continent, x="Date", y="Confirmed", color="continent")
fig.update_layout(title_text='Total Continent-Wise Covid Distribution', template='plotly_dark')
fig.show()

In [None]:
# Deaths as a percentage of confirmed cases, per continent.
continent_data['Mortality'] = continent_data['Deaths'].mul(100).div(continent_data['Confirmed'])
continent_data.loc[:, ['Mortality']].style.background_gradient(cmap='Reds')

In [None]:
# Recovered as a percentage of confirmed cases, per continent.
continent_data['Recovery Rate'] = continent_data['Recovered'].mul(100).div(continent_data['Confirmed'])
continent_data.loc[:, ['Recovery Rate']].style.background_gradient(cmap='Greens')

In [None]:
# World-wide deaths / confirmed ratio for every date.
# numeric_only=True keeps text columns out of the sum (needed on pandas >= 2.0).
temp = covid_clean.groupby('Date').sum(numeric_only=True).reset_index()
temp['Mortality'] = temp['Deaths'] / temp['Confirmed']
fig = px.line(x=temp['Date'], y=temp['Mortality'], labels={'x': 'Date', 'y': 'Rate'})
fig.update_layout(title_text='Cumulative Mortality Rate')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Day-over-day new confirmed / deaths / recovered, and the ratio of new deaths
# to new confirmed cases per day.
# numeric_only=True keeps text columns out of the sum (needed on pandas >= 2.0).
temp = covid_clean.groupby('Date').sum(numeric_only=True).reset_index()
# Whole days elapsed since the first date in the table.
temp['nth day'] = (temp['Date'] - temp['Date'].min()).dt.days

# Daily increment = difference to the previous date; the first date has no
# predecessor and keeps its own total.
daily_columns = (
    ('Confirmed', 'Day_confirmed'),
    ('Deaths', 'Day_deaths'),
    ('Recovered', 'Day_Recovered'),
)
for total_column, daily_column in daily_columns:
    temp[daily_column] = temp[total_column].diff().fillna(temp[total_column])

# Days without new confirmed cases give NaN (a gap in the line) instead of a
# division by zero.
temp['Day_Mortality'] = temp['Day_deaths'] / temp['Day_confirmed'].replace(0, np.nan)
fig = px.line(x=temp['nth day'], y=temp['Day_Mortality'], labels={'x': 'days', 'y': 'rate'})
fig.update_layout(title_text='Mortality Day Wise')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# World map coloured by latest confirmed cases per country.
# numeric_only=True: `latest_data` still holds the datetime 'Date' column and
# text columns, which pandas >= 2.0 no longer drops silently when summing.
confirmed_by_country = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
fig = px.choropleth(confirmed_by_country, locations="Country/Region",
                    color="Confirmed", locationmode='country names',
                    hover_name="Country/Region", range_color=[1, 7000],
                    title='Countries with Confirmed Cases',
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.update(layout_coloraxis_showscale=True)
fig.show()

In [None]:
# World map coloured by latest deaths per country.
# numeric_only=True: `latest_data` still holds the datetime 'Date' column and
# text columns, which pandas >= 2.0 no longer drops silently when summing.
deaths_by_country = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
fig = px.choropleth(deaths_by_country, locations="Country/Region",
                    color="Deaths", locationmode='country names',
                    hover_name="Country/Region", range_color=[1, 5],
                    title='World Wide Covid Deaths',
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.update(layout_coloraxis_showscale=True)
fig.show()

In [None]:
# World map coloured by latest recovered cases per country.
# numeric_only=True: `latest_data` still holds the datetime 'Date' column and
# text columns, which pandas >= 2.0 no longer drops silently when summing.
recovered_by_country = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
fig = px.choropleth(recovered_by_country, locations="Country/Region",
                    color="Recovered", locationmode='country names',
                    hover_name="Country/Region", range_color=[1, 5],
                    title='Recovered Cases World Wide ',
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.update(layout_coloraxis_showscale=True)
fig.show()

In [None]:
# Latest totals per country plus deaths as a percentage of confirmed cases.
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
country_latest = latest_data.groupby("Country/Region").sum(numeric_only=True).reset_index()
country_latest["Mortality_Rate"] = 100 * country_latest["Deaths"] / country_latest["Confirmed"]
# Countries with at least one death, highest rate first.
mortality = country_latest[country_latest["Mortality_Rate"] > 0].sort_values(by='Mortality_Rate', ascending=False)
country_latest = country_latest.sort_values(by='Mortality_Rate', ascending=False)

In [None]:
# Mortality rate (%) per country, in the order of `country_latest`.
fig = px.bar(x=country_latest["Country/Region"], y=country_latest["Mortality_Rate"],
             labels={'x': 'Country', 'y': 'Rate'})
# Title previously read "Courty wise ...".
fig.update_layout(title_text="Country wise Mortality Rate")
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Countries with more than 100 confirmed cases, highest mortality rate first.
over_100_confirmed = country_latest["Confirmed"] > 100
highest100 = country_latest.loc[over_100_confirmed].sort_values(by='Mortality_Rate', ascending=False)

In [None]:
# Mortality rate (%) per country, restricted to the rows in `highest100`.
fig = px.bar(x=highest100["Country/Region"], y=highest100["Mortality_Rate"],
             labels={'x': 'Country', 'y': 'Rate'})
# Title previously read "Courty wise ...".
fig.update_layout(title_text="Country wise Mortality Rate when cases>100")
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Ebola 2014-2016 table: keep date, country and the confirmed case/death
# counts, then add country code, continent and normalised country name the
# same way as for the COVID table.
ebola_clean = pd.read_csv("../input/ebola-outbreak-20142016-complete-dataset/ebola_2014_2016_clean.csv", parse_dates=['Date'])
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of the frame that was read.
ebola_clean = ebola_clean[['Date', 'Country', 'No. of confirmed cases', 'No. of confirmed deaths']].copy()
ebola_clean.columns = ['Date', 'Country', 'Confirmed', 'Deaths']
ebola_clean.fillna(0, inplace=True)
ebola_clean['Confirmed'] = ebola_clean['Confirmed'].astype('int')
ebola_clean['Deaths'] = ebola_clean['Deaths'].astype('int')
ebola_clean['Country_code'] = ebola_clean["Country"].apply(do_fuzzy_search)
ebola_clean['Continent'] = ebola_clean["Country_code"].apply(do_continent_search)
ebola_clean['Country'] = ebola_clean["Country_code"].apply(country)

# Per-country totals on the last date in the table.
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
ebola_country = (
    ebola_clean[ebola_clean['Date'] == ebola_clean['Date'].max()]
    .groupby('Country')
    .sum(numeric_only=True)
    .reset_index()
)
# Column selection uses a list: selecting groupby columns with a bare tuple
# was removed in pandas 2.0.
ebola_time = ebola_clean.groupby('Date')[['Confirmed', 'Deaths']].sum().reset_index()


In [None]:

# Per-country Ebola totals, shaded red by magnitude.
ebola_country.loc[:, ["Country", "Confirmed", "Deaths"]].style.background_gradient(cmap='Reds')

In [None]:
# SARS 2003 table: rename the columns, then add country code, continent and
# normalised country name the same way as for the COVID table.
sars_clean = pd.read_csv('/kaggle/input/sars-outbreak-2003-complete-dataset/sars_2003_complete_dataset_clean.csv')
sars_clean.columns = ['Date', 'Country', 'Confirmed', 'Deaths', 'Recovered']
sars_clean['Country_code'] = sars_clean["Country"].apply(do_fuzzy_search)

# Fixed alpha-2 codes for names that must not rely on the fuzzy search;
# applied in one vectorised pass instead of one row-wise apply per name.
_SARS_CODE_OVERRIDES = {
    'Hong Kong SAR, China': 'HK',
    'Taiwan, China': 'TW',
    'Republic of Ireland': 'IE',
    'Macao SAR, China': 'MO',
}
sars_clean["Country_code"] = (
    sars_clean['Country']
    .map(_SARS_CODE_OVERRIDES)
    .fillna(sars_clean['Country_code'])
)
sars_clean['Continent'] = sars_clean["Country_code"].apply(do_continent_search)
sars_clean['Country'] = sars_clean["Country_code"].apply(country)

# Per-country totals on the last date in the table.
# numeric_only=True keeps the text columns (including the unparsed 'Date')
# out of the sum (needed on pandas >= 2.0).
sars_country = (
    sars_clean[sars_clean['Date'] == sars_clean['Date'].max()]
    .groupby('Country')
    .sum(numeric_only=True)
    .reset_index()
)
# Column selection uses a list: selecting groupby columns with a bare tuple
# was removed in pandas 2.0.
sars_time = sars_clean.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()




In [None]:
# Ten countries with the most confirmed SARS cases, shaded red by magnitude.
sars_top10 = sars_country.loc[:, ['Country', 'Confirmed', 'Deaths']].sort_values(by='Confirmed', ascending=False).head(10)
sars_top10.style.background_gradient(cmap='Reds')

In [None]:
# Ten countries with the most confirmed COVID-19 cases on the latest date,
# as a counterpart to the SARS top-10 table. Built from `latest_data` directly:
# `countries_confirmed_cases` was last assigned by the smallest-outbreak chart
# and therefore only holds countries with very few cases at this point.
# numeric_only=True keeps the datetime/text columns out of the sum
# (required on pandas >= 2.0).
latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()[['Country/Region', 'Confirmed', 'Deaths']].sort_values(by='Confirmed', ascending=False).head(10).style.background_gradient(cmap='Reds')

In [None]:
# Confirmed cases of the three outbreaks against the row position of each
# per-date table (i.e. n-th reported date, not calendar date).
# numeric_only=True keeps text columns out of the sum (needed on pandas >= 2.0).
temp = covid_clean.groupby('Date').sum(numeric_only=True).reset_index()
fig = go.Figure()
for outbreak_name, per_date in (('SARS', sars_time), ('Ebola', ebola_time), ('Covid19', temp)):
    fig.add_trace(go.Scatter(x=per_date.index, y=per_date['Confirmed'],
                             mode='lines', name=outbreak_name))

fig.update_layout(barmode='stack', title_text='epidemic Patterns(Day wise)')
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# World map of confirmed SARS cases per country. Uses `sars_country`, the
# per-country totals of the last date: summing `sars_clean` over every date
# would add up each day's figure again and inflate the totals (the notebook
# itself reads the country totals from the last date).
fig = px.choropleth(sars_country, locations="Country",
                    color="Confirmed", locationmode='country names',
                    hover_name="Country", range_color=[1, 7000],
                    title='Sars Epidemic',
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.update(layout_coloraxis_showscale=True)
fig.show()


In [None]:
# World map of confirmed Ebola cases per country. Uses `ebola_country`, the
# per-country totals of the last date: summing `ebola_clean` over every date
# would add up each day's figure again and inflate the totals (the notebook
# itself reads the country totals from the last date).
fig = px.choropleth(ebola_country, locations="Country",
                    color="Confirmed", locationmode='country names',
                    hover_name="Country", range_color=[1, 7000],
                    title='Ebola Epidemic',
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.update(layout_coloraxis_showscale=True)
fig.show()

In [None]:
# World map of latest confirmed COVID-19 cases per country, on the same colour
# range as the SARS and Ebola maps.
# numeric_only=True: `latest_data` still holds the datetime 'Date' column and
# text columns, which pandas >= 2.0 no longer drops silently when summing.
covid_by_country = latest_data.groupby('Country/Region').sum(numeric_only=True).reset_index()
fig = px.choropleth(covid_by_country, locations="Country/Region",
                    color="Confirmed", locationmode='country names',
                    hover_name="Country/Region", range_color=[1, 7000],
                    title='Countries with Confirmed Cases',
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.update(layout_coloraxis_showscale=True)
fig.show()

Loading the SARS-COV-2 genome sequence along with those of EBOLAV, MERS, SARS-COV, Bat-SARS and Civet-SARS

**Genome Comparison of SARS-COV-1, COVID-19, MERS, BAT-SARS, Civet SARS and EBOLA**

In [None]:
from Bio import SeqIO

def _last_fasta_sequence(path):
    """Return the sequence of the last record in the FASTA file at `path` as a str.

    Raises:
        ValueError: if the file contains no records, so the problem surfaces
            here instead of as a NameError in a later cell.
    """
    sequence = None
    for seq_record in SeqIO.parse(path, "fasta"):
        sequence = str(seq_record.seq)
    if sequence is None:
        raise ValueError("no FASTA records found in %r" % (path,))
    return sequence


# Directory that holds all six genome files.
_GENOME_DIR = "/kaggle/input/ebolav-vs-sarscov-vs-mers/"

EBOLAV = _last_fasta_sequence(_GENOME_DIR + "EBOLAV.fasta")
MERSV = _last_fasta_sequence(_GENOME_DIR + "MERS sequence.fasta")
COV2 = _last_fasta_sequence(_GENOME_DIR + "SARS-COV2.fasta")
Civet_SARS = _last_fasta_sequence(_GENOME_DIR + "Civet-SARS.fasta")
SARS_Cov = _last_fasta_sequence(_GENOME_DIR + "SARSr-COV.fasta")
Bat_SARS = _last_fasta_sequence(_GENOME_DIR + "BAT-SARS.fasta")

In [None]:

# Read the sequence metadata table and keep only the four columns used below.
wanted_columns = ['Accession', 'Protein', 'Geo_Location', 'Isolation_Source']
bio_details = pd.read_csv("../input/corona-details/corona.csv").loc[:, wanted_columns]

In [None]:
# (display label, genome string) for every virus, in plotting order.
labelled_genomes = [
    ('SARS-CoV-2(COVID19)', COV2),
    ('EBOLAV', EBOLAV),
    ('MERS-CoV', MERSV),
    ('SARS-COV1(SARS)', SARS_Cov),
    ('BAT-SARS', Bat_SARS),
    ('CIVET-SARS', Civet_SARS),
]
# One row per virus with the GC() and molecular_weight() value of its genome.
weights_comparison = pd.DataFrame({
    'Virus': [label for label, _ in labelled_genomes],
    'GC%': [GC(genome) for _, genome in labelled_genomes],
    'molecular-Weight': [molecular_weight(genome) for _, genome in labelled_genomes],
})

In [None]:
# Histogram of record lengths in the SARS-COV-2 FASTA file.
sizes = [len(r) for r in SeqIO.parse("/kaggle/input/ebolav-vs-sarscov-vs-mers/SARS-COV2.fasta", "fasta")]
fig = px.histogram(x=sizes, nbins=20, labels={'x': "Sequence length (bp)", 'y': "Count"})
# Plotly titles ignore "\n"; "<br>" is what starts a second line.
fig.update_layout(title_text=("COVID-19(SARS-COV-2): %i sequences<br>Lengths %i to %i"
                              % (len(sizes), min(sizes), max(sizes))))
fig.layout.template = 'plotly_dark'
fig.show()

In [None]:
# Histogram of record lengths in the SARSr-COV FASTA file.
sizes = [len(r) for r in SeqIO.parse("/kaggle/input/ebolav-vs-sarscov-vs-mers/SARSr-COV.fasta", "fasta")]
fig = px.histogram(x=sizes, nbins=20, labels={'x': "Sequence length (bp)", 'y': "Count"})
# Plotly titles ignore "\n"; "<br>" is what starts a second line.
fig.update_layout(title_text=("SARS(SARSr-COV1): %i sequences<br>Lengths %i to %i"
                              % (len(sizes), min(sizes), max(sizes))))
fig.layout.template = 'plotly_dark'

fig.show()

In [None]:
# Histogram of record lengths in the BAT-SARS FASTA file.
sizes = [len(r) for r in SeqIO.parse("/kaggle/input/ebolav-vs-sarscov-vs-mers/BAT-SARS.fasta", "fasta")]
fig = px.histogram(x=sizes, nbins=20, labels={'x': "Sequence length (bp)", 'y': "Count"})
# Plotly titles ignore "\n"; "<br>" is what starts a second line.
fig.update_layout(title_text=("Bat-SARS: %i sequences<br>Lengths %i to %i"
                              % (len(sizes), min(sizes), max(sizes))))
fig.layout.template = 'plotly_dark'

fig.show()

In [None]:
# Histogram of record lengths in the MERS FASTA file.
sizes = [len(r) for r in SeqIO.parse("/kaggle/input/ebolav-vs-sarscov-vs-mers/MERS sequence.fasta", "fasta")]

fig = px.histogram(x=sizes, nbins=20, labels={'x': "Sequence length (bp)", 'y': "Count"})
# Plotly titles ignore "\n"; "<br>" is what starts a second line.
fig.update_layout(title_text=("MERS: %i sequences<br>Lengths %i to %i"
                              % (len(sizes), min(sizes), max(sizes))))
fig.layout.template = 'plotly_dark'

fig.show()

In [None]:
# First bar (SARS-CoV-2) highlighted in crimson, the other five in grey.
color = ['crimson'] + ['lightslategray'] * 5
gc_bars = go.Bar(x=weights_comparison["Virus"], y=weights_comparison['GC%'], marker_color=color)
fig = go.Figure(data=[gc_bars])
fig.update_layout(title_text='Guanine Cytosine Percentage', template='plotly_dark')

fig.show()


In [None]:
# First bar (SARS-CoV-2) highlighted in crimson, the other five in grey.
color = ['crimson'] + ['lightslategray'] * 5
mass_bars = go.Bar(x=weights_comparison["Virus"], y=weights_comparison['molecular-Weight'], marker_color=color)
fig = go.Figure(data=[mass_bars])
fig.update_layout(title_text='molecular-Mass', template='plotly_dark')

fig.show()

In [None]:
# GC% against molecular weight, one coloured point per virus.
fig = px.scatter(
    x=weights_comparison['GC%'],
    y=weights_comparison['molecular-Weight'],
    color=weights_comparison["Virus"],
)
fig.update_layout(
    title_text='Virus Scatter Plot',
    xaxis_title="guanine-Cytosine%",
    yaxis_title="Molecular Mass",
    template='plotly_dark',
)
fig.show()

SARS-COV-2 (coronavirus) is easily distinguishable from the Ebola virus and from the other SARS and MERS viruses because of its low GC% and high molecular weight. SARS-COV-1 (SARS epidemic, 2002) and MERS are much more similar to the SARS viruses affecting bats and civets, at least in this respect. This would suggest an intermediate host between these animals and humans.

According to WHO [report](https://www.who.int/docs/default-source/coronaviruse/situation-reports/20200221-sitrep-32-covid-19.pdf?sfvrsn=4802d089_2):
>  ...it is believed that the virus jumped the species barrier to humans from another intermediate animal host. This
intermediate animal host could be a domestic food animal, a wild animal, or a domesticated wild animal which has
not yet been identified..

In [None]:
from Levenshtein import distance as levenshtein_distance, seqratio

# Genomes that COV2 is compared against, keyed by the label shown in the table.
reference_genomes = {
    "SARS-CoV(SARS)": SARS_Cov,
    "MERS": MERSV,
    "EBOLA": EBOLAV,
    "Civet-SARS": Civet_SARS,
    "Bat-SARS": Bat_SARS,
}
similarity = pd.DataFrame({
    "Virus": list(reference_genomes),
    "Percentage": [seqratio(COV2, genome) for genome in reference_genomes.values()],
})
# Scale the seqratio result by 100 so the column reads as a percentage.
similarity["Percentage"] = 100 * similarity["Percentage"]

In [None]:
# Show the similarity table with its cells shaded by the 'Reds' colormap.
similarity_styler = similarity.style
similarity_styler.background_gradient(cmap='Reds')


In [None]:
# Bar colours by position: entries 1, 4, 5 (and a 6th) are highlighted,
# entries 2 and 3 are muted.
# NOTE(review): `similarity` is built with five rows, so the sixth colour
# appears to have no matching bar - confirm whether it can be dropped.
highlight, muted = 'crimson', 'lightslategray'
color = [highlight, muted, muted, highlight, highlight, highlight]
similarity_bars = go.Bar(
    x=similarity["Virus"],
    y=similarity['Percentage'],
    marker_color=color,
)
fig = go.Figure(data=[similarity_bars])
fig.update_layout(title_text='Percentage Similarity(Genome)')
fig.layout.template = 'plotly_dark'
fig.show()

We can see that SARS-CoV-2 (the COVID-19 coronavirus) has a very high similarity in its genome sequence with the SARS viruses infecting bats and civets and with SARSr-CoV-1 (the SARS infection virus). It is least similar to Ebola.

Note: We have used a Levenshtein-based similarity ratio (`seqratio` from the `Levenshtein` package) as our metric for comparison.

In [None]:
# Count the records per isolation source and turn the counts into a
# two-column table (source name, number of records).
source_counts = bio_details['Isolation_Source'].value_counts()
isolation_source = source_counts.to_frame().reset_index()
isolation_source.columns = ['Isolation_Source', 'Values']
# Display the table with its cells shaded by the 'Greens' colormap.
isolation_source.style.background_gradient(cmap='Greens')

In [None]:
# Pie chart with one slice per isolation source, sized by its record count.
pie_options = {
    'names': 'Isolation_Source',
    'values': 'Values',
    'title': 'Isolation_Sources',
}
fig = px.pie(isolation_source, **pie_options)
fig.show()

Oronasopharynx and Lungs seem to be the best Isolation Sources to test for Covid-19.

**Introduction to epidemic modelling**

SIR Model:
    SIR stands for Susceptible Infectious Recovered
![SIR model](https://tinyurl.com/ro7bfse)  

> The SIR model is one of the simplest compartmental models, and many models are derivations of this basic form. The model consists of three compartments: S for the number of susceptible, I for the number of infectious, and R for the number recovered (or immune) individuals

[More reading material](https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology) 

**SIR model without vital dynamics**
If the course of the infection is short (emergent outbreak) compared with the lifetime of an individual and the disease is non-fatal, vital dynamics (birth and death) can be ignored. The equations for such a model can be written as:

![](https://tinyurl.com/qp9rmmr)

where ![](https://institutefordiseasemodeling.github.io/Documentation/malaria/_images/math/e0355a363dcd9bc715a43db6e1d37c3f42982fd3.png) is Total population.


**SIR model with vital dynamics:**

However in a population with vital dynamics, new births can provide more susceptible individuals to the population, sustaining an epidemic or allowing new introductions to spread throughout the population. In a realistic population like this, disease dynamics will reach a steady state. This is the case when diseases are endemic to a region.


Let,
![](https://tinyurl.com/u64yapz) and ![](https://tinyurl.com/w9dz2mu) 
represent the birth and death rates, respectively, for the model. To maintain a constant population, assume that   
![](https://tinyurl.com/shsb7le)

In steady state
![](https://tinyurl.com/wtosk3m) 
The ODE then becomes:

![](https://tinyurl.com/w79y4y8)

**SIRS MODEL with Vital Dynamics:**
The SIR model assumes people carry lifelong immunity to a disease upon recovery; this is the case for a variety of diseases. For another class of airborne diseases, for example seasonal influenza, an individual’s immunity may wane over time. In this case, the SIRS model is used to allow recovered individuals to return to a susceptible state.

![](https://tinyurl.com/wlpxlqy)

where, ![](https://institutefordiseasemodeling.github.io/Documentation/malaria/_images/math/e0355a363dcd9bc715a43db6e1d37c3f42982fd3.png) is total population


Sources: [Institute for disease modelling.](https://institutefordiseasemodeling.github.io/Documentation/malaria/model-sir.html#id9)

Furthermore, there are other models such as SEIR, SEIT and SEIRS, which do not describe the SARS-CoV-2 epidemic properly because they assume an Exposed compartment in addition to the Infected one. According to WHO, COVID-19 may be transmitted even without symptoms, which rules out any practical distinction between exposure and infection.

Also, applying time-series forecasting models such as Prophet would be of little use here, because they assume the growth of the epidemic over time to be constant and/or linear.

Considering the high growth rate of the COVID-19 epidemic, it is preferable to use a SIRS model without vital dynamics, because at the current rate of growth the birth rate and death rate of the population do not play a significant role.
According to Wikipedia:
> **The dynamics of an epidemic, for example the flu, are often much faster than the dynamics of birth and death, therefore, birth and death are often omitted in simple compartmental models.**




**Model of our choice:**

**SIRS Model without Vital Dynamics** :

Why SIRS not SIR?:
[Cases](https://globalnews.ca/news/6623287/coronavirus-multiple-infections/) of COVID-19 reinfection have been reported in the news, highlighting that survivors may or may not develop immunity.

Model Equations:

![](https://tinyurl.com/vj8jmj8) 



You can check out a great Kernel on SIR modeling already done [here](https://www.kaggle.com/lisphilar/covid-19-data-with-sir-model/)

**China Covid Analysis**

In [None]:
# Keep only the rows for China (country code 'CN') and add the columns up
# per date, giving one row per date.
is_china = covid_clean["country_code"] == 'CN'
china_rows = covid_clean[is_china]
China_data = china_rows.groupby('Date').sum().reset_index()
# Earliest date present in the Chinese series (shown as the cell output).
min(China_data['Date'])

We are taking the population of China on 2020-01-22 to be 1437731640 ([source](https://www.worldometers.info/world-population/china-population/)).

In [None]:
# Population figure used as the size of the whole population in the model.
CHINA_POPULATION = 1437731640

# Build the S / I / R series from the per-date Chinese totals.
model_df = pd.DataFrame()
model_df['S'] = CHINA_POPULATION - China_data['Confirmed']  # not (yet) confirmed
model_df['I'] = China_data['Confirmed']
model_df['R'] = China_data['Recovered']
# Whole days elapsed since the first date in the series. `.dt.days` reads the
# day component of the timedelta directly instead of parsing its string form.
model_df['T'] = (China_data['Date'] - China_data['Date'].min()).dt.days

# Finite-difference rate of change of every column with respect to T.
# The column names are snapshotted first because the loop adds new columns
# to model_df while it runs. This yields dS/dT, dI/dT, dR/dT and dT/dT.
for v in list(model_df.columns):
    model_df[f"d{v}/dT"] = model_df[v].diff() / model_df["T"].diff()

In [None]:
# Annotated heatmap of the pairwise correlations between all model columns.
plt.figure(figsize=(16, 16))
# Correlate over the whole series: restricting the frame with .head() would
# base every coefficient on the first five rows only.
sns.heatmap(model_df.corr(), annot=True, linewidths=0.1, cmap='icefire')
# plt.show() renders the figure; plt.plot() without arguments draws nothing.
plt.show()

There is a perfectly negative correlation between I and S.
There is a high correlation between I and R, which means that as the number of infections increases, the number of recoveries increases too.
dS/dT (the rate of change of susceptible cases) is negatively correlated with dI/dT, which means that as the rate of change of susceptible cases decreases, the rate of change of infected cases increases.

In [None]:
# One line per rate-of-change column, all drawn against the day index T.
fig = go.Figure()
for rate in ('dS/dT', 'dI/dT', 'dR/dT'):
    rate_line = go.Scatter(x=model_df['T'], y=model_df[rate], mode='lines', name=rate)
    fig.add_trace(rate_line)

fig.update_layout(barmode='stack', title_text='Differential Rates of Change ')
fig.layout.template = 'plotly_dark'

fig.show()

In [None]:
# Per-date totals for China (country code 'CN'), one row per date.
china_model = covid_clean[covid_clean['country_code'] == 'CN'].groupby('Date').sum().reset_index()
# Whole days elapsed since the first date. `.dt.days` reads the day component
# of the timedelta directly instead of parsing its string form.
china_model['nth Day'] = (china_model['Date'] - china_model['Date'].min()).dt.days

# Line chart of the 'Deaths' column over time. The title names the series
# that is actually plotted (it previously said "Confirmed Cases").
# NOTE(review): if confirmed cases were intended here, plot
# china_model['Confirmed'] and restore the title instead.
fig = px.line(x=china_model['Date'], y=china_model['Deaths'])
fig.update_layout(title_text='China:Deaths Cumulative')
fig.layout.template = 'plotly_dark'

fig.show()

In [None]:
# Derive day-wise increments from the running totals.
# For each (total column, daily column) pair the daily value is the total
# minus the previous row's total. shift(fill_value=0) makes the first row
# keep its own total, matching the seed value used for the first day, and
# keeps the column's dtype.
# Assumes rows are in ascending date order, as produced by groupby('Date').
daily_columns = (
    ('Confirmed', 'Day_confirmed'),
    ('Deaths', 'Day_deaths'),
    ('Recovered', 'Day_Recovered'),
)
for total_column, daily_column in daily_columns:
    running_total = china_model[total_column]
    china_model[daily_column] = running_total - running_total.shift(1, fill_value=0)

In [None]:


# Day-wise confirmed, deaths and recovered counts for China, one line each,
# plotted against the day index.
fig = go.Figure()
daily_series = (
    ('Day_confirmed', 'Day confirmed'),
    ('Day_deaths', 'Day deaths'),
    ('Day_Recovered', 'Day Recovered'),
)
for column, legend_name in daily_series:
    daily_line = go.Scatter(
        x=china_model['nth Day'],
        y=china_model[column],
        mode='lines',
        name=legend_name,
    )
    fig.add_trace(daily_line)

fig.update_layout(barmode='stack', title_text='China cases:day wise')

fig.layout.template = 'plotly_dark'

fig.show()
