In [None]:
# import the necessary libraries
import numpy as np 
import pandas as pd 
import os
from datetime import datetime, timedelta

# Visualisation libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
import pycountry
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot 
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot

#!pip install pywaffle
#from pywaffle import Waffle

from ipywidgets import widgets
from IPython.display import display
!jupyter nbextension enable --py --sys-prefix widgetsnbextension

py.init_notebook_mode(connected=True)
import folium 
from folium import plugins
plt.style.use("fivethirtyeight")# for pretty graphs

# Increase the default plot size and set the color scheme
plt.rcParams['figure.figsize'] = 8, 5
#plt.rcParams['image.cmap'] = 'viridis'

# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Disable warnings 
import warnings
warnings.filterwarnings('ignore')


# Load the state-wise daily report; the `Sno` column is dropped and the rows
# are re-indexed starting from 1.
df_master = pd.read_csv('../input/covid19-in-india/covid_19_india.csv').drop(columns=['Sno'])
df_master.index = range(1, len(df_master) + 1)

# State labels removed from the analysis.
# NOTE(review): presumably alternate spellings / duplicate labels in the raw data -- confirm.
excluded_states = [
    'Chattisgarh',
    'Pondicherry',
    'Union Territory of Jammu and Kashmir',
    'Union Territory of Chandigarh',
    'Union Territory of Ladakh',
]

# Working frame: derived totals first (computed from the raw column names),
# then friendlier column names, then the state filter.
df = (
    df_master
    .assign(**{
        'Total Confirmed cases': lambda d: d['ConfirmedIndianNational'] + d['ConfirmedForeignNational'],
        'Total Active cases': lambda d: (d['ConfirmedIndianNational'] + d['ConfirmedForeignNational']
                                         - d['Cured'] - d['Deaths']),
    })
    .rename(columns={
        "State/UnionTerritory": "States",
        "ConfirmedIndianNational": "Confirmed cases (Indian Nationals)",
        "ConfirmedForeignNational": "Confirmed cases (Foreign Nationals)",
        "Cured": "Cured/Discharged/Migrated",
    })
    .loc[lambda d: ~d['States'].isin(excluded_states)]
)
df.index = range(1, len(df) + 1)

# Snapshot taken before the date conversion: `df_states` keeps `Date` as the raw string.
df_states = df.copy()

# Parse day-first dates and order the working frame chronologically.
df = df.assign(Date=pd.to_datetime(df['Date'], dayfirst=True)).sort_values(by='Date')

#doubling_rate = 6.2
#Mortality_rate = 0.01
#days_to_death = 17.3

def _float_slider(label, default, low, high, step):
    """Build a continuously-updating FloatSlider with a one-decimal readout."""
    return widgets.FloatSlider(
        value=default,
        min=low,
        max=high,
        step=step,
        description=label,
        readout=True,
        readout_format='.1f',
        continuous_update=True,
    )


# One slider per model parameter.
DR_slider = _float_slider('Doubling Rate:', 6.2, 2.0, 8.0, 0.1)
M_slider = _float_slider('Mortality Rate %:', 1.0, 0.5, 10.0, 0.1)
Death_slider = _float_slider('Days to Death:', 17.3, 10.0, 60.0, 0.5)

# The slider values are read once, here; moving a slider afterwards does not
# update these variables unless this cell is re-run.
doubling_rate = DR_slider.value
Mortality_rate = M_slider.value / 100  # slider is in percent
days_to_death = Death_slider.value

# India may have well over 10000 cases and the government knows it

With the very successful "Janata Curfew"("People's Curfew"), different parts of India officially/unofficially continue to remain under semi lockdown. Strict measures are being taken by the Indian government to slow down the spread of the virus. This hints that the government knows that the situation is worse than depicted and they are trying everything to keep it under control.

This article tries to explain why there might be well over 8000 cases (10-20x the reported cases as of 22nd March 2020) using tried and tested models and graphs.

The model used in this article is borrowed from Tomas Pueyo's [medium blog](https://medium.com/@tomaspueyo/coronavirus-act-today-or-people-will-die-f4d3d9cd99ca). I wrote Python code to apply this model to the Indian scenario.


![](https://www.cdc.gov/coronavirus/2019-ncov/images/2019-coronavirus.png)




# Abstract

The first COVID-19 case in India was reported on 30th January 2020: a student who arrived in Kerala state from Wuhan, China, followed by 2 more cases in Kerala. For almost a month, no new cases were reported in India; however, on 8th March 2020, five new cases of COVID-19 were reported in Kerala, and since then the cases have been rising, affecting 22 states and resulting in 4 deaths across the country as of 21st March 2020.

On 13th March 2020, India reported its first coronavirus fatality in the state of Karnataka, followed by 3 more deaths in other states. On 21st March 2020, confirmed COVID-19 cases had risen into the **300s**, with the state of Maharashtra reporting the maximum number of cases. As the number of confirmed cases rises in India, the question is whether these numbers represent the **true** number of cases in India.

### **Section 1**: Estimation of **realistic** number of cases in India

### **Section 2**: Visual analysis True vs Confirmed cases & Takeaway

### **Section 3**: Visual analysis of the spread in India (Detailed)


**Credits**:
*This article is inspired by the wonderful analysis done by Tomas Pueyo in his [medium blog](https://medium.com/@tomaspueyo/coronavirus-act-today-or-people-will-die-f4d3d9cd99ca).
Data is obtained from reputable sources. Refer to the data section for more information.
Data visualization was inspired by Parul Pandey's [Kaggle page](https://www.kaggle.com/parulpandey/tracking-india-s-coronavirus-spread-wip)*

# Section 1: Estimation Algorithm


## Section 1.1: Variables

Three statistically determined variables were used to obtain the estimation curve:

doubling_rate = 6.2 days    
Mortality_rate = 0.01 or 1 %    
days_to_death = 17.3 days    

* **Doubling rate** is the time it takes for the number of cases to double. For Coronavirus, it is found to be around 6.2 days on average. For now, the same rate is assumed for the sake of analysis.

* **Mortality rate** is assumed to be around 1%.

    **Reason:** Countries that are well prepared see a fatality rate of ~0.5% (South Korea) to ~0.9% (rest of China). Countries that are overwhelmed by the number of cases will have a fatality rate between ~3%-5%.

    In other words: countries that act fast can reduce the number of deaths by a **factor of ten**. And that’s just counting the fatality rate.

    Acting fast includes massive testing and measures taken to enforce lockdown/social distancing/quarantine to reduce the rate of spread (flatten the curve).

* **Days to death** is again statistically determined to be around 17.3 days. For now, the same rate is assumed for the sake of analysis.


## Section 1.2: Understand the Model with example
 
The number of deaths in a region can be used to guess the number of realistic current cases. As already mentioned above, the number of days from infection to death is found to be around 17.3. That means the person who died on 13th March 2020 in Karnataka probably got infected around 25th February 2020.

Then, we know the mortality rate = 1%. That means that, around 25th February 2020, there were already around ~100 cases in Karnataka (of which only one ended up in death 17.3 days later).

Now, use the average doubling time of 6.2 days. This means that, in the 17 days it took this person to die, the cases had to multiply by ~8 (=2^(17/6)). This means that, if you are not diagnosing all cases, **1 death today means 800 realistic cases today**.

This model has various assumptions/limitations, which will be discussed later. 

## Section 1.3: Estimation of True cases over time

Expand to check out the code below to understand how the curve was plotted.

In [None]:
# Estimate the "realistic" cumulative case curve from reported deaths.
# Depends on `df` (date-sorted state reports) and on `days_to_death`,
# `Mortality_rate` and `doubling_rate` from the setup cell.
# Result: `actual_total`, one integer estimate per calendar day of the data.
death_df = df.copy()
death_df_filtered = death_df[death_df.Deaths != 0]

### Finding states with deaths
states = death_df_filtered.States.unique().tolist()

#### Number of days covered by the data (first to last report date, inclusive)
first_date = df.iloc[0].Date
last_date = death_df.iloc[-1].Date
size = int((last_date - first_date).days) + 1

# The model works on whole days, so the fractional parts are truncated.
lag_days = int(days_to_death)
step = int(doubling_rate)

actual_total = np.zeros(size, dtype=int)       ## Stores total realistic cases
actual_total_temp = np.zeros(size, dtype=int)  ## Scratch curve for a single death event

### Loop over each state (the ones with deaths)
for state in states:
    temp = death_df_filtered[death_df_filtered.States == state]

    # One row per distinct cumulative death count: the first date on which
    # that count was reported. Both the increments AND their dates must come
    # from this frame; indexing the dates from `temp` instead pairs an
    # increment with the wrong day whenever a count repeats across days.
    death_events = temp.drop_duplicates(subset='Deaths', keep="first")
    cumulative = death_events.Deaths.tolist()
    deaths = np.concatenate(([cumulative[0]], np.diff(cumulative)), axis=0)

    ### Loop over each new-death event of this state
    for k in range(len(deaths)):
        ### Go back `days_to_death` days to the presumed infection date
        infection_date = death_events.iloc[k].Date - timedelta(days=lag_days)
        start = int((last_date - infection_date).days)

        # Array position of the seed day.
        # NOTE(review): negative (wraps around) if a death falls within
        # lag_days + 1 days of the first report date -- confirm this cannot
        # happen for the data in use.
        origin = size - start - 2

        ### Seed: cases implied by the deaths, then double every `step` days
        actual_total_temp[origin] = deaths[k] / Mortality_rate
        for day in range(origin + step, size, step):
            actual_total_temp[day] = actual_total_temp[day - step] * 2

        ### Smoothening: fill the days between doubling points linearly
        for anchor in range(origin, size - 1, step):
            smoother = int(actual_total_temp[anchor] / doubling_rate)
            for day in range(anchor + 1, anchor + step):
                actual_total_temp[day] = actual_total_temp[day - 1] + smoother
                if day == size - 1:
                    break  # reached the last day of the data

        ### Accumulate this event's curve and reset the scratch array
        actual_total = np.add(actual_total, actual_total_temp)
        actual_total_temp = actual_total_temp * 0

#### Finally, actual_total is plotted next

#display(DR_slider, M_slider, Death_slider)

# Section 2: Visual analysis

## Section 2.1: Realistic cases vs Confirmed cases

In [None]:
# Collapse `df` to one row per state (the row with the highest confirmed
# total), ordered by confirmed cases. Later cells rely on this collapsed `df`.
df.sort_values(by=['States', 'Total Confirmed cases'], ascending=[True, False], inplace=True)
df.drop_duplicates(subset='States', keep="first", inplace=True)
df.sort_values(by='Total Confirmed cases', ascending=False, inplace=True)
df.index = range(1, df.shape[0] + 1)

confirmed_total = df['Total Confirmed cases'].sum()
estimate_high = actual_total[-1]  # model estimate for the last day of the data

print('Total number of Confirmed COVID 2019 cases across India:', confirmed_total)
print('Estimation of realistic number of cases across India: {} - {} (Around {}-{}x Confirmed cases)'.format(
    int(estimate_high / 2),
    estimate_high,
    int(estimate_high / 2 / confirmed_total),
    int(estimate_high / confirmed_total)))
print('\n')
print('Total number of Active COVID 2019 cases across India:', df['Total Active cases'].sum())
print('Total number of Cured/Discharged/Migrated COVID 2019 cases across India:', df['Cured/Discharged/Migrated'].sum())
print('Total number of Deaths due to COVID 2019  across India:', df['Deaths'].sum())
print('Total number of States/UTs affected:', df['States'].count())


# National day-by-day series.
dbd_India = pd.read_excel('../input/coronavirus-cases-in-india/per_day_cases.xlsx', sheet_name='India')

# Figure 1: reported series plus the model estimate.
fig = go.Figure()
for column, mode in [('Total Cases', 'lines+markers'),
                     ('Recovered', 'lines'),
                     ('Active', 'lines'),
                     ('Deaths', 'lines')]:
    fig.add_trace(go.Scatter(x=dbd_India['Date'], y=dbd_India[column],
                             mode=mode, name=column))
fig.add_trace(go.Scatter(x=dbd_India['Date'], y=actual_total,
                         mode='lines+markers', name='Estimate of real Total cases'))
fig.update_layout(title_text='Trend of Coronavirus Cases in India(Confirmed vs Realistic cases)',
                  plot_bgcolor='rgb(250, 242, 242)')
fig.show()

# Figure 2: deaths only.
fig = go.Figure()
fig.add_trace(go.Scatter(x=dbd_India['Date'], y=dbd_India['Deaths'],
                         mode='lines', name='Deaths'))
fig.update_layout(title_text='Deaths', plot_bgcolor='rgb(250, 242, 242)')
fig.show()

# Figure 3: new cases per day, reported vs. model.
# `prepend=0` keeps the daily differences the same length as `actual_total`,
# so bar k lines up with date k; a plain np.diff is one element short and
# shifts every "Realistic" bar one day early.
realistic_new_cases = np.diff(actual_total, prepend=0)
fig = go.Figure(data=[
    go.Bar(name="Confirmed cases", x=dbd_India.Date.tolist(), y=dbd_India['New Cases'].tolist()),
    go.Bar(name="Realistic", x=dbd_India.Date.tolist(), y=realistic_new_cases),
])
fig.update_layout(barmode='group')
fig.update_layout(title_text='New Coronavirus Cases in India per day', plot_bgcolor='rgb(250, 242, 242)')
fig.show()

## Section 2.2: What to take away from this analysis

### Indeterminable variables of the model 

* The model used isn't perfect; it just gives an idea of how many cases are truly present. There are a lot of variables to consider. For instance, age distribution in each country will also have an impact: since mortality is much higher for older people, regions with aging populations, like Europe, will be harder hit on average than younger countries like India. There are more factors, viz. weather (especially humidity and temperature), environment, food and lifestyle habits. But it’s still unclear how these will impact transmission and fatality rates.

* One more thing to consider is the number of tests per million people and the number of hospital beds per 1000 people. India ranks among the lowest in both. Many cases and deaths may go unreported, especially in rural areas, due to the lack of testing and the self-treatment phenomenon that exists in India.

**With the help of the model and the above considerations we can guess the number of true cases to be anywhere between 3000 - 6000+ as of March 21st.** 

### Best solution: Social distancing, Flattening the curve

Around **20%** of cases require hospitalization, **5%** of cases require the Intensive Care Unit (ICU), and around **2.5%** require very intensive help, with items such as ventilators or ECMO (extra-corporeal oxygenation).

The model predicts the number of cases only when Lockdown/Social Distancing/Quarantine is not enforced. Luckily, our government has taken steps to implement various lockdown measures, viz. banning international flights and social distancing. If these measures are enforced and followed strictly, the number of cases may plateau at around 8000-10000+ and not overburden our healthcare system.

China took some drastic steps. They locked down 15 cities apart from Wuhan, and the result can be seen in the image below.

![](https://miro.medium.com/max/7790/1*z0g3NaygxsgD50x_DJUIBA.png)







# Section 3: Distribution of Cases in India

## 3.1 Confirmed total cases, deaths, active cases, recovered across India

### 3.1.1 Plot

In [None]:
#
# Reported national series over time: (column in `dbd_India`, plotly draw mode).
# Each trace is named after its column.
trend_traces = [
    ('Total Cases', 'lines+markers'),
    ('Recovered', 'lines'),
    ('Active', 'lines'),
    ('Deaths', 'lines'),
]

fig = go.Figure()
for column, mode in trend_traces:
    fig.add_trace(
        go.Scatter(x=dbd_India['Date'], y=dbd_India[column], mode=mode, name=column)
    )

fig.update_layout(plot_bgcolor='rgb(250, 242, 242)')

fig.show()

### 3.1.2 Graphical visualization of confirmed Active cases in India

In [None]:
# create map and display it
# Join state coordinates onto the per-state totals in `df`.
India_coord = pd.read_csv('../input/coronavirus-cases-in-india/Indian Coordinates.csv')
India_coord.rename(columns={"Name of State / UT": "States"}, inplace=True)
df_full = pd.merge(India_coord, df, on='States')

# Named `india_map` rather than `map` so the builtin `map()` is not shadowed
# for the rest of the kernel session.
india_map = folium.Map(location=[20, 80], zoom_start=4.5, tiles='cartodbdark_matter')

# One circle per state; the radius scales with the active-case count.
for lat, lon, value, name in zip(df_full['Latitude'], df_full['Longitude'],
                                 df_full['Total Active cases'], df_full['States']):
    folium.CircleMarker([lat, lon],
                        radius=value*0.7,
                        popup=('<strong>State</strong>: ' + str(name).capitalize() + '<br>'
                               '<strong>Active Cases</strong>: ' + str(value) + '<br>'),
                        color='red',
                        fill_color='red',
                        fill_opacity=0.3).add_to(india_map)

# Last expression of the cell: renders the map.
india_map

## 3.2 State/Union Territories wise numbers

In [None]:
# Keep one row per state -- the one with the highest confirmed total -- and
# rank the states by confirmed cases, numbering the rows from 1.
df_states = (
    df_states
    .rename(columns={"Date": "Last updated"})
    .sort_values(by=['States', 'Total Confirmed cases'], ascending=[True, False])
    .drop_duplicates(subset='States', keep="first")
    .sort_values(by='Total Confirmed cases', ascending=False)
)
df_states.index = range(1, len(df_states) + 1)
#https://www.kaggle.com/nxrprime/styling-data-frames-covid-19-vs-conferences
def highlight_max(s):
    """Return one CSS string per element of `s`: pink background where the
    element equals the maximum of `s`, an empty string everywhere else."""
    peak = s.max()
    styles = []
    for value in s:
        styles.append('background-color: pink' if value == peak else '')
    return styles



#df.style.apply(highlight_max,subset=['Total Confirmed cases (Indian National)', 'Total Confirmed cases ( Foreign National )'])
# Display the per-state table, applying `highlight_max` to the three listed columns.
df_states.style.apply(highlight_max,subset=['Deaths','Total Confirmed cases','Total Active cases'])

In [None]:
f, ax = plt.subplots(figsize=(12, 8))

# Per-state totals joined with the state coordinates, highest confirmed count first.
India_coord = (
    pd.read_csv('../input/coronavirus-cases-in-india/Indian Coordinates.csv')
    .rename(columns={"Name of State / UT": "States"})
)
df_full = India_coord.merge(df, on='States')
data = (
    df_full[['States', 'Total Confirmed cases', 'Cured/Discharged/Migrated', 'Deaths']]
    .sort_values('Total Confirmed cases', ascending=False)
)

# Confirmed totals as the pastel-red background bars ...
sns.set_color_codes("pastel")
sns.barplot(x="Total Confirmed cases", y="States", data=data,
            label="Total", color="r", ax=ax)

# ... with the recovered counts drawn over them in muted green.
sns.set_color_codes("muted")
sns.barplot(x="Cured/Discharged/Migrated", y="States", data=data,
            label="Recovered", color="g", ax=ax)

# Add a legend and informative axis label
ax.legend(ncol=2, loc="lower right", frameon=True)
# The x-limit is the first row of `df_states`, which the state-table cell
# sorts by confirmed cases in descending order.
ax.set(xlim=(0, df_states['Total Confirmed cases'].tolist()[0]),
       ylabel="States", xlabel="Cases")
sns.despine(left=True, bottom=True)

## 3.3 National vs Foreign cases

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

INDIAN_COL = 'Confirmed cases (Indian Nationals)'
FOREIGN_COL = 'Confirmed cases (Foreign Nationals)'

colors = ['#1f77b4', '#17becf']

# Donut chart: overall split between Indian and foreign nationals.
indian = df[INDIAN_COL].sum()
foreign = df[FOREIGN_COL].sum()
fig = go.Figure(data=[go.Pie(labels=['Indian Nationals', 'Foreign Nationals'],
                             values=[indian, foreign], hole=.35)])

fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=20,
                  marker=dict(colors=colors, line=dict(color='#FFFFF2', width=2)))
fig.show()

# Side-by-side bars per state, each panel ordered by its own count.
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("National Cases", "Foreign Cases"))

# A single descending sort per panel; the original applied the identical sort
# twice in a row, which added nothing.
temp = df.sort_values(INDIAN_COL, ascending=False)
fig.add_trace(go.Bar(y=temp[INDIAN_COL], x=temp["States"],
                     marker=dict(color=temp[INDIAN_COL], coloraxis="coloraxis")),
              1, 1)

temp1 = df.sort_values(FOREIGN_COL, ascending=False)
fig.add_trace(go.Bar(y=temp1[FOREIGN_COL], x=temp1["States"],
                     marker=dict(color=temp1[FOREIGN_COL], coloraxis="coloraxis")),
              1, 2)

# Both panels share one colour axis.
fig.update_layout(coloraxis=dict(colorscale='rdbu'), showlegend=False,
                  title_text="National vs Foreign Cases", plot_bgcolor='rgb(250, 242, 242)')

fig.show()



# Further reading/watching

In [None]:
%%HTML
<center><iframe width="560" height="315" src="https://www.youtube.com/embed/mCa0JXEwDEk" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>