# Covid-19 Second wave 

#### <span style="font-family: Arial;font-size:1.2em;color:#3366ff">Objective: forecast COVID-19 confirmed cases in India and analyse vaccination progress in each state</span>

# Index

* <a href="#1.1.packages">1.1.packages</a>
* <a href="#1.2.EDA">1.2.EDA</a>
* <a href="#1.3.Time-series-Stationary-check">1.3.Time series Stationary check</a>
* <a href="#1.4.ACF-and-PACF-plots">1.4.ACF and PACF plots</a>
* <a href="#1.5.Train-Test-split">1.5.Train Test split</a>
* <a href="#1.6.-SARIMA">1.6. SARIMA</a>
* <a href="#1.7.SARIMAX">1.7.SARIMAX</a>
* <a href="#1.8.Forecast">1.8.Forecast</a>
* <a href="#Reference">Reference</a>

#  1.1.packages

In [None]:
# packages
import numpy as np 
import pandas as pd 
# visual
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
# tsa
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# evaluation
from sklearn import metrics
# html
from IPython.core.display import HTML

# plotly offline
from plotly.offline import download_plotlyjs,init_notebook_mode
init_notebook_mode(connected=True)

# warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Daily national case counts from covid19india.org.
# The first CSV column is used as the index and parsed as dates.
df = pd.read_csv('../input/covid19-second-wave/case_time_series (10).csv',parse_dates=True,index_col=0)
df = df.drop('Date_YMD',axis=1)

# Country-level data from ourworldindata.org.
# `parse_dates=True` only attempts to parse the index, so without an index
# column the 'date' values stayed plain strings. Parse and index them in one
# step so every per-country series shares a real DatetimeIndex.
dfo = pd.read_csv('../input/covid19-our-world-in-data/owid-covid-data (6).csv',
                  parse_dates=['date'], index_col='date')

# State-wise vaccination counts for India
df_vc = pd.read_csv('../input/covid19-in-india/covid_vaccine_statewise.csv')

# 1.2.EDA

## <span style="font-family: Arial;font-size:1.2em;color:#3366ff">i. daily confirmed cases

In [None]:
# Interactive line chart of India's daily confirmed cases over time
fig = px.line(
    data_frame=df['Daily Confirmed'],
    title='Daily Confirmed cases in India',
    template='none',
)
fig.show()

In [None]:
# Smoothed daily new cases for each country in the comparison,
# selected from the OWID frame by its 'location' column.
ind = dfo.loc[dfo['location'] == 'India', 'new_cases_smoothed']
us = dfo.loc[dfo['location'] == 'United States', 'new_cases_smoothed']
brz = dfo.loc[dfo['location'] == 'Brazil', 'new_cases_smoothed']
uk = dfo.loc[dfo['location'] == 'United Kingdom', 'new_cases_smoothed']

# One column per country, aligned on the shared date index
time_new_case = pd.DataFrame({
    'India': ind,
    'United states': us,
    'Brazil': brz,
    'United Kingdom': uk,
})


## <span style="font-family: Arial;font-size:1.2em;color:#3366ff"> ii. where India stands...

In [None]:
# Compare the smoothed new-case curves of the four countries on one chart
fig = px.line(
    data_frame=time_new_case,
    title='Confirmed cases in different nations',
    template='none',
)
fig.show()


In [None]:
# Largest cumulative case count reported for every (continent, iso_code, location)
sun_groups = dfo.groupby(['continent','iso_code','location'])
plot_sun_df = sun_groups['total_cases'].max().reset_index()

# Sunburst: inner ring = continent, outer ring = country, sized by total cases
fig = px.sunburst(
    plot_sun_df,
    path=['continent','location'],
    values='total_cases',
    title= 'Confirmed cases around the world',
    height=620,
    template='none',
)
fig.show()

* <span style="font-family: Arial;font-size:1.2em;color:#3366ff"> The United States, Brazil and India account for most of the confirmed cases</span>

In [None]:
# Total deaths per continent.
# 'total_deaths' is cumulative per country, so taking the max over a whole
# continent only returns the worst-hit country's figure. Take each country's
# latest (max) cumulative value first, then add the countries up.
deaths_per_country = dfo.groupby(['continent','location'])['total_deaths'].max()
cont_plot = (
    deaths_per_country
    .groupby(level='continent').sum()
    .reset_index()
    .sort_values('total_deaths',ascending=False)
)

fig =px.bar(cont_plot,x='continent',y='total_deaths',template='none',title='Total deaths across continent')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()

In [None]:
# Cumulative deaths per country, highest first.
# The OWID file mixes real countries with aggregate rows (World, continents,
# ...). Dropping them by hard-coded row positions silently breaks whenever the
# ranking changes, so filter on the data instead.
# NOTE(review): assumes aggregate rows have an empty 'continent', as in the
# published OWID schema -- confirm against this dataset version.
countries_only = dfo[dfo['continent'].notna()]
country_plot = (
    countries_only
    .groupby('location')['total_deaths'].max()
    .sort_values(ascending=False)
    .reset_index()
)

# Bar chart of the 20 countries with the most deaths
fig =px.bar(country_plot.head(20),x='location',y='total_deaths',template='none',title='Total deaths across countries')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()

* <span style="font-family: Arial;font-size:1.2em;color:#3366ff"> Most of the human losses happened in the top 5 countries United States, Brazil, India, Mexico and the United Kingdom

In [None]:
# Cumulative death counts for the four countries being compared
ind =dfo[dfo['location'] =='India']['total_deaths']
us = dfo[dfo['location'] =='United States']['total_deaths']
brz = dfo[dfo['location'] =='Brazil']['total_deaths']
uk = dfo[dfo['location'] =='United Kingdom']['total_deaths']

# One column per country, aligned on the shared date index
time_new_death = pd.DataFrame({'India':ind,'United states':us,'Brazil':brz,'United Kingdom':uk})

# The plotted column is the running total, not the daily count, so the title
# says "Total" (it previously read "Daily deaths").
fig = px.line(time_new_death,template='none',title='Total deaths in different nations')

fig.show()

## <span style="font-family: Arial;font-size:1.2em;color:#3366ff">iii. Vaccination progress</span>
    
* <span style="font-family: Arial;font-size:1.1em;color:#3366ff">What percentage of the population has been vaccinated in each Indian state?</span>

In [None]:
%%HTML
<div class='tableauPlaceholder' id='viz1621341619591' style='position: relative'><noscript><a href='#'><img alt='Dashboard 1 ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;To&#47;Totalvaccinatedacrossstates&#47;Dashboard1&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='Totalvaccinatedacrossstates&#47;Dashboard1' /><param name='tabs' value='no' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;To&#47;Totalvaccinatedacrossstates&#47;Dashboard1&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en' /><param name='filter' value='publish=yes' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1621341619591');                    var vizElement = divElement.getElementsByTagName('object')[0];                    if ( divElement.offsetWidth > 800 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else if ( divElement.offsetWidth > 500 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else { vizElement.style.width='100%';vizElement.style.height='727px';}                     var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>

* <span style="font-family: Arial;font-size:1.2em;color:#3366ff"> In the map above, red marks states where a lower percentage of the population is vaccinated and green marks states where a higher percentage is vaccinated</span>

* <span style="font-family: Arial;font-size:1.2em;color:#3366ff"> The states with less than 10% of the population vaccinated are Tamil Nadu, Madhya Pradesh, Uttar Pradesh, Bihar, Jharkhand, West Bengal, Assam, Meghalaya, Nagaland and Manipur</span>
    
* <span style="font-family: Arial;font-size:1.2em;color:#3366ff"> The states with more than 25% of the population vaccinated include Goa, Tripura, Sikkim, Himachal Pradesh and Ladakh</span>

In [None]:
%%HTML
<div class='tableauPlaceholder' id='viz1620392585071' style='position: relative'><noscript><a href='#'><img alt='Total covid-19 active cases  and deaths across states ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;To&#47;Totalcovid-19activecases&#47;Totaldeathsinacrossstates&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='Totalcovid-19activecases&#47;Totaldeathsinacrossstates' /><param name='tabs' value='no' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;To&#47;Totalcovid-19activecases&#47;Totaldeathsinacrossstates&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1620392585071');                    var vizElement = divElement.getElementsByTagName('object')[0];                    vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';                    var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>

In [None]:
%%HTML
<div class='tableauPlaceholder' id='viz1621500069349' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;va&#47;vaccination_progress_India2&#47;Dashboard1&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='vaccination_progress_India2&#47;Dashboard1' /><param name='tabs' value='yes' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;va&#47;vaccination_progress_India2&#47;Dashboard1&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en' /><param name='filter' value='publish=yes' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1621500069349');                    var vizElement = divElement.getElementsByTagName('object')[0];                    if ( divElement.offsetWidth > 800 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else if ( divElement.offsetWidth > 500 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else { vizElement.style.width='100%';vizElement.style.minHeight='800px';vizElement.style.maxHeight=(divElement.offsetWidth*1.77)+'px';}                     var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);               
 </script>

In [None]:
# Highest reported 'Total Individuals Vaccinated' per state, largest first
state_max = df_vc.groupby('State')['Total Individuals Vaccinated'].max()
vc_pro = state_max.reset_index().sort_values(['Total Individuals Vaccinated'],ascending=False)

# The file also carries a nationwide 'India' row; drop it so only states remain
vc_pro = vc_pro.loc[vc_pro['State'] != 'India']

# Horizontal bars, coloured by the same value that sets their length
px.bar(
    vc_pro,
    y='State',
    x='Total Individuals Vaccinated',
    color='Total Individuals Vaccinated',
    height=550,
    title='Total no. of Individual vaccinated across state',
)

# <span style="font-family: Arial;font-size:1.2em;color:#3366ff">

# 1.3.Time series Stationary check

* ARMA-type models assume the time series is stationary; we can check this statistically with the Augmented Dickey-Fuller (ADF) test

### H0: The Time Series is non-stationary
### Ha: The Time Series is stationary

In [None]:
# Augmented Dickey-Fuller test on the daily confirmed series.
# Element [1] of the returned tuple is the p-value.
adfuller_test =adfuller(df['Daily Confirmed'])
print('adfuller test P-value:', adfuller_test[1])

# State the decision explicitly so the conclusion drawn below is backed by the
# output: H0 (non-stationary) is rejected only when p < 0.05.
if adfuller_test[1] < 0.05:
    print('p < 0.05: reject H0 -> the series can be treated as stationary')
else:
    print('p >= 0.05: cannot reject H0 -> the series may be non-stationary')

#### We reject H0 when the p-value is below 0.05; on that basis the Daily Confirmed time series is treated as stationary and we can continue

# Let's decompose the time series

* Trend
* Seasonality
* error or white noise

In [None]:
# The bare 'seaborn' style name was renamed 'seaborn-v0_8' in matplotlib 3.6
# and the old alias was later removed; use whichever this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Apply the figure defaults after the style sheet: a style sheet may define its
# own figure size, which would otherwise override these values.
plt.rcParams.update({'figure.figsize':(7,4), 'figure.dpi':90})

# Additive decomposition into trend, seasonal and residual parts.
# NOTE(review): period=14 here, while the models below use a 7-day season --
# confirm which period is intended.
decompose = seasonal_decompose(df['Daily Confirmed'],model='additive',period=14)
decompose.plot();

# 1.4.ACF and PACF plots
* auto-correlation function and partial auto-correlation function


In [None]:
# Autocorrelation of the first-differenced daily confirmed series, 50 lags.
# NOTE(review): the series is differenced here, but the models below use d=0.
plot_acf(
    df['Daily Confirmed'].diff().dropna(),
    lags=50,
);

In [None]:
# Partial autocorrelation of the first-differenced daily confirmed series, 50 lags
plot_pacf(
    df['Daily Confirmed'].diff().dropna(),
    lags=50,
);

## ACF and Pacf 

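### Model orders chosen from the ACF and PACF plots

* p = 1 (non-seasonal AR order)
* d = 0 (no differencing)
* q = 1 (non-seasonal MA order)
* Seasonal period = 7 (weekly)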

# 1.5.Train Test split

In [None]:
# Number of most recent observations held out as the test set
test_size: int = 30

In [None]:
# Every row before the held-out tail is used for training
train_size = df.shape[0] - test_size

In [None]:
# Chronological split (no shuffling): the first `train_size` rows train the
# model, the remainder is the test window. Both stay one-column DataFrames.
train = df.iloc[:train_size][['Daily Confirmed']]
test = df.iloc[train_size:][['Daily Confirmed']]

In [None]:
# Show the (rows, columns) shape of the train and test frames, one per line
print(train.shape, test.shape, sep='\n')

In [None]:
# Candidate exogenous variables: daily recoveries and daily deaths
exons = df.loc[:, ['Daily Recovered','Daily Deceased']]

# The complete target series (train and test together)
full_data = df.loc[:, 'Daily Confirmed']

# 1.6. SARIMA 

In [None]:

# Fit the model on the training window only; the name ends up bound to the
# fitted results object, as the following cells expect.
# NOTE(review): seasonal_order=(0,0,0,7) has no seasonal AR/I/MA terms, so this
# looks like a plain ARMA(1,1) despite the SARIMA name -- confirm the intended
# seasonal orders.
SARIMA_1017_model = sm.tsa.statespace.SARIMAX(
    train.values,
    order=(1,0,1),
    seasonal_order=(0,0,0,7),
    enforce_stationarity=False,
    enforce_invertibility=False,
).fit(maxiter=1000)

In [None]:
# Display the fitted model's summary as the cell output
SARIMA_1017_model.summary()

In [None]:
# The bare 'seaborn' style name was renamed 'seaborn-v0_8' in matplotlib 3.6
# and the old alias was later removed; use whichever this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Residual diagnostic plots for the fitted SARIMA model
SARIMA_1017_model.plot_diagnostics();

In [None]:
# Forecast exactly as many steps as the held-out test window contains
forecast_SARIMA = SARIMA_1017_model.get_forecast(steps=test_size)

# The model was fitted on a bare array, so the forecast has no dates of its
# own; re-index the 95% summary frame with daily dates starting at the first
# test observation.
pred_SARIMA_1017_date = (
    forecast_SARIMA
    .summary_frame(alpha=0.05)
    .set_index(pd.date_range(start=test.index[0], periods=test_size, freq='D'))
)

# Model evaluation RMSE

In [None]:
# Root-mean-squared error of the forecast mean against the held-out test values
RSME_SARIMA_7 = np.sqrt(metrics.mean_squared_error(test,pred_SARIMA_1017_date['mean']))

# Results table. The labels now spell the metric correctly (RMSE, not RSME)
# and name the order that was actually fitted, (1,0,1) with period 7, instead
# of the stale '010' tag.
RMSE = pd.DataFrame({'Model':['SARIMA(1,0,1)(0,0,0,7)'],'RMSE':[RSME_SARIMA_7]})
RMSE

# 1.7.SARIMAX 

In [None]:

# Final model, fitted on the complete series so the forecast starts right
# after the last observation.
#
# The original call passed `exons=exons`. SARIMAX has no such parameter (the
# regressor argument is `exog`), so the recovered/deceased columns never
# entered the model. The stray keyword is removed rather than renamed because
# a model with regressors needs their future values supplied to
# `get_forecast(..., exog=...)`, which the forecast cell does not provide.
# NOTE(review): to make this a true SARIMAX, pass `exog=exons` here AND supply
# future exogenous values when forecasting.
SARIMAX__model = sm.tsa.statespace.SARIMAX(full_data.values,
                                            order=(1,0,1),
                                            seasonal_order=(1,0,1,7),
                                         enforce_stationarity=False,
                                         enforce_invertibility=False,)
SARIMAX__model = SARIMAX__model.fit(maxiter=1000)

SARIMAX__model.summary()

In [None]:
# The bare 'seaborn' style name was renamed 'seaborn-v0_8' in matplotlib 3.6
# and the old alias was later removed; use whichever this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Residual diagnostic plots for the model fitted on the full series
SARIMAX__model.plot_diagnostics();

In [None]:
# Forecast horizon in days
days=10
prediction = SARIMAX__model.get_forecast(steps=days)

# The model was fitted on a bare array, so the forecast carries no dates.
# Start the index the day after the last observation in `df` instead of a
# hard-coded date, so the forecast stays aligned when the data is refreshed.
pred_date = prediction.summary_frame(alpha=0.05).set_index(
    pd.date_range(start=pd.to_datetime(df.index[-1]) + pd.Timedelta(days=1),
                  periods=days, freq='D'))


# 1.8.Forecast

In [None]:

# 'seaborn-whitegrid' was renamed 'seaborn-v0_8-whitegrid' in matplotlib 3.6
# and the old alias was later removed; use whichever this install provides.
plt.style.use('seaborn-v0_8-whitegrid'
              if 'seaborn-v0_8-whitegrid' in plt.style.available
              else 'seaborn-whitegrid')

# Observed series, then the forecast mean on the same axes
ax =df['Daily Confirmed'].plot(figsize=(15,6));
pred_date['mean'].plot(ax=ax, label='forecast',alpha=.50);
# Shade the 95% confidence interval around the forecast mean
ax.fill_between(pred_date.index,pred_date['mean_ci_lower'], pred_date['mean_ci_upper'], color='y', alpha=.20)
plt.title('forecast for next 10 days in India');
ax.set_ylabel('Confirmed cases');
plt.legend(loc='best');

In [None]:
# Interactive version of the forecast chart: observed series, forecast mean and
# a shaded 95% confidence band.
fig = go.Figure()

fig.add_trace(go.Scatter(
    name="Actual",
     x=df.index, y=df["Daily Confirmed"]))

fig.add_trace(go.Scatter(
    name="prediction",mode="lines",
     x=pred_date.index, y=pred_date['mean']))

# Lower bound: invisible line with NO fill. With fill='tonexty' it filled up to
# the prediction trace, shading part of the band twice.
fig.add_trace(go.Scatter(
    name="lowerbound",mode="lines",
        line=dict(width=0),showlegend=False,
     x=pred_date.index, y=pred_date['mean_ci_lower']))

# Upper bound: 'tonexty' fills down to the previously added trace (the lower
# bound), giving one uniformly shaded confidence band.
fig.add_trace(go.Scatter(name="upperbound",mode="lines",
        line=dict(width=0),fillcolor='rgba(68, 68, 68, 0.3)',fill='tonexty',showlegend=False,
                         x=pred_date.index, y=pred_date['mean_ci_upper']))

fig.show()

# Reference



* https://online.stat.psu.edu/stat501/lesson/14/14.1
* https://www.kaggle.com/prashant111/arima-model-for-time-series-forecasting 
* https://www.youtube.com/watch?v=FPM6it4v8MY  Time series concepts
* https://people.duke.edu/~rnau/411home.htm
* https://api.covid19india.org/ # for Dataset
* https://ourworldindata.org/covid-vaccinations # for Dataset

# feel free to post your suggestions

# Thank you