In [70]:
# importing required libraries in python 
import pandas as pd
import numpy as np
from datetime import datetime
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [71]:
# Reading the data from csv file and parsing the date to making it as index

df_base=pd.read_csv('data/processed/Deaths_processed.csv',
                       parse_dates=[0])  

df_base.sort_values('date',ascending=True).tail()

Unnamed: 0,date,India,US,Russia,Turkey,Brazil
550,2021-07-25,420967,610681,151352,50934,549924
551,2021-07-26,421382,610952,152069,50997,550502
552,2021-07-27,422022,611414,152836,51048,551835
553,2021-07-28,422662,611801,153620,51124,553179
554,2021-07-29,423217,612122,154404,51184,554497


In [72]:
# function to create a graph
def my_graph(x_in,df_in,y='log',slider=False):
    """          
        Parameters:
        ----------
        x_in : array 
            array of date time object, or array of numbers
        df_input : pandas dataframe 
            the plotting matrix where each column is plotted
            the name of the column will be used for the legend
        scale: str
            y-axis scale as 'log' or 'linear'
        slider: bool
            True or False for x-axis slider
    
        
        Returns:
        ----------
        
    """
    fig = go.Figure()

    for each in df_in.columns:
        fig.add_trace(go.Scatter(
                        x=x_in,
                        y=df_in[each],
                        name=each,
                        opacity=0.8))       
        
    
    fig.update_layout(autosize=True,title="India's Covid-19 Deaths forecast",
            font=dict(
            size=18,
            color="#7f7f7f"
            )
        )
    fig.update_yaxes(type=y),
    fig.update_xaxes(
                 tickfont=dict(size=14,color="#7f7f7f")
                )
    if slider==True:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [73]:
from sklearn import linear_model
reg = linear_model.LinearRegression(fit_intercept=False)

In [74]:
#train set
no_days=len(df_base['India'])
X=np.arange(no_days-8).reshape(-1, 1) # -8 : to match the number of records with y
y=np.array(df_base['India'][8:]) # [8:] removing initial 0 cases to handle devide by zero for log

In [75]:
reg.fit(X,y) #simple regression 

LinearRegression(fit_intercept=False)

In [76]:
# test set n Forecasting
X_hat=np.arange(no_days).reshape(-1, 1)
Y_hat=reg.predict(X_hat)

In [77]:
data_LRF=df_base[['date','India']].copy()

In [78]:
data_LRF['Forecast']=Y_hat

In [79]:
data_LRF.head()

Unnamed: 0,date,India,Forecast
0,2020-01-22,0,0.0
1,2020-01-23,0,520.80705
2,2020-01-24,0,1041.614101
3,2020-01-25,0,1562.421151
4,2020-01-26,0,2083.228201


In [80]:
my_graph(data_LRF.date,
           data_LRF.iloc[:,1:],
           y='linear',
           slider=True)

In [81]:
#train set-- converting cases into logarithmic values
no_days=len(df_base['India'])
X1=np.arange(no_days-51).reshape(-1, 1) # -8 : to match the number of records with y
y1=np.log(np.array(df_base['India'][51:])) # [8:] removing initial 0 cases to handle devide by zero for log

In [82]:
reg.fit(X1,y1)

LinearRegression(fit_intercept=False)

In [83]:
# test set n Forecasting
X_hat1=np.arange(no_days).reshape(-1, 1)
Y_hat1=reg.predict(X_hat)

In [84]:
data_LRF1=df_base[['date','India']].copy()

In [85]:
data_LRF1['Forecast']=np.exp(Y_hat1)

In [86]:
data_LRF1.tail()

Unnamed: 0,date,India,Forecast
550,2021-07-25,420967,290546900.0
551,2021-07-26,421382,301025900.0
552,2021-07-27,422022,311882900.0
553,2021-07-28,422662,323131500.0
554,2021-07-29,423217,334785700.0


In [87]:
my_graph(data_LRF1.date,
           data_LRF1.iloc[:,1:],
           y='log',
           slider=True)