In [19]:
# importing required libraries in python 
import pandas as pd
import numpy as np
from datetime import datetime
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [20]:
# Reading the data from csv file and parsing the date to making it as index

df_base=pd.read_csv('data/processed/Confirmed_processed.csv',
                       parse_dates=[0])  

df_base.sort_values('date',ascending=True).tail()

Unnamed: 0,date,India,US,Russia,Turkey,Brazil
550,2021-07-25,31411262,34444083,6049215,5601608,19688663
551,2021-07-26,31440951,34533179,6071893,5618417,19707662
552,2021-07-27,31484605,34603919,6094379,5638178,19749073
553,2021-07-28,31528114,34672690,6116249,5660469,19797086
554,2021-07-29,31572344,34750860,6138969,5682630,19839369


In [40]:
my_graph(df_base.date,df_base.iloc[:,1:],"Confirmed cases visualisation -India,US,Russia,Turkey and Brazil", y='linear',slider=True)

In [21]:
# function to create a graph
def my_graph(x_in,df_in,title_in,y='log',slider=False):
    """          
        Parameters:
        ----------
        x_in : array 
            array of date time object, or array of numbers
        df_input : pandas dataframe 
            the plotting matrix where each column is plotted
            the name of the column will be used for the legend
        scale: str
            y-axis scale as 'log' or 'linear'
        slider: bool
            True or False for x-axis slider
    
        
        Returns:
        ----------
        
    """
    fig = go.Figure()

    for each in df_in.columns:
        fig.add_trace(go.Scatter(
                        x=x_in,
                        y=df_in[each],
                        name=each,
                        opacity=0.8))       
        
    
    fig.update_layout(autosize=True,title=title_in,
            font=dict(
            size=18,
            color="#7f7f7f"
            )
        )
    fig.update_yaxes(type=y),
    fig.update_xaxes(
                 tickfont=dict(size=14,color="#7f7f7f")
                )
    if slider==True:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [22]:
from sklearn import linear_model
reg = linear_model.LinearRegression(fit_intercept=False)

In [23]:
#train set
no_days=len(df_base['India'])
X=np.arange(no_days-8).reshape(-1, 1) # -8 : to match the number of records with y
y=np.array(df_base['India'][8:]) # [8:] removing initial 0 cases to handle devide by zero for log

In [24]:
reg.fit(X,y) #simple regression 

LinearRegression(fit_intercept=False)

In [25]:
# test set n Forecasting
X_hat=np.arange(no_days).reshape(-1, 1)
Y_hat=reg.predict(X_hat)

In [26]:
data_LRF=df_base[['date','India']].copy()

In [27]:
data_LRF['Forecast']=Y_hat

In [28]:
data_LRF.head()

Unnamed: 0,date,India,Forecast
0,2020-01-22,0,0.0
1,2020-01-23,0,39635.483442
2,2020-01-24,0,79270.966883
3,2020-01-25,0,118906.450325
4,2020-01-26,0,158541.933767


In [29]:
my_graph(data_LRF.date,
           data_LRF.iloc[:,1:],"India's Covid-19 Confirmed cases forecast",
           y='linear',
           slider=True)

In [30]:
#train set-- converting cases into logarithmic values
no_days=len(df_base['India'])
X1=np.arange(no_days-8).reshape(-1, 1) # -8 : to match the number of records with y
y1=np.log(np.array(df_base['India'][8:])) # [8:] removing initial 0 cases to handle devide by zero for log

In [31]:
reg.fit(X1,y1)

LinearRegression(fit_intercept=False)

In [32]:
# test set n Forecasting
X_hat1=np.arange(no_days).reshape(-1, 1)
Y_hat1=reg.predict(X_hat)

In [33]:
data_LRF1=df_base[['date','India']].copy()

In [34]:
data_LRF1['Forecast']=np.exp(Y_hat1)

In [35]:
data_LRF1.tail()

Unnamed: 0,date,India,Forecast
550,2021-07-25,31411262,24569880000.0
551,2021-07-26,31440951,25662240000.0
552,2021-07-27,31484605,26803180000.0
553,2021-07-28,31528114,27994830000.0
554,2021-07-29,31572344,29239470000.0


In [38]:
my_graph(data_LRF1.date,
           data_LRF1.iloc[:,1:],"India's Covid-19 Confirmed cases forecast",
           y='log',
           slider=True)