In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively visit /kaggle/input and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Load StatewiseTestingDetails.csv and show which columns it provides.
testing_csv_path = '/kaggle/input/covid19-in-india/StatewiseTestingDetails.csv'
df_test = pd.read_csv(testing_csv_path)
df_test.columns

In [3]:
# Show the distinct values found in the 'State' column of the testing data.
df_test['State'].unique()

In [4]:
# Load covid_vaccine_statewise.csv and show which columns it provides.
vaccine_csv_path = '/kaggle/input/covid19-in-india/covid_vaccine_statewise.csv'
df_vaccine = pd.read_csv(vaccine_csv_path)
df_vaccine.columns

In [5]:
# Show the distinct values found in the 'State' column of the vaccination data.
df_vaccine['State'].unique()

In [6]:
# Load covid_19_india.csv and show which columns it provides.
covid_csv_path = '/kaggle/input/covid19-in-india/covid_19_india.csv'
df_covid = pd.read_csv(covid_csv_path)
df_covid.columns

In [7]:
# Show the distinct values found in the 'State/UnionTerritory' column of the case data.
df_covid['State/UnionTerritory'].unique()

In [8]:
# The 'State' column also contains the value "India"; per the author's reading of
# the data, those rows carry the nationwide vaccination figures for each date.
is_national_row = df_vaccine['State'] == 'India'
India = df_vaccine[is_national_row]
# Keep only the reporting date and the 'Total Individuals Vaccinated' column.
kept_columns = ['Updated On', 'Total Individuals Vaccinated']
sub_India = India.loc[:, kept_columns]
sub_India

In [9]:
# Rename 'Updated On' to 'Date'. inplace=True mutates sub_India itself instead of
# returning a new frame, so cells that displayed it earlier now show stale labels.
sub_India.rename(columns = {'Updated On':'Date'}, inplace = True)

In [10]:
# Preview the first rows of sub_India.
sub_India.head()

In [11]:
# Show the last 10 rows of sub_India.
sub_India.tail(10)

In [12]:
# Count the missing values in each column of sub_India.
sub_India.isna().sum()

In [13]:
# (rows, columns) of sub_India before rows with missing values are dropped.
sub_India.shape

In [14]:
# Remove every row that contains a missing value; inplace=True modifies sub_India directly.
sub_India.dropna(inplace=True)

In [15]:
# (rows, columns) again, to compare against the shape shown before dropna.
sub_India.shape

In [16]:
# List the column labels currently present in sub_India.
sub_India.columns

In [17]:
# Move 'Date' out of the columns and make it the index, in place.
# NOTE(review): this consumes the 'Date' column, so running the cell a second time
# without rebuilding sub_India will presumably fail on the missing column - confirm.
sub_India.set_index('Date',inplace=True)

In [18]:
# Show the last few values of the 'Total Individuals Vaccinated' column.
sub_India['Total Individuals Vaccinated'].tail()

In [19]:
from sklearn.preprocessing import MinMaxScaler

# Learn the min/max from the training rows only, then apply that mapping to the
# whole series. Fitting on every row (fit_transform on the full frame) would let
# the held-out rows influence preprocessing, i.e. leak test data into training.
# The 0.80 fraction must stay equal to the one used for the train/test split.
scaler = MinMaxScaler()
n_fit_rows = int(len(sub_India) * 0.80)
scaler.fit(sub_India.iloc[:n_fit_rows])
# Rows after the fitted range can map outside [0, 1]; that is expected.
scaled = scaler.transform(sub_India)


In [20]:
# Split by row order: the first 80% of rows form the training set and the
# remaining rows form the test set (no shuffling).
n_rows = len(scaled)
train_size = int(n_rows * 0.80)
test_size = n_rows - train_size
train = scaled[:train_size, :]
test = scaled[train_size:, :]
print(len(train), len(test))

In [21]:
def create_dataset(scaled, look_back=1):
    """Turn a series into sliding-window (inputs, target) pairs.

    Sample ``i`` takes column 0 of rows ``i .. i + look_back - 1`` as its
    inputs and column 0 of row ``i + look_back`` as its target.

    Args:
        scaled: 2-D array indexed as ``scaled[row, 0]``; only column 0 is read.
        look_back: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(dataX, dataY)`` of NumPy arrays shaped
        ``(n_samples, look_back)`` and ``(n_samples,)`` where
        ``n_samples = max(len(scaled) - look_back - 1, 0)``. When the series
        is too short to yield a sample, ``dataX`` still has shape
        ``(0, look_back)`` so callers that read ``dataX.shape[1]`` keep working.

    Note:
        The sample count keeps the original ``- 1``, so the final row is never
        used as a target. Code elsewhere in this notebook derives plotting
        offsets from that count, so it is preserved here on purpose.
    """
    n_samples = max(len(scaled) - look_back - 1, 0)
    if n_samples == 0:
        # np.array([]) would be 1-D with shape (0,); keep the window axis instead.
        return np.empty((0, look_back)), np.empty((0,))
    dataX = [scaled[i:i + look_back, 0] for i in range(n_samples)]
    dataY = [scaled[i + look_back, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

In [22]:

# Window length: each sample feeds the model this many consecutive values.
look_back = 4
# Build (inputs, target) pairs separately for the training and the test rows.
trainX, trainY = create_dataset(train, look_back=look_back)
testX, testY = create_dataset(test, look_back=look_back)

In [23]:
# Add a length-1 middle axis: (samples, look_back) -> (samples, 1, look_back),
# which is the layout the LSTM layer is declared with (input_shape=(1, look_back)).
trainX = trainX.reshape((trainX.shape[0], 1, trainX.shape[1]))
testX = testX.reshape((testX.shape[0], 1, testX.shape[1]))

In [24]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten

In [25]:
import tensorflow as tf

# Seed NumPy and TensorFlow before any weights are created so that weight
# initialisation and batch shuffling repeat across "Restart & Run All" runs
# (as far as the backend allows; some GPU kernels may remain non-deterministic).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# One LSTM layer with 4 units reading a single time step of `look_back`
# features, followed by a linear output neuron that predicts the next value.
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# Keep the History object so the per-epoch loss can be inspected later.
history = model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

In [26]:
import math
from sklearn.metrics import mean_squared_error

# Run the fitted model on both splits (still in scaled units).
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# Map predictions and targets back to the original units. The 1-D targets are
# wrapped in a list so they become a single-row 2-D array; afterwards the values
# are read back as trainY[0] / testY[0].
# NOTE(review): trainY and testY are overwritten here, so re-running this cell
# applies inverse_transform to already-unscaled values.
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

# Root mean squared error per split, reported in the original units.
train_mse = mean_squared_error(trainY[0], trainPredict[:,0])
trainScore = math.sqrt(train_mse)
print('Train Score: %.2f RMSE' % (trainScore))
test_mse = mean_squared_error(testY[0], testPredict[:,0])
testScore = math.sqrt(test_mse)
print('Test Score: %.2f RMSE' % (testScore))

In [27]:
from sklearn.metrics import r2_score

# Coefficient of determination of the training-split predictions.
R_square = r2_score(y_true=trainY[0], y_pred=trainPredict[:,0])
print(R_square)

In [28]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the training-split predictions.
mean_absolute_error(y_true=trainY[0], y_pred=trainPredict[:,0])

In [29]:
def mean_absolute_percentage_error(y_true, y_pred): 
    """Return the mean absolute percentage error as a fraction (0.1 means 10%).

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        Mean of ``|y_true - y_pred| / max(|y_true|, eps)`` where ``eps`` is the
        float64 machine epsilon. The result is NOT multiplied by 100.

    Clamping the denominator keeps the result finite when ``y_true`` contains
    zeros: an exact hit on a zero target contributes 0 instead of nan, while a
    miss on a zero target contributes a very large (but finite) value.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    eps = np.finfo(np.float64).eps
    denominator = np.maximum(np.abs(y_true), eps)
    return np.mean(np.abs(y_true - y_pred) / denominator)

# Evaluate the helper on the training-split targets and predictions.
mean_absolute_percentage_error(trainY[0], trainPredict[:,0])

In [33]:
# shift train predictions for plotting
import matplotlib.pyplot as plt

# The prediction made from the window starting at row i targets row
# i + look_back, so the training predictions start at row `look_back`.
trainPredictPlot = np.full_like(scaled, np.nan)
trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict

# shift test predictions for plotting
# Test windows start at row len(train), so their targets start look_back rows
# later. Sizing the slice from len(testPredict) keeps the assignment valid
# however many windows were produced, instead of hard-coding that count.
test_start = len(train) + look_back
testPredictPlot = np.full_like(scaled, np.nan)
testPredictPlot[test_start:test_start + len(testPredict), :] = testPredict

# plot baseline and predictions
plt.plot(scaler.inverse_transform(scaled), color='red', label='Actual')
plt.plot(trainPredictPlot, color="blue", label='Train prediction')
plt.plot(testPredictPlot, color="green", label='Test prediction')
plt.title('Total Individuals Vaccinated: actual vs. LSTM predictions')
plt.xlabel('Row number')
plt.ylabel('Total Individuals Vaccinated')
plt.legend()
plt.show()