In [515]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from datetime import datetime
from datetime import timedelta
from pandas.io.json import json_normalize 

In [516]:
def get_dataset(url, state, y_column, recent_days_count):
    """Load one state's case history from ``url`` and return its latest rows.

    The JSON document is read with ``pd.read_json``; every entry of its
    ``data`` field is flattened so that each item of ``regional`` becomes one
    row carrying the parent ``day``. Rows whose ``loc`` equals ``state`` are
    kept and renamed to ``date`` / ``state`` / ``cases``.

    A manually recorded Tamil Nadu row for 2020-04-08 is added only when
    ``state`` is Tamil Nadu and the source does not already contain that date.

    Parameters
    ----------
    url : str
        Location handed to ``pd.read_json``.
    state : str
        Value compared against the ``loc`` field of each regional record.
    y_column : str
        Unused here; kept so existing callers keep working.
    recent_days_count : int
        Number of trailing rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``state``, ``cases``, ``deaths``, ``day``. ``day`` is
        a 1-based running index over the state's full history, assigned
        before the tail is taken.
    """
    # Figures entered by hand because the source lacked them at the time.
    manual_row = {'date': '2020-04-08', 'state': 'Tamil Nadu', 'cases': 738, 'deaths': 8}

    raw_dataset = pd.read_json(url)
    regional = pd.json_normalize(data=raw_dataset['data'], record_path='regional', meta=['day'])

    renamed = regional[regional['loc'] == state].rename(
        columns={'day': 'date', 'loc': 'state', 'totalConfirmed': 'cases'}
    )
    state_history = renamed[['date', 'state', 'cases', 'deaths']]

    # Only patch in the manual row where it belongs: the matching state, and
    # only if the source has not caught up (avoids a duplicate, out-of-order row).
    already_present = state_history['date'].eq(manual_row['date']).any()
    if state == manual_row['state'] and not already_present:
        # DataFrame.append no longer exists in pandas 2.x; concat is the replacement.
        state_history = pd.concat(
            [state_history, pd.DataFrame([manual_row])], ignore_index=True
        )

    # reset_index returns a fresh frame, so insert() never writes into a slice.
    dataset_all = state_history.reset_index(drop=True)
    dataset_all.insert(4, 'day', range(1, 1 + len(dataset_all)))
    return dataset_all.tail(recent_days_count)

In [517]:
def get_model(dataset, y_column):
    """Fit a linear regression of ``y_column`` on ``day`` and print hold-out errors.

    Both columns are reshaped to single-column 2-D arrays, split with
    ``train_test_split(test_size=0.2, random_state=0)``, and a
    ``LinearRegression`` is trained on the training part. MAE, MSE and RMSE
    on the held-out part are printed.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``day`` column and the column named by ``y_column``.
    y_column : str
        Name of the target column.

    Returns
    -------
    LinearRegression
        The fitted estimator.
    """
    features = dataset['day'].values.reshape(-1, 1)
    targets = dataset[y_column].values.reshape(-1, 1)
    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, test_size=0.2, random_state=0
    )

    model = LinearRegression()
    model.fit(features_train, targets_train)

    held_out_pred = model.predict(features_test)
    mean_abs_err = metrics.mean_absolute_error(targets_test, held_out_pred)
    mean_sq_err = metrics.mean_squared_error(targets_test, held_out_pred)

    print('Mean Absolute Error:', mean_abs_err)
    print('Mean Squared Error:', mean_sq_err)
    print('Root Mean Squared Error:', np.sqrt(mean_sq_err))

    # For a visual check, plot_training_data / plot_test_data can be called
    # here with the split arrays and the fitted model.
    return model


In [518]:
def plot_training_data(X_train, y_train, regressor):
    """Scatter the training points in gray and overlay the model's line in red.

    The line is drawn from ``regressor.predict(X_train)``. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    fitted_values = regressor.predict(X_train)
    plt.scatter(X_train, y_train, color='gray')
    plt.plot(X_train, fitted_values, color='red', linewidth=2)
    plt.show()

In [519]:
def plot_test_data(X_test, y_test, y_pred):
    """Scatter the held-out points in gray and overlay the predictions in red.

    ``y_pred`` is plotted as a line against ``X_test``. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    observed_style = {'color': 'gray'}
    predicted_style = {'color': 'red', 'linewidth': 2}
    plt.scatter(X_test, y_test, **observed_style)
    plt.plot(X_test, y_pred, **predicted_style)
    plt.show()

In [520]:
def get_next_days(dataset, prediction_duration):
    """Return the ``day`` indices that follow the last row of ``dataset``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``day`` column; only its last value is read.
    prediction_duration : int
        How many consecutive day indices to produce.

    Returns
    -------
    list
        ``prediction_duration`` values starting at ``last day + 1``. Empty
        when ``prediction_duration`` is zero or negative (previously one
        value was always returned).

    Raises
    ------
    IndexError
        If ``dataset`` has no rows.
    """
    last_day = dataset['day'].iloc[-1]
    return [last_day + offset for offset in range(1, prediction_duration + 1)]


In [521]:
def get_next_dates(dataset, prediction_duration):
    """Return the calendar dates that follow the last row of ``dataset``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``date`` column whose last value is a
        ``YYYY-MM-DD`` string.
    prediction_duration : int
        How many consecutive dates to produce.

    Returns
    -------
    list of str
        ``prediction_duration`` dates formatted ``YYYY-MM-DD``, starting the
        day after the last date. Empty when ``prediction_duration`` is zero
        or negative (previously one date was always returned).

    Raises
    ------
    IndexError
        If ``dataset`` has no rows.
    ValueError
        If the last date does not match ``YYYY-MM-DD``.
    """
    date_format = '%Y-%m-%d'
    # Parse once; each result is an offset from this anchor rather than a
    # re-parse of the previously formatted string.
    last_date = datetime.strptime(dataset['date'].iloc[-1], date_format)
    return [
        (last_date + timedelta(days=offset)).strftime(date_format)
        for offset in range(1, prediction_duration + 1)
    ]

In [522]:
def get_next_day_predictions(next_days, regressor):
    """Predict one integer value per entry of ``next_days``.

    Each day is passed to ``regressor.predict`` as a 1x1 input; the single
    predicted value is truncated toward zero with ``int``.

    Returns a list with one integer per day, in the same order.
    """
    return [int(regressor.predict([[day]])[0, 0]) for day in next_days]


In [523]:
def perform_predictions(url, state, y_column, recent_days_count, prediction_duration):
    """Run the full pipeline: load data, fit a model, and predict ahead.

    Loads the recent rows for ``state`` via ``get_dataset``, fits a model on
    ``y_column`` via ``get_model``, then derives the upcoming day indices and
    dates and predicts a value for each day.

    Returns a ``(next_dates, next_day_predictions)`` tuple.
    """
    recent_history = get_dataset(url, state, y_column, recent_days_count)
    fitted_model = get_model(recent_history, y_column)

    upcoming_days = get_next_days(recent_history, prediction_duration)
    upcoming_dates = get_next_dates(recent_history, prediction_duration)

    return upcoming_dates, get_next_day_predictions(upcoming_days, fitted_model)


In [524]:
def result_dataframe(state, next_dates, confirmed_cases_predictions, death_cases_predictions):
    """Assemble the predictions into a single table.

    Parameters
    ----------
    state : str
        Written into every row of the ``State`` column.
    next_dates : list
        Values for the ``Date`` column.
    confirmed_cases_predictions : list
        Values for the ``Confirmed_Cases_Predications`` column.
    death_cases_predictions : list
        Values for the ``Death_Cases_Predications`` column.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``next_dates``.
    """
    # The earlier df.style.set_properties(...) call was dropped: its Styler was
    # discarded, so it never affected the returned frame, and merely accessing
    # .style needs the optional Jinja2 dependency.
    # Column names (including their spelling) are kept as-is so that any
    # existing consumers of this frame keep working.
    return pd.DataFrame({
        'State': state,
        'Date': next_dates,
        'Confirmed_Cases_Predications': confirmed_cases_predictions,
        'Death_Cases_Predications': death_cases_predictions,
    })

In [525]:
## Prediction Model for India - Tamil Nadu

# Data source and the state to model.
url = "https://api.rootnet.in/covid19-in/stats/history"
state = 'Tamil Nadu'

# Trailing rows used to fit each model, and how many days to predict ahead.
confirmed_recent_days = 5
death_recent_days_count = 6
prediction_duration = 10

# Confirmed cases.
confirmed_cases_next_dates, confirmed_cases_predictions = perform_predictions(
    url=url,
    state=state,
    y_column="cases",
    recent_days_count=confirmed_recent_days,
    prediction_duration=prediction_duration,
)

# Deaths.
death_cases_next_dates, death_cases_predictions = perform_predictions(
    url=url,
    state=state,
    y_column="deaths",
    recent_days_count=death_recent_days_count,
    prediction_duration=prediction_duration,
)

# Last expression of the cell: rendered as the cell's output.
result_dataframe(
    state=state,
    next_dates=confirmed_cases_next_dates,
    confirmed_cases_predictions=confirmed_cases_predictions,
    death_cases_predictions=death_cases_predictions,
)

Mean Absolute Error: 0.0
Mean Squared Error: 0.0
Root Mean Squared Error: 0.0
Mean Absolute Error: 0.45000000000000284
Mean Squared Error: 0.29250000000000087
Root Mean Squared Error: 0.5408326913195992


Unnamed: 0,State,Date,Confirmed_Cases_Predications,Death_Cases_Predications
0,Tamil Nadu,2020-04-09,808,9
1,Tamil Nadu,2020-04-10,871,10
2,Tamil Nadu,2020-04-11,933,11
3,Tamil Nadu,2020-04-12,996,12
4,Tamil Nadu,2020-04-13,1058,13
5,Tamil Nadu,2020-04-14,1121,15
6,Tamil Nadu,2020-04-15,1183,16
7,Tamil Nadu,2020-04-16,1246,17
8,Tamil Nadu,2020-04-17,1308,18
9,Tamil Nadu,2020-04-18,1371,19
