# Mandatory Task 2a

In [1]:
import pandas as pd
import numpy as np

In [2]:
states_df_out = pd.read_csv("States Data/22_cleaned.csv").drop(['Unnamed: 0'],axis=1)
states_df_out.shape

(376, 5)

In [3]:
states_df_2a = states_df_out[((states_df_out['Date'] >= '2020-08-01') & (states_df_out['Date'] <= '2020-08-31'))]
states_df_2a.shape

(31, 5)

In [4]:
train, test = states_df_2a[0:-7],states_df_2a[-7:]

## AR with p = 3 and 5 

In [5]:
def getMSEandMAPE(actual, prediction):
    mse = 0
    mape = 0
    for x in range(len(actual)):
        mse += np.square(prediction[x] - actual[x])
        mape += abs(actual[x] - prediction[x])/actual[x]
    mse = mse/len(actual)
    mape = (mape/len(actual))*100
    return mse, mape 

In [6]:
def ar_train(data,p,curLen):
    X = []
    Y = []
    for i in range(curLen):
        if i+p < curLen :
            X.append([1] + list(data[i:i+p]))
            Y.append(data[i+p])            
        else:
            break
    return X, Y

def getBetaValues(X,Y):
    beta=np.matmul(np.linalg.inv(np.matmul(np.transpose(X),X)),np.matmul(np.transpose(X),Y))
    return beta

def ar_pred(train, test, p):
    data = np.hstack([train, test])
    trainLen = data.shape[0] - test.shape[0] 
    predictions = np.zeros(test.shape[0])
    for i in range(trainLen,data.shape[0]):
        dat = np.hstack([[1], data[i-p:i]])
        X,Y = ar_train(data, p, i)
        beta = getBetaValues(X,Y)
        predictions[i-trainLen] = np.matmul(dat,beta)

    return predictions

In [7]:
cols = ['TN confirmed', 'TX confirmed', 'TN deaths', 'TX deaths']
for col in cols:
    
    actual = np.array(test[col])
    prediction = ar_pred(np.array(train[col]),np.array(test[col]), 3)
    
    df_ar1 = pd.DataFrame()
    df_ar1['Actual'] = actual
    df_ar1['Predicted'] = prediction
    print('\033[1m',col,'\033[0m \n')
    print(df_ar1, '\n')
    
    mse, mape = getMSEandMAPE(actual,prediction)
    print('MSE : ', mse, ' MAPE : ', mape, '\n\n')

[1m TN confirmed [0m 

   Actual    Predicted
0   813.0  1682.735460
1  1936.0  1419.627093
2  1826.0  1718.582469
3  1636.0  1634.727556
4  1465.0  1579.627186
5   835.0  1571.120433
6  1818.0  1475.304503 

MSE :  243867.6831167538  MAPE :  36.34911235351231 


[1m TX confirmed [0m 

   Actual    Predicted
0  6397.0  4514.706349
1  5445.0  6367.400933
2  5694.0  5893.878577
3  4150.0  6077.947372
4  4733.0  5312.722264
5  3761.0  5401.093170
6  2550.0  4753.796890 

MSE :  2290498.517660823  MAPE :  34.08738360474345 


[1m TN deaths [0m 

   Actual  Predicted
0    40.0  27.932749
1    20.0  29.877670
2    25.0  19.024509
3    28.0  23.921775
4    24.0  24.715985
5    22.0  23.070699
6     7.0  23.523434 

MSE :  81.45831841988749  MAPE :  51.70323903348406 


[1m TX deaths [0m 

   Actual   Predicted
0   181.0  138.099671
1   229.0  292.103796
2   265.0  257.573662
3   196.0  233.817839
4   154.0  171.750801
5    90.0  169.063178
6    26.0  157.386058 

MSE :  4448.034264031

In [8]:
from statsmodels.tsa.ar_model import AutoReg

ar_model = AutoReg(train['TN confirmed'], lags=3).fit()

pred = ar_model.predict(start=len(train), end=(len(states_df_2a)-1), dynamic=False)

pred



24    1682.735460
25    1515.386344
26    1753.608881
27    1607.893389
28    1647.917497
29    1604.908293
30    1627.022311
dtype: float64

## EMWA with alpha = 0.5 and 0.8

In [9]:
def EWMA(data, alpha):

    y_pred = []
    y_pred.append(data[0])
    for i in range(1,len(data)):
        y_pred.append(alpha * data[i-1] + (1 - alpha) * y_pred[i-1])

    y_actual = data
    #Computing MSE and MAPE for test set
    MSE, MAPE = getMSEandMAPE(y_pred[-7:], y_actual[-7:])
    return y_pred,MSE,MAPE

In [10]:
cols = ['TN confirmed', 'TX confirmed', 'TN deaths', 'TX deaths']
for col in cols:

    y_predicted_point5, MSE_predicted_point5, MAPE_predicted_point5 = EWMA((states_df_2a[col]).tolist(),0.5)
    y_predicted_point8,MSE_predicted_point8, MAPE_predicted_point8  = EWMA((states_df_2a[col]).tolist(),0.8)

    df_ar1 = pd.DataFrame()
    df_ar1['Actual'] = states_df_2a[col][-7:]
    df_ar1['Predicted_EWMA(0.5)'] = y_predicted_point5[-7:]
    df_ar1['Predicted_EWMA(0.8)'] = y_predicted_point8[-7:]
    print('\033[1m',col,'\033[0m \n')
    print(df_ar1, '\n\n')
    
    df_ar2 = pd.DataFrame()
    df_ar2['Alpha'] = [0.5,0.8]
    df_ar2['MSE'] = MSE_predicted_point5, MSE_predicted_point8
    df_ar2['MAPE'] = MAPE_predicted_point5, MAPE_predicted_point8
    print(df_ar2, '\n\n')

[1m TN confirmed [0m 

     Actual  Predicted_EWMA(0.5)  Predicted_EWMA(0.8)
214   813.0          1149.772742           882.915525
215  1936.0           981.386371           826.983105
216  1826.0          1458.693185          1714.196621
217  1636.0          1642.346593          1803.639324
218  1465.0          1639.173296          1669.527865
219   835.0          1552.086648          1505.905573
220  1818.0          1193.543324           969.181115 


   Alpha            MSE       MAPE
0    0.5  299164.767727  37.325142
1    0.8  355407.004003  43.174667 


[1m TX confirmed [0m 

     Actual  Predicted_EWMA(0.5)  Predicted_EWMA(0.8)
214  6397.0          3918.991362          3185.316248
215  5445.0          5157.995681          5754.663250
216  5694.0          5301.497840          5506.932650
217  4150.0          5497.748920          5656.586530
218  4733.0          4823.874460          4451.317306
219  3761.0          4778.437230          4676.663461
220  2550.0          4269.718