In [None]:
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pyearth import Earth

# Location of the training CSV. The original local checkout path stays the
# default so existing behaviour is unchanged; set the PCEPI_TRAIN_CSV
# environment variable to run the notebook from another machine or checkout.
# The name is TRAIN_CSV_PATH rather than `dir` so the builtin dir() is not shadowed.
TRAIN_CSV_PATH = os.environ.get(
    "PCEPI_TRAIN_CSV",
    "C:\\Users\\George\\source\\repos\\COMP5530M-Group-Project-Inflation-Forecasting\\Data\\Train\\trains1990s.csv",
)

# NOTE: `train` holds the full frame here; the later train/validation split
# cell rebinds the same name to the training portion only.
train = pd.read_csv(TRAIN_CSV_PATH)

# Keep only the date column and the target series (no exogenous regressors).
trainNoExogenous = train[['observation_date', 'fred_PCEPI']]

display(trainNoExogenous.head())

In [None]:
from sklearn.model_selection import train_test_split

# The rows form a time series, so the split must not shuffle them:
# the first 70% of rows become the training set, the rest the validation set.
train, val = train_test_split(
    trainNoExogenous,
    shuffle=False,
    train_size=0.7,
)

# Show the sizes first, then a preview of each partition.
display(train.shape, val.shape)
display(train.head(), val.head())

In [None]:
# Target series and its single predictor. For now the predictor is simply the
# row index; encoding the dates themselves is left for later.
trainY = train['fred_PCEPI']
trainX = train.index.values

# The author enables allow_missing with the aim of predicting future values.
# NOTE(review): allow_missing appears to govern how missing (NaN) predictor
# values are handled rather than extrapolation - confirm against py-earth docs.
model = Earth(allow_missing=True, allow_linear=False)
model.fit(trainX, trainY)

In [None]:
# In-sample predictions: score the same index values the model was fitted on.
y_hat = model.predict(trainX)

In [None]:
# Preview the training frame next to its target column.
display(train.head())
display(trainY.head())

In [None]:
# Print whatever the fitted model's trace() returns
# (presumably the record of the fitting passes - confirm against py-earth docs).
print(model.trace())

In [None]:
# Print the fitted model's textual summary.
print(model.summary())
# Left disabled: uncomment to also print the estimator's parameters.
#print(model.get_params())

In [None]:
def hinge(x):
    """Return ``x`` when it is strictly positive, otherwise ``0``.

    This is the scalar hinge ``max(0, x)``; it is not vectorised.
    """
    return x if x > 0 else 0

def lastRegressor(x0, val):
    """Evaluate a hand-written term: ``0.167134 * (val + hinge(x0 - 247))``.

    x0  -- value fed to the hinge, shifted by 247.
    val -- value added to the hinge output before scaling.

    NOTE(review): 0.167134 and 247 are hard-coded; presumably a coefficient
    and knot copied from a model summary - confirm and refresh after refitting.
    """
    hinged = hinge(x0 - 247)
    return 0.167134 * (val + hinged)

In [None]:
# First predictor value and first target value.
display(trainX[0])
display(trainY[0])
# Hand-written term next to the model's prediction for the last training row
# (presumably a manual sanity check of the fitted basis - confirm intent).
display(lastRegressor(x0=trainX[0], val=trainX[-1]))
display(y_hat[-1])

In [None]:
def makeActualVsPredictedGraph(xvals : pd.Series, y : pd.Series, y_hat : np.ndarray):
    """Plot observed values and model predictions on one labelled figure.

    Parameters
    ----------
    xvals : pd.Series
        X-axis values (callers in this notebook pass the 'observation_date'
        column). Its first and last entries are used in the title.
    y : pd.Series
        Observed values, one per entry of ``xvals``.
    y_hat : np.ndarray
        Predicted values, one per entry of ``xvals``.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    plt.figure(figsize=(20, 10))
    ax = plt.axes()
    # Install the locator before plotting, as the original did: at most about
    # 10 major ticks on the x axis so the rotated labels stay legible.
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    # Label both series so the legend tells actual from predicted.
    ax.plot(xvals, y, label='Actual')
    ax.plot(xvals, y_hat, label='Predicted')
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_xlabel('Date')
    ax.set_ylabel('PCEPI')
    ax.set_title(f'{xvals.iloc[0]} - {xvals.iloc[-1]} Actual vs. Predicted PCEPI')
    ax.legend()
    plt.show()

In [None]:
# Training period: observed PCEPI against the in-sample predictions.
makeActualVsPredictedGraph(
    xvals=train['observation_date'],
    y=trainY,
    y_hat=y_hat,
)

In [None]:
# Validation predictor (row index) and target, mirroring the training cell.
valX = val.index.values
valY = val['fred_PCEPI']

# Only the predictors are passed to predict(). The second positional parameter
# of Earth.predict is the `missing` mask, so passing valY there would flag
# every (non-zero) row as missing instead of scoring valX.
y_hat_val = model.predict(valX)

In [None]:
# Validation period: observed PCEPI against the out-of-sample predictions.
makeActualVsPredictedGraph(
    xvals=val['observation_date'],
    y=valY,
    y_hat=y_hat_val,
)

In [None]:
# Whole sample: training predictions followed by validation predictions,
# plotted against the full observed series.
makeActualVsPredictedGraph(
    trainNoExogenous['observation_date'],
    trainNoExogenous['fred_PCEPI'],
    np.concatenate([y_hat, y_hat_val]),
)

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# Root-mean-squared error and R^2 on both partitions.
trainRMSE = np.sqrt(mean_squared_error(trainY, y_hat))
valRMSE = np.sqrt(mean_squared_error(valY, y_hat_val))
trainR2 = r2_score(trainY, y_hat)
valR2 = r2_score(valY, y_hat_val)

# One-row summary table. It is built from a record rather than by
# concatenating a string array with a float array, which would coerce every
# metric to a string; this way the metric columns stay numeric.
RMSEDf = pd.DataFrame(
    [{
        'Model': 'MARS',
        'Training RMSE': trainRMSE,
        'Validation RMSE': valRMSE,
        'Training R2': trainR2,
        'Validation R2': valR2,
    }],
    columns=['Model', 'Training RMSE', 'Validation RMSE', 'Training R2', 'Validation R2'],
)

In [None]:
# Write the evaluation table to 'MARS_PCEPI_eval.csv' (a relative path).
# to_csv is called with its defaults, so the DataFrame index is written as an
# unnamed first column.
RMSEDf.to_csv('MARS_PCEPI_eval.csv')