In [None]:
import pandas as pd
from sklearn import linear_model

# Relative path to the 1990s training CSV. Forward slashes are used because
# they resolve on Windows as well as on POSIX systems, whereas a
# backslash-separated path only resolves on Windows.
train_file = "../Data/Train/trains1990s.csv"

# Load the training data and keep only the first two columns; the rest of the
# notebook reads them as 'observation_date' and 'fred_PCEPI'.
# The date format must consist of complete strptime directives only: a lone
# trailing '%' is not a valid directive and prevents the column from being
# parsed with the intended format.
# NOTE(review): "%m/%y" assumes dates are stored as month/two-digit-year
# strings such as "01/90" -- confirm against the CSV. If the format does not
# match, read_csv may leave the column as plain strings instead of raising,
# so check train_df.dtypes after loading.
train_df = pd.read_csv(
    train_file,
    parse_dates=['observation_date'],
    date_format="%m/%y",
).iloc[:, :2]

# Single constant group id: the whole frame is treated as one series when it
# is passed as group_ids to TimeSeriesDataSet further down.
train_df['group'] = 0
# Integer time index taken from the row index; used as time_idx below.
train_df['time_idx'] = train_df.index

In [None]:
# Hold out the trailing share of rows for validation. The split is purely by
# row position: rows before splitLoc train the model, the rest validate it.
valRatio = 0.2
numVals = len(train_df)
splitLoc = int(numVals * (1 - valRatio))

train = train_df.iloc[:splitLoc]
val = train_df.iloc[splitLoc:]

In [None]:
from pytorch_forecasting import TimeSeriesDataSet

# Wrap the training rows in a TimeSeriesDataSet.
# Min and max lengths are pinned to the same value, so every sample has
# exactly 5 encoder steps followed by 2 prediction steps.
trainDataset = TimeSeriesDataSet(
    train,
    time_idx='time_idx',
    target='fred_PCEPI',
    group_ids=['group'],
    # The target itself is the only time-varying real-valued input.
    time_varying_unknown_reals=['fred_PCEPI'],
    min_encoder_length=5,
    max_encoder_length=5,
    min_prediction_length=2,
    max_prediction_length=2,
)

In [None]:
from sklearn import linear_model

# Baseline model: ordinary least squares of PCEPI on the integer row index.
# The estimator expects a 2-D feature matrix, so the index is reshaped into a
# single column before fitting.
trainDates = train.index.to_numpy().reshape(-1, 1)
display(trainDates.shape)

regr = linear_model.LinearRegression()
regr.fit(trainDates, train['fred_PCEPI'])

In [None]:
# Predict PCEPI for the held-out rows, feeding their row index as the single
# feature column (same layout the model was fitted on).
valDates = val.index.to_numpy().reshape(-1, 1)
y_hat = regr.predict(valDates)

display(y_hat)

In [None]:
from matplotlib import pyplot as plt

# Compare the linear fit with the observed series over the validation rows.
# The x axis is the integer row index the model was fitted on. Each line is
# labelled so the prediction can be told apart from the actual values.
fig, ax = plt.subplots()
ax.plot(valDates, y_hat, label='Linear prediction')
ax.plot(valDates, val['fred_PCEPI'], label='Actual PCEPI')
ax.set_title('Linear Regressor vs Actual PCEPI (Validation)')
ax.set_xlabel('Row index')
ax.set_ylabel('PCEPI')
ax.legend()
plt.show()

In [None]:
# In-sample predictions: evaluate the fitted line on the training rows, using
# their row index as the single feature column.
trainDates = train.index.values.reshape(-1, 1)

y_train_hat = regr.predict(trainDates)

# Compare the fit with the observed series over the training rows. Each line
# is labelled so the prediction can be told apart from the actual values.
fig, ax = plt.subplots()
ax.plot(trainDates, y_train_hat, label='Linear prediction')
ax.plot(trainDates, train['fred_PCEPI'], label='Actual PCEPI')
ax.set_title('Linear Regressor vs Actual PCEPI (Training)')
ax.set_xlabel('Row index')
ax.set_ylabel('PCEPI')
ax.legend()
plt.show()

In [None]:
import numpy as np

# Stitch the training and validation predictions together, training first,
# so the combined array follows the same row order as train_df.
y_hats = np.concatenate([y_train_hat, y_hat], axis=0)
display(y_hats.shape)

In [None]:
# Full-period view: fitted line against observed PCEPI, with the validation
# rows shaded.
fig, ax = plt.subplots(figsize=(20, 10))
# Limit the x axis to at most 10 tick intervals so the date labels stay legible.
ax.xaxis.set_major_locator(plt.MaxNLocator(10))
ax.plot(train_df['observation_date'], y_hats, label='Linear prediction')
ax.plot(train_df['observation_date'], train_df['fred_PCEPI'], label='Actual PCEPI')
# Shade the validation span (rows from splitLoc onward). The band runs
# vertically from the first observed PCEPI value to the last predicted value.
# .iloc[0] selects the first row by position rather than by index label.
ax.fill_between(
    train_df['observation_date'].iloc[splitLoc:],
    train_df['fred_PCEPI'].iloc[0],
    y_hats[-1],
    facecolor='green',
    step='pre',
    alpha=0.5,
    label='Validation period',
)
ax.set_title('Linear Regressor Predictions on PCEPI')
ax.set_xlabel('Date')
ax.set_ylabel('PCEPI')
ax.legend()
plt.show()

In [None]:
from sklearn.metrics import root_mean_squared_error

# Root-mean-squared error of the linear baseline, on the rows it was fitted
# on and on the held-out validation rows.
linearTrainError = root_mean_squared_error(
    y_true=train['fred_PCEPI'],
    y_pred=y_train_hat,
)
linearValError = root_mean_squared_error(
    y_true=val['fred_PCEPI'],
    y_pred=y_hat,
)

In [None]:
# Report the held-out error of the linear baseline.
print('Validation RMSE: {}'.format(linearValError))