In [1]:
# KPMG Forecasting Project
# Feature regularization: Ridge and Lasso regression with time-series cross-validation
# Author: Kushal Wijesundara
# Date: June 8, 2021

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.metrics import mean_squared_error
# Load the raw data set; it must contain 'year' and 'quarter' columns.
df = pd.read_csv('data.csv')

# Combine year and quarter into labels such as '2020Q1' with vectorised string
# operations (no row-wise apply, no positional indexing into a labelled row),
# then convert each quarter to the timestamp of its first day.
quarter_labels = df['year'].astype(str) + 'Q' + df['quarter'].astype(str)
df['date'] = pd.PeriodIndex(quarter_labels, freq='Q').to_timestamp()

# The date index now carries the time information, so the source columns go.
df = df.drop(columns=['year', 'quarter']).set_index('date')

In [3]:
# Fraction of the complete (NaN-free) rows held out as the test set.
split = 0.3

# Target is the 'dv' column; every other column is a predictor.
y = df.dropna()['dv']
x = df.dropna().drop(columns=['dv'])

# Positional cut-off: the first (1 - split) share of rows is used for training
# and the remainder for testing.
# NOTE(review): this assumes the rows are already in chronological order — confirm.
i = int(len(x) * (1 - split))
x_train, x_test = x.iloc[:i], x.iloc[i:]
y_train, y_test = y.iloc[:i], y.iloc[i:]

# Three-fold time-series splitter, passed as `cv` to the models in later cells.
tscv = TimeSeriesSplit(n_splits=3)

In [4]:
# Ridge regression; the penalty strength is picked by cross-validation over `tscv`.
# NOTE(review): `alphas` is the list of candidate values, so only 0.004 and 10 are
# compared. The selected alpha sits on the lower edge — consider a denser grid.
ridge = RidgeCV(alphas=(0.004, 10), cv=tscv)
results = ridge.fit(x_train, y_train)
print("Best model-Ridge:\nalpha = {} ".format(ridge.alpha_))

# Out-of-sample error on the held-out rows.
y_pred = ridge.predict(x_test)
mse = mean_squared_error(y_test, y_pred)

# One row per predictor, paired with its fitted coefficient.
ridge_coef = pd.DataFrame({
    "Features": x_train.columns,
    'Coef Estimate': ridge.coef_,
})

print(f'MSE = {mse}')
print(ridge_coef)

Best model-Ridge:
alpha = 0.004 
MSE = 0.05662948148935373
   Features  Coef Estimate
0  bus_conf       2.734312
1  con_cred      -0.959195
2       cpi       5.795022
3   con_exp       0.702647
4       ppi       0.119571
5       vix      -0.202275
6       inv       2.012037
7       ppe      -2.229474


In [5]:
# Lasso regression; the penalty strength is picked by cross-validation over `tscv`.
# NOTE(review): `alphas` is the list of candidate values, so only 0.00001 and
# 0.00021 are compared. The selected alpha is the larger one — consider a denser grid.
lasso = LassoCV(alphas=(0.00001, 0.00021), cv=tscv)
results = lasso.fit(x_train, y_train)
print("Best model-Lasso:\nalpha = {} ".format(lasso.alpha_))

# Out-of-sample error on the held-out rows.
y_pred = lasso.predict(x_test)
mse = mean_squared_error(y_test, y_pred)

# One row per predictor, paired with its fitted coefficient.
lasso_coef = pd.DataFrame({
    "Features": x_train.columns,
    'Coef Estimate': lasso.coef_,
})

print(f'MSE = {mse}')
print(lasso_coef)

Best model-Lasso:
alpha = 0.00021 
MSE = 0.05865737639384063
   Features  Coef Estimate
0  bus_conf       1.315978
1  con_cred      -0.000000
2       cpi      10.826085
3   con_exp       0.000000
4       ppi       0.114138
5       vix      -0.197485
6       inv       1.577048
7       ppe      -2.076553
