In [1]:
import pandas as pd
import numpy as np
from pygam import LinearGAM, s, l
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import scipy.sparse as sp
def _csr_to_dense(matrix):
    """Return the dense ndarray form of a sparse matrix via `toarray()`."""
    return matrix.toarray()


# Expose the dense conversion as a read-only `.A` attribute on csr_matrix.
# NOTE(review): presumably a shim for SciPy releases that no longer ship
# `.A`, which a downstream library still reads — confirm against the
# installed SciPy / pyGAM versions.
sp.csr_matrix.A = property(_csr_to_dense)


In [3]:
# Data Preparation
# Load the pre-split train/test sets and one-hot encode `flat_model`.
train = pd.read_csv('../../data/processed/train.csv')
test  = pd.read_csv('../../data/processed/test.csv')

# dtype=float keeps the dummy columns numeric. pandas >= 2.0 emits bool
# dummies by default, and a frame mixing float and bool columns converts
# to an object-dtype array instead of a float matrix.
train = pd.get_dummies(train, columns=['flat_model'], prefix='model', dtype=float)
test  = pd.get_dummies(test,  columns=['flat_model'], prefix='model', dtype=float)

num_feats   = ['time','storey_avg','floor_area_sqm','flat_type_encoded','remaining_lease_months']
model_feats = [c for c in train.columns if c.startswith('model_')]

# Align the test dummies to the training dummies in one step:
#   * `model_*` columns seen only in train are added to test, filled with 0
#   * `model_*` columns seen only in test are dropped
# All non-dummy test columns are kept untouched.
non_model_cols = [c for c in test.columns if not c.startswith('model_')]
test = test.reindex(columns=non_model_cols + model_feats, fill_value=0.0)

# Select features by name so train and test share the same column order.
feature_cols = num_feats + model_feats

X_tr = train[feature_cols].to_numpy(dtype=float)
y_tr = train['resale_price'].to_numpy()
X_te = test[feature_cols].to_numpy(dtype=float)
y_te = test['resale_price'].to_numpy()

# Both design matrices must have one column per feature.
assert X_tr.shape[1] == X_te.shape[1] == len(feature_cols)

In [4]:
# Build the GAM terms by column position in X_tr:
#   columns [0, n_num)            -> one spline term per numeric feature
#   columns [n_num, n_num + n_mod) -> one linear term per flat-model dummy
n_num = len(num_feats)
n_mod = len(model_feats)

splines = list(map(s, range(n_num)))
linears = list(map(l, range(n_num, n_num + n_mod)))

# Fold every term onto the first one with `+` to get the combined term list.
all_terms = splines + linears
terms = sum(all_terms[1:], all_terms[0])

gam = LinearGAM(terms)

# Candidate smoothing penalties passed to the grid search.
lams     = [0.1, 1, 10]
best_gam = gam.gridsearch(X_tr, y_tr, lam=lams)

  0% (0 of 3) |                          | Elapsed Time: 0:00:00 ETA:  --:--:--
 33% (1 of 3) |########                  | Elapsed Time: 0:00:04 ETA:   0:00:09
 66% (2 of 3) |#################         | Elapsed Time: 0:00:09 ETA:   0:00:04
100% (3 of 3) |##########################| Elapsed Time: 0:00:14 Time:  0:00:14


In [None]:
# Score the tuned GAM on the held-out test set.
preds = best_gam.predict(X_te)

mae  = mean_absolute_error(y_te, preds)
mse  = mean_squared_error(y_te, preds)
rmse = np.sqrt(mse)  # RMSE is the square root of the mean squared error
r2   = r2_score(y_te, preds)

# One print call, one line per item: selected penalties, then the metrics.
print(
    f"Best λ: {best_gam.lam}",
    f"GAM MAE:  {mae:.2f}",
    f"GAM RMSE: {rmse:.2f}",
    f"GAM R²:   {r2:.4f}",
    sep="\n",
)

Best λ: [[0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1], [0.1]]
GAM MAE:  66568.73
GAM RMSE: 92118.07
GAM R²:   0.7385
