In [1]:
import joblib

# Load preprocessed data
X_train = joblib.load('../data/X_train.pkl')
X_test = joblib.load('../data/X_test.pkl')
y_train = joblib.load('../data/y_train.pkl')
y_test = joblib.load('../data/y_test.pkl')

In [11]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np

In [13]:
y_train_log = np.log1p(y_train)  # log1p handles 0-values safely

In [16]:
pipeline = make_pipeline(
    StandardScaler(),
    Lasso(alpha=0.001, max_iter=10000)
)
pipeline.fit(X_train, y_train_log)

In [15]:
from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('lasso', Lasso(max_iter=10000))
])

param_grid = {'lasso__alpha': [0.01, 0.001, 0.0005, 0.0001]}
grid = GridSearchCV(pipeline, param_grid, cv=5)
grid.fit(X_train, np.log1p(y_train))

print("Best alpha:", grid.best_params_)


Best alpha: {'lasso__alpha': 0.001}


In [17]:
y_pred_lasso = lasso.predict(X_test)

rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))
mae_lasso = mean_absolute_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

In [18]:
print(f"Lasso Regression RMSE: {rmse_lasso:.2f}")
print(f"Lasso Regression MAE: {mae_lasso:.2f}")
print(f"Lasso Regression R²: {r2_lasso:.4f}")

Lasso Regression RMSE: 33549.32
Lasso Regression MAE: 20194.97
Lasso Regression R²: 0.8533
