In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

def _read_encoded(csv_path):
    """Load one encoded split from ``csv_path``.

    The ``time`` column becomes the index (with date parsing enabled) and
    missing ``precipitation_sum`` values are replaced with 0.
    NOTE(review): presumably a missing precipitation reading means "no rain" —
    confirm against the data source.
    """
    frame = pd.read_csv(csv_path, index_col="time", parse_dates=True)
    return frame.assign(precipitation_sum=frame["precipitation_sum"].fillna(0))


# Both splits go through the exact same loading / NaN-filling routine.
train_df = _read_encoded("data/processed/train_encoded.csv")
eval_df = _read_encoded("data/processed/eval_encoded.csv")

print("Data loaded! Train shape:", train_df.shape)

Data loaded! Train shape: (105, 10)


In [7]:
# Column the models are trained to predict; every other column is a feature.
target = 'temperature_2m_max'

# Training data
X_train = train_df.drop(columns=[target])
y_train = train_df[target]

# Evaluation data
X_eval = eval_df.drop(columns=[target])
y_eval = eval_df[target]

# The train and eval frames are loaded from separate CSV files, so nothing
# guarantees that they expose the same feature columns in the same order.
# Fail fast with a readable message on a real mismatch, then put the eval
# columns in the training order so downstream transformers/models always see
# the layout they were fitted on.
mismatched_cols = X_train.columns.symmetric_difference(X_eval.columns)
if len(mismatched_cols) > 0:
    raise ValueError(
        f"Train/eval feature columns differ: {list(mismatched_cols)}"
    )
X_eval = X_eval[X_train.columns]

print(f"Training on {X_train.shape[1]} features to predict '{target}'...")

Training on 9 features to predict 'temperature_2m_max'...


In [8]:

# Fit the scaler on the training features only, so no statistics from the
# evaluation split leak into preprocessing.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted scaling to both splits.
X_train_scaled = scaler.transform(X_train)
X_eval_scaled = scaler.transform(X_eval)

print("Features scaled successfully.")

Features scaled successfully.


In [9]:
# Baseline: plain linear regression fitted on the scaled training features.
lr_model = LinearRegression().fit(X_train_scaled, y_train)

# Score both splits; the gap between train and eval MAE indicates how well
# the fit generalizes.
train_preds_lr = lr_model.predict(X_train_scaled)
mae_train_lr = mean_absolute_error(y_train, train_preds_lr)

eval_preds_lr = lr_model.predict(X_eval_scaled)
mae_eval_lr = mean_absolute_error(y_eval, eval_preds_lr)

print(
    "--- Standard Linear Regression ---",
    f"Train MAE: {mae_train_lr:.4f} degrees",
    f"Eval MAE:  {mae_eval_lr:.4f} degrees",
    sep="\n",
)

--- Standard Linear Regression ---
Train MAE: 1.1848 degrees
Eval MAE:  1.7311 degrees


In [10]:
# Regularized variants fitted on the same scaled training features:
# Ridge applies an L2 penalty, Lasso an L1 penalty.
ridge_model = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
lasso_model = Lasso(alpha=0.1).fit(X_train_scaled, y_train)

# Predict on the evaluation split only; results are compared with the
# baseline eval MAE (`mae_eval_lr`) computed in an earlier cell.
eval_preds_ridge = ridge_model.predict(X_eval_scaled)
eval_preds_lasso = lasso_model.predict(X_eval_scaled)

print(
    "--- Regularization Evaluation (Eval MAE) ---",
    f"Baseline LR MAE: {mae_eval_lr:.4f}",
    f"Ridge MAE:       {mean_absolute_error(y_eval, eval_preds_ridge):.4f}",
    f"Lasso MAE:       {mean_absolute_error(y_eval, eval_preds_lasso):.4f}",
    sep="\n",
)

--- Regularization Evaluation (Eval MAE) ---
Baseline LR MAE: 1.7311
Ridge MAE:       1.7403
Lasso MAE:       1.7023
