In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from utils import prep_data
from pathlib import Path
import pandas as pd
import xgboost as xgb
import numpy as np

In [None]:
# Root folder of the challenge material and the training parquet inside it.
DATA_FOLDER = Path("./cedas2025_material") / "data"
data_path = DATA_FOLDER.joinpath("chargecurves_train.parquet")

# prep_data (from utils) returns a (train, validation) pair when do_split=True;
# each element is unpacked into its features and target.
train_split, val_split = prep_data(data_path, do_split=True)
X_train, y_train = train_split
X_val, y_val = val_split

In [None]:
# Base estimator handed to the grid search. Only the fixed settings live here;
# the hyper-parameters to tune are supplied through the parameter grid.
model_xgb = xgb.XGBRegressor(
    objective="reg:squarederror",
    tree_method="hist",
    eval_metric=mean_absolute_error,
)

# Parameter grid explored by GridSearchCV: every combination of the listed
# values is cross-validated.
# Each key must appear exactly once. A repeated key in a dict literal silently
# keeps only the last value, which previously replaced the n_estimators
# candidates below with a single leftover [10].
parameters = {
    "learning_rate": [0.01, 0.005],
    "max_depth": [5, 10],
    "subsample": [0.4, 0.7],
    "colsample_bytree": [0.7],
    "n_estimators": [500, 800],
    "reg_lambda": [0.8],
}

# Exhaustive search over `parameters` with 5-fold cross-validation, scored by
# negated mean absolute error (so larger is better), using 5 parallel jobs.
xgb_grid = GridSearchCV(
    estimator=model_xgb,
    param_grid=parameters,
    scoring="neg_mean_absolute_error",
    cv=5,
    n_jobs=5,
    verbose=4,
)
xgb_grid.fit(X_train, y_train)

# Keep the winning configuration; best_params feeds the final model.
best_score, best_params = xgb_grid.best_score_, xgb_grid.best_params_
print(f"{best_score=}")
print(f"{best_params=}")

In [None]:
# Final regressor: the same fixed settings as the search estimator, combined
# with the best hyper-parameters found by the grid search.
model = xgb.XGBRegressor(
    **best_params,
    objective="reg:squarederror",
    tree_method="hist",
    eval_metric=mean_absolute_error,
)

In [None]:
# Fit the tuned regressor on the training split.
model.fit(X_train, y_train)

In [None]:
# Mean absolute error of the tuned model on the validation split.
preds = model.predict(X_val)
score = mean_absolute_error(y_true=y_val, y_pred=preds)

In [None]:
# Show the validation mean absolute error as the cell output.
score

In [None]:
# Prepare the validation hold-back file for inference, without a
# train/validation split.
# NOTE(review): the exact effect of eval_set=True is defined in utils.prep_data — confirm.
X_val_holdback, y_val_holdback = prep_data(
    DATA_FOLDER / "chargecurves_validation_holdback.parquet",
    do_split=False,
    eval_set=True,
)

In [None]:
# Predict with the fitted model on the prepared validation hold-back features.
val_holdback_preds = model.predict(X_val_holdback)

In [None]:
# Fill the missing `power` values of the validation hold-back file with the
# model's predictions, then export the completed frame as CSV and Parquet.
val_holdback = pd.read_parquet(DATA_FOLDER / "chargecurves_validation_holdback.parquet")

# Temporary column so fillna can pair every row with its prediction.
# NOTE(review): assumes prep_data keeps the file's row order and row count — confirm.
val_holdback["power_pred"] = val_holdback_preds
val_holdback["power"] = val_holdback["power"].fillna(val_holdback["power_pred"])

# DataFrame.drop returns a new frame; reassign it so the helper column is
# really removed before the files are written.
val_holdback = val_holdback.drop(columns="power_pred")

val_holdback.to_csv("val_preds2.csv")
val_holdback.to_parquet("val_preds2.parquet")

In [None]:
# Fill the missing `power` values of the test hold-back file with the model's
# predictions, then export the completed frame as CSV and Parquet.
test_holdback_path = DATA_FOLDER / "chargecurves_test_holdback.parquet"

# Build the features and predictions in this cell: nothing earlier in the
# notebook defines test_holdback_preds, so a fresh kernel would otherwise stop
# here with a NameError.
# NOTE(review): mirrors the prep_data call used for the validation hold-back
# file — confirm the same flags apply to the test file.
X_test_holdback, y_test_holdback = prep_data(test_holdback_path, do_split=False, eval_set=True)
test_holdback_preds = model.predict(X_test_holdback)

test_holdback = pd.read_parquet(test_holdback_path)

# Temporary column so fillna can pair every row with its prediction.
# NOTE(review): assumes prep_data keeps the file's row order and row count — confirm.
test_holdback["power_pred"] = test_holdback_preds
test_holdback["power"] = test_holdback["power"].fillna(test_holdback["power_pred"])

# DataFrame.drop returns a new frame; reassign it so the helper column is
# really removed before the files are written.
test_holdback = test_holdback.drop(columns="power_pred")

test_holdback.to_csv("test_preds2.csv")
test_holdback.to_parquet("test_preds2.parquet")