In [1]:
from EvalModelo import Evaluador
from CargaDatos import CargaDatasets
from Modelo import ModeloEspecial
from pathlib import Path
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

In [2]:
# Relative location of the folder that holds the processed dataset.
carpeta = Path("../data/processed")

# Configure the loader for the processed CSV, then read it into `df`,
# the frame the later cells work from.
carga = CargaDatasets(
    nombre_modificado="power_tetouan_city_processed.csv",
    carpeta_raw=carpeta,
)
df = carga.leer()


# Evaluador de modelos (CV)

In [12]:
# Cross-validate the candidate models on `df`, then refit the best one.
train = Evaluador(df)

# Bind the second return value to a named throwaway instead of `_`: inside
# IPython, `_` is the "last output" shortcut, and assigning to it stops the
# kernel from updating it for the rest of the session.
cv_table, _cv_extra = train.cross_validate()
best_pipe, test_rmse = train.fit_best()

# NOTE(review): in the last recorded run the best CV RMSE (~864) is far below
# the reported test RMSE (~3699). Check that the CV splits and the test split
# are comparable (e.g. shuffled vs. chronological) before trusting the ranking.
print(cv_table)
print("Mejor:", train.best_name_, "RMSE test:", round(test_rmse, 3))

              model    rmse_mean   rmse_std
0      RandomForest   863.844156  35.083149
1           XGBoost   951.048584  32.452242
2  GradientBoosting   961.337798  32.198458
3        ElasticNet  3307.388993  27.333673
4               SVR  3374.946352  28.530993
Mejor: RandomForest RMSE test: 3698.855


# Creación de modelos: entrenamiento, guardado y registro

## Random Forest

In [9]:
# Path handed to the trainer as `model_path`.
# NOTE(review): the Gradient Boosting section uses this same path, so whichever
# training cell runs last determines the file's contents -- confirm that is intended.
MODEL_FILE = 'best_model_pipeline.joblib'

# Trainer wrapper. `exp` and `run_nm` look like the MLflow experiment name and
# run label (the recorded output links to an MLflow run) -- TODO confirm.
trainer = ModeloEspecial(
    model_path=MODEL_FILE,
    exp="Power_Consumption_Prediction",
    run_nm="RF",
)

# Random Forest estimator with fixed hyperparameters and a fixed seed.
rf_model = RandomForestRegressor(
    n_estimators=700,
    max_features=3,
    min_samples_split=2,
    min_samples_leaf=1,
    n_jobs=-1,
    random_state=42,
)

# Train on `df`; the two returned objects are kept as `x_test` and
# `y_true_labels` so later cells can reuse them.
x_test, y_true_labels = trainer.train_and_save(df=df, model=rf_model)

Starting model training...




Training complete.




🏃 View run RF_PowerConsumption_Zone2_20251030_1812 at: http://127.0.0.1:5000/#/experiments/781331707748066146/runs/fa577bada661468eab3f76f3f15d2cd5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/781331707748066146


KeyboardInterrupt: 

## Gradient Boosting

In [8]:
# Same path as the Random Forest section; the recorded output of this cell
# reports the model being saved to this file.
MODEL_FILE = 'best_model_pipeline.joblib'

# Gradient Boosting estimator with fixed hyperparameters and a fixed seed.
# It gets its own name so this cell no longer rebinds `rf_model`, which the
# Random Forest section uses for a different estimator.
gbr_model = GradientBoostingRegressor(
    n_estimators=600,
    learning_rate=0.1,
    max_depth=5,
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=42,
)
trainer = ModeloEspecial(model_path=MODEL_FILE, exp="Power_Consumption_Prediction", run_nm="GBR")

# Train on `df`; the two returned objects are kept as `x_test` and
# `y_true_labels` so the prediction cells can reuse them.
x_test, y_true_labels = trainer.train_and_save(df=df, model=gbr_model)

Starting model training...




Training complete.


Registered model 'PowerConsumption_Zone2_Pipeline' already exists. Creating a new version of this model...
2025/10/30 18:07:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: PowerConsumption_Zone2_Pipeline, version 2


MLFlow Run ID: 326092553b5843f8a6a0b99ae69c802f
Model successfully saved to: best_model_pipeline.joblib

Model performance on the x_test dataset:
Test RMSE: 2832.299
🏃 View run GBR_PowerConsumption_Zone2_20251030_1806 at: http://127.0.0.1:5000/#/experiments/781331707748066146/runs/326092553b5843f8a6a0b99ae69c802f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/781331707748066146


Created version '2' of model 'PowerConsumption_Zone2_Pipeline'.


In [15]:
MODEL_FILE = 'best_model_pipeline.joblib'

# A fresh instance built from the saved-model path only; no DataFrame or
# estimator definition is passed here.
predictor = ModeloEspecial(model_path=MODEL_FILE)

# Fail loudly when loading does not succeed: the cells below call
# `predictor.predict` unconditionally, so silently skipping this step would
# only surface later as a confusing error.
if not predictor.load_model():
    raise RuntimeError(f"Could not load a trained pipeline from {MODEL_FILE!r}")

# `x_test` is whatever the most recently executed training cell returned.
unseen_predictions = predictor.predict(x_test)
print("Prediction successful.")

Model successfully loaded from: best_model_pipeline.joblib
Prediction successful.


In [16]:
# Show the first row of the loaded frame to check its columns and values.
df.head(1)

Unnamed: 0,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumption_Zone1,PowerConsumption_Zone2,PowerConsumption_Zone3,Day,Month,Hour,Minute,DayWeek,QuarterYear,DayYear
0,6.559,73.8,0.083,0.051,5.567,34055.6962,16128.87538,20240.96386,1,1,0,0,7,1,1


In [17]:
# One-row frame used for a quick prediction check.
# NOTE(review): this row comes from the same `df` handed to train_and_save, so
# it may have been part of the training data -- not an independent test sample.
df_test = df.head(1)

In [18]:
# Predict for the single row with the predictor set up in the loading cell.
# NOTE(review): df_test still holds every column of df, including the
# PowerConsumption_* ones; presumably the pipeline selects its own features -- confirm.
test_prediction = predictor.predict(df_test) 

In [19]:
# Display the prediction returned for the one-row input.
test_prediction

array([18392.45153046])