In [1]:
from EvalModelo import Evaluador
from CargaDatos import CargaDatasets
from Modelo import ModeloEspecial
from pathlib import Path
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

In [2]:
# Folder that holds the processed dataset, given relative to the notebook's
# working directory.
carpeta = Path("..") / "data" / "processed"

# Build the project loader for the processed CSV and read it into `df`, the
# frame that the evaluation, training and prediction cells below all consume.
# Note that the loader's keyword is `carpeta_raw` even though the folder passed
# here is the processed one.
carga = CargaDatasets(carpeta_raw=carpeta, nombre_modificado="power_tetouan_city_processed.csv")
df = carga.leer()


# Evaluador de modelos (CV)

In [None]:
# Cross-validate candidate models on `df`, then fit the winner and report it.
# `Evaluador` is a project helper (imported from EvalModelo); it is built from
# the full frame `df`.
train = Evaluador(df)
# cross_validate() returns a pair; only the first element (the CV results
# table) is kept, the second is discarded.
cv_table, _ = train.cross_validate()
# fit_best() returns two values, bound here as the fitted object and its RMSE.
# NOTE(review): presumably this refits the top-ranked CV candidate and scores
# it on a held-out test split — confirm in EvalModelo.
best_pipe, test_rmse = train.fit_best()
print(cv_table)
# "Mejor" = "Best": prints the selected model's name (train.best_name_) and
# the RMSE rounded to three decimals.
print("Mejor:", train.best_name_, "RMSE test:", round(test_rmse, 3))

# Creación de modelos - train, save and register

## Random Forest

In [None]:
# Destination of the fitted pipeline. The Gradient Boosting cell further down
# writes to this same path, so the file on disk belongs to whichever training
# cell ran last.
MODEL_FILE = 'best_model_pipeline.joblib'
# Requires `df` (loaded by the data cell near the top of the notebook).

# Random-forest estimator: seed pinned for repeatable runs, n_jobs=-1 lets
# scikit-learn use every available core.
rf_model = RandomForestRegressor(
    n_estimators=700,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features=3,
    random_state=42,
    n_jobs=-1,
)

# Project wrapper that is told where to save the model and which experiment /
# run name to use.
trainer = ModeloEspecial(
    model_path=MODEL_FILE,
    exp="Power_Consumption_Prediction",
    run_nm="RF",
)

# Fit the estimator on `df` and persist the fitted pipeline; the call hands
# back two values, kept as `x_test` and `y_true_labels` for the cells below.
x_test, y_true_labels = trainer.train_and_save(df=df, model=rf_model)

## Gradient Boosting

In [4]:
# Destination of the fitted pipeline. This is the same path the Random Forest
# cell writes to, so running this cell replaces whatever that cell saved.
MODEL_FILE = 'best_model_pipeline.joblib'
# Requires `df` (loaded by the data cell near the top of the notebook).

# 1. Define the estimator and the trainer object.
# The estimator is named `gbr_model` so it no longer masquerades as (and
# overwrites) the random forest bound to `rf_model` in the previous cell.
gbr_model = GradientBoostingRegressor(
    n_estimators=600, learning_rate=0.1, max_depth=5,
    min_samples_split=5, min_samples_leaf=3, random_state=42,
)
# NOTE(review): the Random Forest cell uses exp="Power_Consumption_Prediction"
# while this cell uses "Power_Consumption_Pred" — confirm whether both runs are
# meant to be tracked under one experiment before aligning the names.
trainer = ModeloEspecial(model_path=MODEL_FILE, exp="Power_Consumption_Pred", run_nm="GBR")

# 2. Fit the estimator on `df` and save the fitted pipeline to MODEL_FILE.
# The call returns two values, kept as `x_test` and `y_true_labels` for the
# prediction cell below.
x_test, y_true_labels = trainer.train_and_save(df=df, model=gbr_model)

Starting model training...
Training complete.
MLFlow Run ID: b570994bb7604b96b317cdb85843bcc0
Model successfully saved to: best_model_pipeline.joblib

Model performance on the x_test dataset:
Test RMSE: 2832.299
🏃 View run GBR_PowerConsumption_Zone2_20251110_2232 at: https://dagshub.com/garc1a0scar/mna-mlops-team43.mlflow/#/experiments/1/runs/b570994bb7604b96b317cdb85843bcc0
🧪 View experiment at: https://dagshub.com/garc1a0scar/mna-mlops-team43.mlflow/#/experiments/1


In [None]:
# Path of the pipeline saved by the training cells above.
MODEL_FILE = 'best_model_pipeline.joblib'
# Requires `x_test` from whichever training cell ran last: it is the split
# returned by train_and_save(), not freshly collected data.

# 1. Instantiate the predictor; only the model path is needed here, not the
#    DataFrame or an estimator definition.
predictor = ModeloEspecial(model_path=MODEL_FILE)

# 2. Load the trained pipeline. Stop immediately if loading fails: carrying on
#    would leave `unseen_predictions` undefined and make the later predict
#    cells fail with a far less obvious error.
if not predictor.load_model():
    raise RuntimeError(
        f"Could not load a trained pipeline from {MODEL_FILE!r}; "
        "run one of the training cells first."
    )

# 3. Predict on the `x_test` split.
unseen_predictions = predictor.predict(x_test)
print("Prediction successful.")

In [None]:
# Display the first row of the loaded frame.
df.head(1)

In [None]:
# One-row frame (first row of `df`) used as a quick input for the predictor.
# NOTE(review): this keeps every column of `df`; if the target column is among
# them, confirm that predict() tolerates or drops it.
df_test = df.head(1)

In [None]:
# Run the predictor on the one-row sample; needs `predictor` from the
# load-model cell and `df_test` from the cell just above.
test_prediction = predictor.predict(df_test)

In [None]:
# Display the prediction obtained for the one-row sample.
test_prediction