In [1]:
# Mount Google Drive into the Colab runtime (prints "Mounted at ..." on success).
from google.colab import drive
drive.mount('/content/drive')

# Make the project folder the working directory so the relative CSV paths
# read by later cells resolve against it.
import os
DIR = "/content/drive/MyDrive/MLOPS-40"
os.chdir(DIR)

Mounted at /content/drive


In [4]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.multioutput import MultiOutputRegressor

In [5]:
# Load the pre-split feature tables (X_*) and target tables (Y_*) from the
# current working directory; files are read in train / val / test order.
X_train, X_val, X_test = map(pd.read_csv, ('X_train.csv', 'X_val.csv', 'X_test.csv'))
Y_train, Y_val, Y_test = map(pd.read_csv, ('Y_train.csv', 'Y_val.csv', 'Y_test.csv'))

In [19]:
# Evaluation function
def evaluate_model(model_name, y_true, y_pred):
    """Print MAE, RMSE and R^2 for every target column.

    Columns are paired by position: column ``i`` of ``y_pred`` is scored
    against column ``i`` of ``y_true``; column labels of ``y_pred`` are
    not consulted.

    Parameters
    ----------
    model_name : str
        Label shown in the header printed for each column.
    y_true : pandas.DataFrame or pandas.Series
        Ground-truth values. A Series is treated as a single-column frame.
    y_pred : pandas.DataFrame, pandas.Series or array-like
        Predictions. One-dimensional input (e.g. the output of a
        single-target regressor) is treated as a single column.

    Returns
    -------
    None
        The metrics are only printed.
    """
    # Single-target convenience: a Series has no ``.columns`` to iterate over.
    if isinstance(y_true, pd.Series):
        y_true = y_true.to_frame()

    # Normalise predictions to a 2-D array so DataFrame, ndarray and list
    # inputs share one positional-indexing path; a 1-D vector cannot be
    # sliced with ``[:, i]``.
    y_pred_2d = np.asarray(y_pred)
    if y_pred_2d.ndim == 1:
        y_pred_2d = y_pred_2d.reshape(-1, 1)

    for i, zone in enumerate(y_true.columns):
        y_true_col = y_true.iloc[:, i]
        y_pred_col = y_pred_2d[:, i]

        mae = mean_absolute_error(y_true_col, y_pred_col)
        mse = mean_squared_error(y_true_col, y_pred_col)
        rmse = np.sqrt(mse)  # RMSE derived from MSE
        r2 = r2_score(y_true_col, y_pred_col)
        print(f"\n {model_name} - Zona: {zone}")
        print(f"   - MAE:  {mae:.4f}")
        print(f"   - RMSE: {rmse:.4f}")
        print(f"   - R^2:  {r2:.4f}")


In [20]:
# Sanity check on one target column. The zone is picked explicitly (last
# column of Y_train) instead of relying on a loop variable left over from
# another cell, so this cell also works on a freshly restarted kernel.
zone = Y_train.columns[-1]
print(f"{zone} type:", type(Y_train[zone]))
print(f"{zone} shape:", Y_train[zone].shape)
print(f"{zone} head:\n", Y_train[zone].head())


Zone 3  Power Consumption type: <class 'pandas.core.series.Series'>
Zone 3  Power Consumption shape: (23002,)
Zone 3  Power Consumption head:
 0    2.197292
1   -0.342114
2    1.463807
3   -1.133418
4    1.706870
Name: Zone 3  Power Consumption, dtype: float64


In [21]:
# Model 1: XGBoost tuned with GridSearchCV, one independent regressor per zone
xgb_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8],
}

# Both dicts are keyed by target column name:
#   best_models -> refitted best estimator, y_pred_dict -> its test-set predictions
best_models, y_pred_dict = {}, {}

for zone in Y_train.columns:
    print(f"\n Ajustando XGBoost para {zone}...")

    y = Y_train[zone]
    xgb_base = XGBRegressor(objective='reg:squarederror', random_state=42, verbosity=0)

    # 3-fold cross-validated search scored by R^2; fit() returns the fitted search object
    grid = GridSearchCV(
        estimator=xgb_base,
        param_grid=xgb_param_grid,
        scoring='r2',
        cv=3,
        n_jobs=-1,
        verbose=1,
    ).fit(X_train, y)

    best_models[zone] = grid.best_estimator_

    y_pred = grid.best_estimator_.predict(X_test)
    print(f"{zone} prediction shape: {y_pred.shape}")
    y_pred_dict[zone] = y_pred

# Assemble the per-zone predictions into a frame sharing X_test's index
Y_pred_xgb = pd.DataFrame(y_pred_dict, index=X_test.index)

# Report test-set metrics per zone
evaluate_model("XGBoost", Y_test, Y_pred_xgb)



 Ajustando XGBoost para Zone 1 Power Consumption...
Fitting 3 folds for each of 8 candidates, totalling 24 fits
Zone 1 Power Consumption prediction shape: (4929,)

 Ajustando XGBoost para Zone 2  Power Consumption...
Fitting 3 folds for each of 8 candidates, totalling 24 fits
Zone 2  Power Consumption prediction shape: (4929,)

 Ajustando XGBoost para Zone 3  Power Consumption...
Fitting 3 folds for each of 8 candidates, totalling 24 fits
Zone 3  Power Consumption prediction shape: (4929,)

 XGBoost - Zona: Zone 1 Power Consumption
   - MAE:  0.1594
   - RMSE: 0.2143
   - R^2:  0.9535

 XGBoost - Zona: Zone 2  Power Consumption
   - MAE:  0.1781
   - RMSE: 0.2375
   - R^2:  0.9429

 XGBoost - Zona: Zone 3  Power Consumption
   - MAE:  0.1107
   - RMSE: 0.1534
   - R^2:  0.9764


In [23]:
# Model 2: SVR tuned with GridSearchCV, one independent regressor per zone
svr_param_grid = {
    'C': [1, 10],
    'epsilon': [0.01, 0.1],
    'kernel': ['rbf'],
    'gamma': ['scale', 'auto'],
}

# Both dicts are keyed by target column name:
#   best_models_svr -> refitted best estimator, y_pred_svr_dict -> its test-set predictions
best_models_svr, y_pred_svr_dict = {}, {}

for zone in Y_train.columns:
    print(f"\n Ajustando SVR para {zone}...")

    y = Y_train[zone]
    # If the selection came back as a DataFrame, keep only its first column
    y = y.iloc[:, 0] if isinstance(y, pd.DataFrame) else y

    svr_base = SVR()

    # 3-fold cross-validated search scored by R^2; fit() returns the fitted search object
    grid = GridSearchCV(
        estimator=svr_base,
        param_grid=svr_param_grid,
        scoring='r2',
        cv=3,
        n_jobs=-1,
        verbose=1,
    ).fit(X_train, y)

    best_models_svr[zone] = grid.best_estimator_

    y_pred = grid.best_estimator_.predict(X_test)
    y_pred_svr_dict[zone] = y_pred

# Assemble the per-zone predictions into a frame sharing X_test's index
Y_pred_svr = pd.DataFrame(y_pred_svr_dict, index=X_test.index)

# Report test-set metrics per zone
evaluate_model("SVR", Y_test, Y_pred_svr)


 Ajustando SVR para Zone 1 Power Consumption...
Fitting 3 folds for each of 8 candidates, totalling 24 fits

 Ajustando SVR para Zone 2  Power Consumption...
Fitting 3 folds for each of 8 candidates, totalling 24 fits

 Ajustando SVR para Zone 3  Power Consumption...
Fitting 3 folds for each of 8 candidates, totalling 24 fits

 SVR - Zona: Zone 1 Power Consumption
   - MAE:  0.2119
   - RMSE: 0.3042
   - R^2:  0.9062

 SVR - Zona: Zone 2  Power Consumption
   - MAE:  0.2332
   - RMSE: 0.3309
   - R^2:  0.8890

 SVR - Zona: Zone 3  Power Consumption
   - MAE:  0.1882
   - RMSE: 0.2702
   - R^2:  0.9268
