# Modelo de Machine Learning

## Importación de librerías

In [15]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.linear_model import Ridge
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score

## Carga de datasets

In [5]:
X_train = pd.read_csv("../../data/final/X_train_final.csv", sep =";")
X_train.head()

Unnamed: 0,tempMedia,tempMax,tempMin,humedadMedia,humedadMax,humedadMin,velViento,dirViento,velVientoMax,dirVientoVelMax,...,precipitacion,altitud,lon,lat,dia_del_año_sin,dia_del_año_cos,año,mes,mes_sin,mes_cos
0,-1.149191,-0.644914,-1.635524,0.18961,0.079664,0.032829,-0.429441,-1.506338,-0.459902,1.510954,...,-0.292242,-0.821274,-0.961184,-1.212781,0.025922,1.417772,-1.705504,-1.607476,0.717969,1.22936
1,-1.156422,-1.448554,-0.80796,1.178122,0.419198,2.146815,-0.29923,-1.52109,-0.855152,1.46254,...,-0.292242,-0.821274,-0.961184,-1.212781,0.050257,1.417144,-1.705504,-1.607476,0.717969,1.22936
2,-1.27791,-0.97966,-1.227846,0.945063,0.500391,0.757307,-0.091935,-1.337465,-0.387162,1.301512,...,-0.24223,-0.821274,-0.961184,-1.212781,0.074578,1.416096,-1.705504,-1.607476,0.717969,1.22936
3,-0.935141,-0.685034,-1.161667,0.264644,0.079664,0.163946,0.179945,-0.951233,0.710951,-0.820273,...,-0.292242,-0.821274,-0.961184,-1.212781,0.098877,1.41463,-1.705504,-1.607476,0.717969,1.22936
4,-1.040719,-0.745213,-1.291134,0.207231,0.101808,0.223394,-0.512776,-1.057488,-0.678998,-0.514003,...,-0.292242,-0.821274,-0.961184,-1.212781,0.123147,1.412746,-1.705504,-1.607476,0.717969,1.22936


In [6]:
X_test = pd.read_csv("../../data/final/X_test_final.csv", sep =";")
X_test.head()

Unnamed: 0,tempMedia,tempMax,tempMin,humedadMedia,humedadMax,humedadMin,velViento,dirViento,velVientoMax,dirVientoVelMax,...,precipitacion,altitud,lon,lat,dia_del_año_sin,dia_del_año_cos,año,mes,mes_sin,mes_cos
0,-1.198364,-1.299361,-1.23138,0.621054,0.36753,0.752307,0.637245,1.05317,0.210096,1.105752,...,0.457943,-0.821274,-0.961184,-1.212781,0.025922,1.417772,1.792456,-1.607476,0.717969,1.22936
1,-1.810143,-1.518763,-1.920642,0.751794,0.278956,0.445626,-0.772155,1.554529,-1.026924,1.539413,...,-0.24223,-0.821274,-0.961184,-1.212781,0.050257,1.417144,1.792456,-1.607476,0.717969,1.22936
2,-1.897498,-1.451062,-2.146487,0.405048,0.020615,-0.194403,-0.877366,-0.825378,-1.013778,1.136274,...,-0.24223,-0.821274,-0.961184,-1.212781,0.074578,1.416096,1.792456,-1.607476,0.717969,1.22936
3,-1.717725,-1.400913,-1.984412,0.490313,0.027996,0.378956,-0.681529,-0.94917,-0.610202,1.309932,...,-0.24223,-0.821274,-0.961184,-1.212781,0.098877,1.41463,1.792456,-1.607476,0.717969,1.22936
4,-1.345885,-1.333211,-1.307037,0.507366,-0.00891,0.703416,-0.528401,-1.366453,-0.881005,-1.871693,...,-0.292242,-0.821274,-0.961184,-1.212781,0.123147,1.412746,1.792456,-1.607476,0.717969,1.22936


In [7]:
y_test = pd.read_csv("../../data/final/y_test_final.csv", sep =";")
y_test.head()

Unnamed: 0,et0
0,1.41021
1,0.914485
2,0.952486
3,1.01542
4,1.062508


In [8]:
y_train = pd.read_csv("../../data/final/y_train_final.csv", sep =";")
y_train.head()

Unnamed: 0,et0
0,1.495588
1,0.776324
2,1.327342
3,1.796508
4,1.371563


## Modelos

Comenamoz definiendo un modelo base

In [11]:
mlflow.set_experiment("TFM_Riego_Pred_ET0_TimeSeries")

2025/10/18 14:22:02 INFO mlflow.tracking.fluent: Experiment with name 'TFM_Riego_Pred_ET0_TimeSeries' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///home/lola/Escritorio/TFM-Sistema-Riego-Agricola/notebooks/modelado/mlruns/362603264655283847', creation_time=1760790122663, experiment_id='362603264655283847', last_update_time=1760790122663, lifecycle_stage='active', name='TFM_Riego_Pred_ET0_TimeSeries', tags={}>

In [16]:
with mlflow.start_run(run_name="Baseline_Ridge"):
    ridge_model = Ridge(random_state=42)
    
    tscv = TimeSeriesSplit(n_splits=5) # Por ejemplo, 5 pliegues
    
    rmse_scores = []
    r2_scores = []

    for fold, (train_index, val_index) in enumerate(tscv.split(X_train)):
        X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
        y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

        ridge_model.fit(X_train_fold, y_train_fold)
        predictions = ridge_model.predict(X_val_fold)
        
        rmse_scores.append(np.sqrt(mean_squared_error(y_val_fold, predictions)))
        r2_scores.append(r2_score(y_val_fold, predictions))
        
        print(f"Fold {fold+1} -> RMSE: {rmse_scores[-1]:.4f}, R²: {r2_scores[-1]:.4f}")

    mlflow.log_params(ridge_model.get_params())
    mlflow.log_param("cv_method", "TimeSeriesSplit")
    mlflow.log_param("n_splits", tscv.n_splits)
    mlflow.log_metric("rmse_cv_mean", np.mean(rmse_scores))
    mlflow.log_metric("rmse_cv_std", np.std(rmse_scores))
    mlflow.log_metric("r2_cv_mean", np.mean(r2_scores))
    mlflow.log_metric("r2_cv_std", np.std(r2_scores))
    mlflow.sklearn.log_model(ridge_model, "ridge_cv_model")
    
    print(f"RMSE CV Medio (Ridge): {np.mean(rmse_scores):.4f} | R² CV Medio (Ridge): {np.mean(r2_scores):.4f}")



Fold 1 -> RMSE: 0.4858, R²: 0.9521
Fold 2 -> RMSE: 0.4357, R²: 0.9615
Fold 3 -> RMSE: 0.4339, R²: 0.9615
Fold 4 -> RMSE: 0.4155, R²: 0.9622
Fold 5 -> RMSE: 0.3534, R²: 0.9690




RMSE CV Medio (Ridge): 0.4249 | R² CV Medio (Ridge): 0.9613
