# Modelo de Machine Learning

## Importación de librerías

In [16]:
import pandas as pd
import numpy as np
import os


import mlflow
from sklearn.linear_model import Ridge
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV

from sklearn.metrics import mean_squared_error, r2_score

## Carga de datasets

In [17]:
X_train = pd.read_csv("../../data/final/X_train_final.csv", sep =";")
X_train.head()

Unnamed: 0,tempMedia,tempMax,tempMin,humedadMedia,humedadMax,humedadMin,velViento,dirViento,velVientoMax,dirVientoVelMax,...,precipitacion,altitud,lon,lat,dia_del_año_sin,dia_del_año_cos,año,mes,mes_sin,mes_cos
0,-1.149191,-0.644914,-1.635524,0.189294,0.081925,0.030319,-0.423513,-1.502747,-0.453085,1.508065,...,-0.249695,-0.821274,-0.961184,-1.212781,0.025922,1.417772,-1.705504,-1.607476,0.717969,1.22936
1,-1.156422,-1.448554,-0.80796,1.172613,0.417881,2.123171,-0.296844,-1.517465,-0.838102,1.459742,...,-0.249695,-0.821274,-0.961184,-1.212781,0.050257,1.417144,-1.705504,-1.607476,0.717969,1.22936
2,-1.27791,-0.97966,-1.227846,0.940778,0.498219,0.747554,-0.095188,-1.334265,-0.382228,1.299015,...,-0.211286,-0.821274,-0.961184,-1.212781,0.074578,1.416096,-1.705504,-1.607476,0.717969,1.22936
3,-0.935141,-0.685034,-1.161667,0.263933,0.081925,0.160125,0.169296,-0.948927,0.687454,-0.818792,...,-0.249695,-0.821274,-0.961184,-1.212781,0.098877,1.41463,-1.705504,-1.607476,0.717969,1.22936
4,-1.040719,-0.745213,-1.291134,0.206823,0.103835,0.218978,-0.504581,-1.054936,-0.666509,-0.513097,...,-0.249695,-0.821274,-0.961184,-1.212781,0.123147,1.412746,-1.705504,-1.607476,0.717969,1.22936


In [18]:
X_test = pd.read_csv("../../data/final/X_test_final.csv", sep =";")
X_test.head()

Unnamed: 0,tempMedia,tempMax,tempMin,humedadMedia,humedadMax,humedadMin,velViento,dirViento,velVientoMax,dirVientoVelMax,...,precipitacion,altitud,lon,lat,dia_del_año_sin,dia_del_año_cos,año,mes,mes_sin,mes_cos
0,-1.198364,-1.299361,-1.23138,0.618471,0.366758,0.742604,0.614156,1.050836,0.199566,1.103622,...,0.326445,-0.821274,-0.961184,-1.212781,0.025922,1.417772,1.792456,-1.607476,0.717969,1.22936
1,-1.810143,-1.518763,-1.920642,0.748525,0.279117,0.438989,-0.756905,1.551034,-1.005427,1.573196,...,-0.211286,-0.821274,-0.961184,-1.212781,0.050257,1.417144,1.792456,-1.607476,0.717969,1.22936
2,-1.897498,-1.451062,-2.146487,0.4036,0.023498,-0.194642,-0.859253,-0.823363,-0.992622,1.134087,...,-0.211286,-0.821274,-0.961184,-1.212781,0.074578,1.416096,1.792456,-1.607476,0.717969,1.22936
3,-1.717725,-1.400913,-1.984412,0.488417,0.030801,0.372986,-0.668743,-0.946869,-0.599494,1.307419,...,-0.211286,-0.821274,-0.961184,-1.212781,0.098877,1.41463,1.792456,-1.607476,0.717969,1.22936
4,-1.345885,-1.333211,-1.307037,0.505381,-0.005716,0.694201,-0.519781,-1.363186,-0.863287,-1.868241,...,-0.249695,-0.821274,-0.961184,-1.212781,0.123147,1.412746,1.792456,-1.607476,0.717969,1.22936


In [19]:
y_test = pd.read_csv("../../data/final/y_test_final.csv", sep =";")
y_test.head()

Unnamed: 0,et0
0,1.41021
1,0.914485
2,0.952486
3,1.01542
4,1.062508


In [20]:
y_train = pd.read_csv("../../data/final/y_train_final.csv", sep =";")
y_train.head()

Unnamed: 0,et0
0,1.495588
1,0.776324
2,1.327342
3,1.796508
4,1.371563


## Modelos

Comenzamos definiendo un modelo base

In [21]:
project_root = "/home/lola/Escritorio/TFM-Sistema-Riego-Agricola" 
mlruns_path = os.path.join(project_root, "mlruns")
tracking_uri = "file://" + mlruns_path 
mlflow.set_tracking_uri(tracking_uri)

In [22]:
mlflow.set_experiment("TFM_Riego_Pred_ET0_TimeSeries")


<Experiment: artifact_location='file:///home/lola/Escritorio/TFM-Sistema-Riego-Agricola/mlruns/929868787444320189', creation_time=1760792170372, experiment_id='929868787444320189', last_update_time=1760792170372, lifecycle_stage='active', name='TFM_Riego_Pred_ET0_TimeSeries', tags={}>

In [23]:
with mlflow.start_run(run_name="Baseline_Ridge"):
    ridge_model = Ridge(random_state=42)
    
    tscv = TimeSeriesSplit(n_splits=5)
    
    rmse_scores = []
    r2_scores = []

    for fold, (train_index, val_index) in enumerate(tscv.split(X_train)):
        X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
        y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

        ridge_model.fit(X_train_fold, y_train_fold)
        predictions = ridge_model.predict(X_val_fold)
        
        rmse_scores.append(np.sqrt(mean_squared_error(y_val_fold, predictions)))
        r2_scores.append(r2_score(y_val_fold, predictions))
        
        print(f"Fold {fold+1} -> RMSE: {rmse_scores[-1]:.4f}, R²: {r2_scores[-1]:.4f}")

    mlflow.log_params(ridge_model.get_params())
    mlflow.log_param("cv_method", "TimeSeriesSplit")
    mlflow.log_param("n_splits", tscv.n_splits)
    mlflow.log_metric("rmse_cv_mean", np.mean(rmse_scores))
    mlflow.log_metric("rmse_cv_std", np.std(rmse_scores))
    mlflow.log_metric("r2_cv_mean", np.mean(r2_scores))
    mlflow.log_metric("r2_cv_std", np.std(r2_scores))
    mlflow.sklearn.log_model(ridge_model, "ridge_cv_model")
    
    print(f"RMSE CV Medio (Ridge): {round(np.mean(rmse_scores),4)} | R² CV Medio (Ridge): {round(np.mean(r2_scores),4)}")
    mlflow.end_run()



Fold 1 -> RMSE: 0.4933, R²: 0.9506
Fold 2 -> RMSE: 0.4415, R²: 0.9604
Fold 3 -> RMSE: 0.4397, R²: 0.9605
Fold 4 -> RMSE: 0.4201, R²: 0.9614
Fold 5 -> RMSE: 0.3563, R²: 0.9685




RMSE CV Medio (Ridge): 0.4302 | R² CV Medio (Ridge): 0.9603
