In [1]:
# Package setup.
# tidymodels is attached first, followed by each package the notebook calls
# directly. Any package that is not installed yet is installed from the
# default repository before being attached. Attaching a package that is
# already on the search path (e.g. dplyr after tidymodels) is a no-op.
required_packages <- c(
  "dplyr", "rsample", "recipes", "parsnip",
  "workflows", "tune", "glmnet", "yardstick"
)

for (pkg in c("tidymodels", required_packages)) {
  is_installed <- requireNamespace(pkg, quietly = TRUE)
  if (!is_installed) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──

✔ broom        1.0.6      ✔ recipes      1.0.10
✔ dials        1.2.1      ✔ rsample      1.2.1
✔ dplyr        1.1.4      ✔ tibble       3.2.1
✔ ggplot2      3.5.1      ✔ tidyr        1.3.1
✔ infer        1.0.7      ✔ tune         1.2.1
✔ modeldata    1.3.0      ✔ workflows    1.1.4
✔ parsnip      1.2.1      ✔ workflowsets 1.1.0
✔ purrr        1.0.2      ✔ yardstick    1.3.1

── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ purrr::discard() masks scales::discard()
[31m✖[39m [34mdplyr[39m::[

In [2]:
# Read the climate records for all stations from the CSV file in the
# working directory.
# To restrict the analysis to a single station, filter afterwards, e.g.
#   stations_data <- stations_data %>% filter(stations == "boromo")
stations_data <- read.csv("stations_clim_data.csv")

# Show the table size as (number of rows, number of columns).
dim(stations_data)

In [3]:
# List the distinct station names present in the data.
unique(stations_data[["stations"]])

In [4]:
# Preview the first six rows of the table.
head(stations_data, n = 6L)

Unnamed: 0_level_0,Date,stations,tx,tn,rh,rs,ws,et0
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1988-01-01,bobo,29.5,15.0,32,15.70637,1.72,4.46152
2,1988-01-02,bobo,31.4,17.5,27,15.64143,1.67,4.731423
3,1988-01-03,bobo,27.5,17.8,22,13.36923,2.11,4.928952
4,1988-01-04,bobo,27.5,15.0,22,15.92401,1.89,4.762524
5,1988-01-05,bobo,29.3,14.4,24,15.28877,1.11,3.817034
6,1988-01-06,bobo,29.5,16.5,26,14.89933,0.78,3.357057


Modèle en boucle

In [5]:
# Names of the predictor columns: every column except the date, the station
# identifier, and the response (et0). Columns are excluded by name rather
# than by position so that the selection stays correct if the CSV gains or
# reorders a column.
predictors <- setdiff(colnames(stations_data), c("Date", "stations", "et0"))

In [6]:
# Distinct station names, in order of first appearance in the data.
stations_names <- unique(stations_data[["stations"]])

In [7]:
# Tune, fit, and save one glmnet regression model of et0 per station.
#
# For each station found in `stations_data`, the loop:
#   1. splits that station's rows 80/20 into training and test sets,
#   2. tunes `penalty` and `mixture` over a regular 10 x 10 grid using
#      5-fold cross-validation on the training set, scored by RMSE,
#   3. refits the best configuration on the whole training set, and
#   4. writes the fitted workflow to "glmnet_model_<station>.rds" in the
#      working directory.
stations_names <- unique(stations_data$stations)
ns <- length(stations_names)

# The model specification, the tuning grid, and the metric set do not depend
# on the station, so they are built once rather than on every iteration.
# None of them consumes random numbers, so results are unaffected.
glmnet_model <- linear_reg(penalty = tune(), mixture = tune()) %>%
  set_engine("glmnet") %>%
  set_mode("regression")

glmnet_grid <- grid_regular(penalty(), mixture(), levels = 10)

rmse_metrics <- metric_set(rmse)

for (i in seq_along(stations_names)) {
  station <- stations_names[[i]]
  cat("Processing station:", station, " (", i, "/", ns, ")\n")

  # Keep only the rows of the current station. The `.data` / `.env` pronouns
  # make explicit that `stations` is a column and `station` is the loop
  # variable, so the filter cannot be hijacked by a column named `station`.
  station_data <- stations_data %>%
    filter(.data$stations == .env$station)

  # A station name with no matching rows (for example NA) cannot be split or
  # resampled; report it and move on instead of aborting the whole run.
  if (nrow(station_data) == 0) {
    warning("No rows found for station '", station, "'; skipping.",
            call. = FALSE)
    next
  }

  # Reseed before every split so each station's split and folds are
  # reproducible independently of the processing order.
  set.seed(123)
  data_split <- initial_split(station_data, prop = 0.8)
  train_data <- training(data_split)
  # Held-out rows; not used below, kept available for later evaluation.
  test_data <- testing(data_split)

  # Preprocessing: centre and scale all predictors using training statistics.
  # Named `station_recipe` so it does not shadow the `recipe()` function.
  station_recipe <- recipe(et0 ~ tx + tn + rh + rs + ws, data = train_data) %>%
    step_normalize(all_predictors())

  # Bundle preprocessing and model into a single workflow.
  glmnet_workflow <- workflow() %>%
    add_recipe(station_recipe) %>%
    add_model(glmnet_model)

  cat("Grid length:", nrow(glmnet_grid), "\n")

  # 5-fold cross-validation on the training set.
  cv_folds <- vfold_cv(train_data, v = 5)

  # Evaluate every grid point on every fold.
  glmnet_fit <- tune_grid(
    glmnet_workflow,
    resamples = cv_folds,
    grid = glmnet_grid,
    metrics = rmse_metrics,
    control = control_grid(save_pred = TRUE, parallel_over = "everything")
  )

  # Pick the hyperparameters with the lowest cross-validated RMSE.
  best_params <- select_best(glmnet_fit, metric = "rmse")

  # Plug the selected hyperparameters in and refit on all training rows.
  final_glmnet <- finalize_workflow(glmnet_workflow, best_params)
  glmnet_final_model <- fit(final_glmnet, data = train_data)

  # Persist the fitted workflow, one RDS file per station.
  saveRDS(glmnet_final_model, file = paste0("glmnet_model_", station, ".rds"))
}

Processing station: bobo  ( 1 / 9 )
Grid length: 100 
Processing station: boromo  ( 2 / 9 )
Grid length: 100 
Processing station: dori  ( 3 / 9 )
Grid length: 100 
Processing station: dedougou  ( 4 / 9 )
Grid length: 100 
Processing station: fada  ( 5 / 9 )
Grid length: 100 
Processing station: gaoua  ( 6 / 9 )
Grid length: 100 
Processing station: ouaga  ( 7 / 9 )
Grid length: 100 
Processing station: ouahigouya  ( 8 / 9 )
Grid length: 100 
Processing station: po  ( 9 / 9 )
Grid length: 100 
