In [13]:
# Package setup
# Install any required package that is not yet available, checking and
# installing them one at a time in the order listed.
# ("parallel" is attached below but is part of base R, so it is not listed.)
required_pkgs <- c(
  "kernlab", "tidymodels", "ranger", "xgboost", "earth", "nnet",
  "kknn", "shapviz", "DALEX", "bonsai", "doParallel"
)
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Attach the libraries (tidymodels first, so later packages mask it on
# name clashes exactly as before).
library(tidymodels)
# Modelling engines
library(kernlab)
library(ranger)
library(xgboost)
library(earth)
library(nnet)
library(kknn)
# Model explanation
library(shapviz)
library(DALEX)
# Additional parsnip engines
library(bonsai)
# Parallel processing
library(parallel)
library(doParallel)

In [14]:
# Read the climate records of all stations from the CSV file.
# Optional single-station subset, currently disabled:
#   %>% filter(stations == "boromo")
stations_data <- read.csv("stations_clim_data.csv")
# Display the number of rows and columns that were read.
dim(stations_data)

In [15]:
# Display the distinct station labels found in the data.
unique(stations_data[["stations"]])

In [16]:
# Preview the first six rows of the data.
head(stations_data, n = 6L)

Unnamed: 0_level_0,Date,stations,tx,tn,rh,rs,ws,et0
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1988-01-01,bobo,29.5,15.0,32,15.70637,1.72,4.46152
2,1988-01-02,bobo,31.4,17.5,27,15.64143,1.67,4.731423
3,1988-01-03,bobo,27.5,17.8,22,13.36923,2.11,4.928952
4,1988-01-04,bobo,27.5,15.0,22,15.92401,1.89,4.762524
5,1988-01-05,bobo,29.3,14.4,24,15.28877,1.11,3.817034
6,1988-01-06,bobo,29.5,16.5,26,14.89933,0.78,3.357057


Modèle en boucle : un modèle MARS ajusté et sauvegardé pour chaque station

In [17]:
# Names of the predictor columns: every column except the date, the station
# label and the response (et0). Columns are excluded by name rather than by
# position so the result does not depend on the column order of the CSV.
predictors <- setdiff(colnames(stations_data), c("Date", "stations", "et0"))

In [18]:
# Distinct station labels, in order of first appearance; one model is
# fitted per entry.
stations_names <- unique(stations_data[["stations"]])

In [20]:
# Tune, fit and save one MARS regression model of et0 per station.
#
# For each station: split the data 80/20, tune num_terms and prod_degree by
# 5-fold cross-validation on the training part (RMSE), refit the best
# configuration on the whole training part and write the fitted workflow to
# "mars_model_<station>.rds" in the working directory.

# The model specification does not depend on the station, so it is built
# once, outside the loop. Both hyperparameters are left open for tuning.
mars_model <- mars(
  num_terms = tune(),
  prod_degree = tune()
) %>%
  set_engine("earth") %>%
  set_mode("regression")

ns <- length(stations_names)
for (i in seq_along(stations_names)) {
  station <- stations_names[[i]]
  cat("Processing station:", station, " ( ", i,"/",ns," )\n" )

  # Keep only the rows of the current station. The pronouns make explicit
  # that `stations` is a data column and `station` is the loop variable.
  station_data <- stations_data %>%
    filter(.data$stations == .env$station)

  # Random 80/20 train/test split, seeded so it is reproducible.
  # test_data is created here but not used inside this loop.
  set.seed(123)
  data_split <- initial_split(station_data, prop = 0.8)
  train_data <- training(data_split)
  test_data <- testing(data_split)

  # Preprocessing: normalize all predictors.
  # Named mars_recipe so the object does not mask the recipe() function.
  mars_recipe <- recipe(et0 ~ tx + tn + rh + rs + ws, data = train_data) %>%
    step_normalize(all_predictors())

  # Bundle preprocessing and model into a workflow.
  mars_workflow <- workflow() %>%
    add_recipe(mars_recipe) %>%
    add_model(mars_model)

  # The grid below is regular, not random; this seed is what makes the
  # cross-validation folds created further down reproducible.
  set.seed(123)

  # Resolve the data-dependent parameter ranges.
  # NOTE(review): finalize() receives every column of train_data (date,
  # station label and outcome included), not just the five predictors, so
  # the num_terms upper bound presumably follows the full column count --
  # confirm that this is the intended search range.
  finalized_num_terms <- finalize(num_terms(), train_data)
  finalized_prod_degree <- finalize(prod_degree(), train_data)

  # Regular grid over both parameters (up to 20 levels each).
  mars_grid <- grid_regular(
    finalized_num_terms,
    finalized_prod_degree,
    levels = 20
  )

  cat("Grid length:", nrow(mars_grid), "\n")

  # 5-fold cross-validation on the training set.
  cv_folds <- vfold_cv(train_data, v = 5)

  # Evaluate every grid point on the folds, scored by RMSE.
  # NOTE(review): parallel_over is set, but no parallel backend is
  # registered in the visible code -- confirm whether this is expected to
  # run in parallel.
  mars_fit <- tune_grid(
    mars_workflow,
    resamples = cv_folds,
    grid = mars_grid,
    metrics = metric_set(rmse),
    control = control_grid(save_pred = TRUE, parallel_over = "everything")
  )

  # Pick the configuration with the best RMSE.
  best_params <- select_best(mars_fit, metric = "rmse")

  # Plug the selected values into the workflow and refit on all training data.
  final_mars <- finalize_workflow(mars_workflow, best_params)
  mars_final_model <- fit(final_mars, data = train_data)

  # Save the fitted workflow for this station as an RDS file.
  saveRDS(mars_final_model, file = paste0("mars_model_", station, ".rds"))
}

Processing station: bobo  (  1 / 9  )
Grid length: 16 
Processing station: boromo  (  2 / 9  )
Grid length: 16 
Processing station: dori  (  3 / 9  )
Grid length: 16 
Processing station: dedougou  (  4 / 9  )
Grid length: 16 
Processing station: fada  (  5 / 9  )
Grid length: 16 
Processing station: gaoua  (  6 / 9  )
Grid length: 16 
Processing station: ouaga  (  7 / 9  )
Grid length: 16 
Processing station: ouahigouya  (  8 / 9  )
Grid length: 16 
Processing station: po  (  9 / 9  )
Grid length: 16 
