In [3]:
# Package setup: install whatever is missing, then attach everything.

# Packages that are installed on demand, in installation order.
installable_pkgs <- c(
  "kernlab", "tidymodels", "ranger", "xgboost", "earth", "nnet", "kknn",
  "shapviz", "DALEX", "bonsai", "doParallel"
)
for (pkg in installable_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Packages attached to the session, in attach order. `parallel` is attached
# as well but is not part of the install-on-demand list above.
attached_pkgs <- c(
  "tidymodels", "kernlab", "ranger", "xgboost", "earth", "nnet", "kknn",
  "shapviz", "DALEX", "bonsai", "parallel", "doParallel"
)
for (pkg in attached_pkgs) {
  library(pkg, character.only = TRUE)
}

In [6]:
# Load the climate records of all stations from the CSV in the working
# directory. To work on one station only, pipe the result into
# filter(stations == "boromo").
stations_data <- read.csv("stations_clim_data.csv")

# Number of rows and columns that were read.
dim(stations_data)

In [7]:
# Distinct station labels present in the data.
unique(stations_data[["stations"]])

In [5]:
# Preview the first six rows of the loaded table.
head(x = stations_data, n = 6L)

Unnamed: 0_level_0,Date,stations,tx,tn,rh,rs,ws,et0
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1988-01-01,bobo,29.5,15.0,32,15.70637,1.72,4.46152
2,1988-01-02,bobo,31.4,17.5,27,15.64143,1.67,4.731423
3,1988-01-03,bobo,27.5,17.8,22,13.36923,2.11,4.928952
4,1988-01-04,bobo,27.5,15.0,22,15.92401,1.89,4.762524
5,1988-01-05,bobo,29.3,14.4,24,15.28877,1.11,3.817034
6,1988-01-06,bobo,29.5,16.5,26,14.89933,0.78,3.357057


Modèle en boucle

In [5]:
# Names of the predictor columns: every column except the date, the station
# label and the target (et0). Excluding by name instead of by position keeps
# the result correct if the column order of the CSV changes.
predictors <- setdiff(colnames(stations_data), c("Date", "stations", "et0"))

ERROR: Error in eval(expr, envir, enclos): object 'stations_data' not found


In [8]:
# Station labels, in order of first appearance; used to drive the per-station
# loops.
stations_names <- unique(stations_data[["stations"]])

In [7]:
# Tune, fit and save one random-forest model of et0 per station.
#
# For every station: keep that station's rows, split them 80/20, tune
# trees / min_n / mtry on a regular grid with 4-fold cross-validation (RMSE),
# refit the best configuration on the training set and save the fitted
# workflow to "rand_forest_model_<station>.rds" in the working directory.
ns <- length(stations_names)
for (i in seq_along(stations_names)) {
  station <- stations_names[[i]]
  cat("Processing station:", station, " ( ", i, "/", ns, " ) ", "\n")

  # Keep only the current station's rows; drop the label and date columns so
  # that only the five predictors and the target remain.
  station_data <- stations_data %>%
    filter(stations == station) %>%
    select(-stations, -Date)

  # Train/test split (80 % / 20 %), seeded for reproducibility.
  set.seed(123)
  data_split <- initial_split(station_data, prop = 0.8)
  train_data <- training(data_split)
  test_data <- testing(data_split)

  # Preprocessing: normalize every predictor. The object is not called
  # `recipe` so that it does not shadow the recipe() function.
  station_recipe <- recipe(et0 ~ tx + tn + rh + rs + ws, data = train_data) %>%
    step_normalize(all_predictors())

  # Random-forest specification with all three hyperparameters left to tune.
  rand_forest_model <- rand_forest(
    trees = tune(),
    min_n = tune(),
    mtry = tune()
  ) %>%
    set_engine("ranger") %>%
    set_mode("regression")

  # Workflow = preprocessing + model.
  rand_forest_workflow <- workflow() %>%
    add_recipe(station_recipe) %>%
    add_model(rand_forest_model)

  # Regular (not random) grid with 5 levels per hyperparameter. mtry ranges
  # from 1 to the number of predictors (all columns of train_data except et0).
  # The seed is reset here so the cross-validation folds below are
  # reproducible.
  set.seed(123)
  rand_forest_grid <- grid_regular(
    trees(),
    min_n(),
    mtry(range = c(1, ncol(train_data) - 1)),
    levels = 5
  )

  cat("Grid length:", nrow(rand_forest_grid), "\n")

  # 4-fold cross-validation on the training set.
  cv_folds <- vfold_cv(train_data, v = 4)

  # Evaluate every grid point on every fold.
  # NOTE(review): `parallel_over = "everything"` only matters when a parallel
  # backend is registered; none is registered in the visible part of this
  # notebook, so tuning presumably runs sequentially -- confirm.
  rand_forest_fit <- tune_grid(
    rand_forest_workflow,
    resamples = cv_folds,
    grid = rand_forest_grid,
    metrics = metric_set(rmse),
    control = control_grid(save_pred = TRUE, parallel_over = "everything")
  )

  # Hyperparameters with the best cross-validated RMSE.
  best_params <- select_best(rand_forest_fit, metric = "rmse")

  # Plug the best hyperparameters into the workflow and refit on the whole
  # training set.
  final_rand_forest <- finalize_workflow(rand_forest_workflow, best_params)
  rand_forest_final_model <- fit(final_rand_forest, data = train_data)

  # Persist the fitted workflow for the evaluation step.
  saveRDS(
    rand_forest_final_model,
    file = paste0("rand_forest_model_", station, ".rds")
  )
}

Processing station: bobo  (  1 / 9  )  
Grid length: 125 
Processing station: boromo  (  2 / 9  )  
Grid length: 125 
Processing station: dori  (  3 / 9  )  
Grid length: 125 
Processing station: dedougou  (  4 / 9  )  
Grid length: 125 
Processing station: fada  (  5 / 9  )  
Grid length: 125 
Processing station: gaoua  (  6 / 9  )  
Grid length: 125 
Processing station: ouaga  (  7 / 9  )  
Grid length: 125 
Processing station: ouahigouya  (  8 / 9  )  
Grid length: 125 
Processing station: po  (  9 / 9  )  
Grid length: 125 


In [13]:
# Evaluate every saved station model on its held-out test set.
#
# Results:
#   dt_predictions - one row per date and one column of test-set predictions
#                    per station (NA where the date is not in that station's
#                    test set)
#   dt_metrics     - yardstick metrics for each station
# Side effect: each station's training set is written to "train_<station>.csv".

# Date index built from the dates of all stations. Using a single reference
# station here would silently drop, in the left joins below, every prediction
# whose date is missing for that station.
dt_predictions <- stations_data %>%
  dplyr::distinct(Date)

# Per-station metrics are collected in a list and bound once after the loop
# instead of growing a data frame with rbind() on every iteration.
metrics_by_station <- vector("list", length(stations_names))
names(metrics_by_station) <- stations_names

for (station in stations_names) {
  mdl <- readRDS(paste0("rand_forest_model_", station, ".rds"))
  station_rows <- stations_data %>%
    dplyr::filter(stations == station)

  # Train/test split with the same seed and proportion as used when the
  # models were trained, so the same rows end up in the test set.
  set.seed(123)
  data_split <- initial_split(station_rows, prop = 0.8)
  train_data <- training(data_split)
  test_data <- testing(data_split)

  write.table(
    x = train_data, file = paste0("train_", station, ".csv"),
    append = FALSE, quote = FALSE, sep = ",", row.names = FALSE
  )

  # Predict on the test set and keep the observed values alongside.
  dt_prediction <- predict(mdl, test_data) %>%
    bind_cols(test_data)

  # Date + prediction, with the prediction column named after the station.
  dt_prediction2 <- dt_prediction %>%
    dplyr::select(Date, .pred)
  colnames(dt_prediction2)[2] <- station

  metrics_by_station[[station]] <- dt_prediction %>%
    metrics(truth = et0, estimate = .pred) %>%
    mutate(stations = station)

  dt_predictions <- dt_predictions %>%
    left_join(dt_prediction2, by = "Date")
}

dt_metrics <- dplyr::bind_rows(metrics_by_station)

In [10]:
# Export the per-station test-set predictions as an unquoted CSV without row
# names.
write.csv(dt_predictions, file = "rf_predictions.csv",
          quote = FALSE, row.names = FALSE)


In [11]:
# Export the per-station evaluation metrics as an unquoted CSV without row
# names.
write.csv(dt_metrics, file = "rf_metrics.csv",
          quote = FALSE, row.names = FALSE)


ERROR: Error in eval(expr, envir, enclos): object 'station_data' not found


In [1]:
library(tidymodels)
library(nnet)
library(baguette)
library(stringr)
library(rlang)

── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.2.0 ──

[32m✔[39m [34mbroom       [39m 1.0.6      [32m✔[39m [34mrecipes     [39m 1.0.10
[32m✔[39m [34mdials       [39m 1.2.1      [32m✔[39m [34mrsample     [39m 1.2.1 
[32m✔[39m [34mdplyr       [39m 1.1.4      [32m✔[39m [34mtibble      [39m 3.2.1 
[32m✔[39m [34mggplot2     [39m 3.5.1      [32m✔[39m [34mtidyr       [39m 1.3.1 
[32m✔[39m [34minfer       [39m 1.0.7      [32m✔[39m [34mtune        [39m 1.2.1 
[32m✔[39m [34mmodeldata   [39m 1.3.0      [32m✔[39m [34mworkflows   [39m 1.1.4 
[32m✔[39m [34mparsnip     [39m 1.2.1      [32m✔[39m [34mworkflowsets[39m 1.1.0 
[32m✔[39m [34mpurrr       [39m 1.0.2      [32m✔[39m [34myardstick   [39m 1.3.1 

── [1mConflicts[22m ───────────────────────────────────────── tidymodels_conflicts() ──
[31m✖[39m [34mpurrr[39m::[32mdiscard()[39m masks [34mscales[39m::discard()
[31m✖[39m [34mdplyr[39m::[

In [20]:
# Collect the hyperparameters of every saved random-forest workflow into one
# data frame: one row per model file, holding the model name, the station name
# and one column per model argument.

# `pattern` is a regular expression, not a glob: anchor it and restrict it to
# .rds files so that only the saved workflows are picked up.
models <- dir(pattern = "^rand_forest_model_.*\\.rds$")

# File names look like "<model>_model_<station>.rds".
model_file_regex <- "^(.*)_model_(.*)\\.rds$"

models_parameters <- dplyr::bind_rows(lapply(models, function(model) {
  model_name <- sub(model_file_regex, "\\1", model)
  station_name <- sub(model_file_regex, "\\2", model)

  ml_model <- readRDS(model)

  # Model arguments are stored as quosures on the parsnip specification;
  # unwrap each one to get the plain value. Arguments that were left unset
  # (NULL) are skipped because they cannot form a column.
  model_params <- extract_spec_parsnip(ml_model)$args
  param_values <- Filter(
    Negate(is.null),
    lapply(model_params, quo_get_expr)
  )

  as.data.frame(
    c(list(models = model_name, stations = station_name), param_values),
    stringsAsFactors = FALSE
  )
}))

In [21]:
# Preview the collected hyperparameters, then export the full table as an
# unquoted CSV without row names.
head(x = models_parameters, n = 6L)
write.csv(models_parameters, file = "fitted_values.csv",
          quote = FALSE, row.names = FALSE)

Unnamed: 0_level_0,models,stations,mtry,trees,min_n
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<int>
1,rand_forest,bobo,4,2000,2
2,rand_forest,boromo,4,2000,2
3,rand_forest,dedougou,4,2000,2
4,rand_forest,dori,4,2000,2
5,rand_forest,fada,5,1500,2
6,rand_forest,gaoua,4,1500,2


In [None]:


# Scratch cell (it carries no execution count in this notebook).

# File name of the saved model for the current value of `station`.
paste0("rand_forest_model_", station, ".rds")

# Write the hyperparameter table once per entry of `all_models` and keep a
# copy of it in a list keyed by that entry.
# NOTE(review): `all_models` is not defined in the visible part of this
# notebook, and the same `models_parameters` table is written for every
# entry -- confirm this is intended before running.
all_models_parameters <- list()
for (dir_name in all_models) {
  print(dir_name)

  write.csv(
    models_parameters,
    file = file.path("G:/PROJET/Article/ET_ML/Fitted", paste0(dir_name, ".csv")),
    quote = FALSE,
    row.names = FALSE
  )
  all_models_parameters[[dir_name]] <- models_parameters
}