In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer


In [2]:
# Charger le fichier CSV
file_path = 'OurData.csv'
df = pd.read_csv(file_path)
# Supprimer les colonnes 'Date' et 'Heure'
data = df.drop(["Date", "Heure"], axis=1)
# Convertir toutes les colonnes en float
data = data.astype(float)

In [3]:
data.isnull().sum()

RH         1081
AirTC      1081
G          1081
Rn_Avg     1081
Le_flux    1096
Ws         1097
Psat       1081
P_air      1081
VPD        1081
Rs            0
dtype: int64

In [4]:
data.shape

(10291, 10)

In [5]:
# Imputer les valeurs manquantes par la moyenne
imputer = SimpleImputer(strategy='mean')
# Remplacer les valeurs manquantes par la moyenne dans les colonnes spécifiées
colonnes_a_traiter = ['RH', 'AirTC', 'G', 'Rn_Avg', 'Le_flux', 'Ws', 'Psat', 'P_air', 'VPD', 'Rs']
data[colonnes_a_traiter] = data[colonnes_a_traiter].fillna(-9999999999999)

In [6]:
data.isnull().sum()

RH         0
AirTC      0
G          0
Rn_Avg     0
Le_flux    0
Ws         0
Psat       0
P_air      0
VPD        0
Rs         0
dtype: int64

In [7]:
# Séparer les données en entrées (features) et cible (target)
X = data.drop("Le_flux", axis=1)
y = data["Le_flux"]

In [8]:
# Calculer la moyenne et l'écart-type des caractéristiques (features)
mean = X.mean(axis=0)
std = X.std(axis=0)

In [9]:
# Normaliser les caractéristiques (features)
X -= mean
X /= std

In [10]:
# Fractionner les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)
# Normaliser la cible (target)
mean_target = y_train.mean()
std_target = y_train.std()
y_train -= mean_target
y_train /= std_target
y_test -= mean_target
y_test /= std_target

In [11]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Create a Gradient Boosting regressor
model = GradientBoostingRegressor()

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.1, 0.01, 0.001],
    'max_depth': [3, 4, 5]
}

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

In [12]:
# Get the best model and its hyperparameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_


In [13]:

print("Best Hyperparameters:", best_params)
# Make predictions on the test set using the best model
predictions = best_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

Best Hyperparameters: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 300}
Mean Squared Error: 0.009822858364876673
Mean Absolute Error: 0.005631688245712333


In [14]:
# Make predictions on the test set using the best model
predictions = best_model.predict(X_test)

# Print the predictions and corresponding actual values
for i in range(len(predictions)):
    print(f"Actual: {y_test.iloc[i]}, Predicted: {predictions[i]}")

Actual: 0.3466090101461702, Predicted: 0.3466089430222843
Actual: 0.34660901014616374, Predicted: 0.3466090362655515
Actual: -2.884744751488302, Predicted: -2.8847443305793976
Actual: 0.34660901014616025, Predicted: 0.3466091564514371
Actual: 0.34660901014616374, Predicted: 0.3465979407158599
Actual: 0.34660901014616374, Predicted: 0.3466090362655515
Actual: 0.34660901014616713, Predicted: 0.34660899077898466
Actual: 0.3466090101461712, Predicted: 0.34660818143347394
Actual: -2.884744751488302, Predicted: -2.884744761109393
Actual: 0.3466090101461655, Predicted: 0.34660899077898466
Actual: 0.34660901014616374, Predicted: 0.3466090484002525
Actual: 0.3466090101461645, Predicted: 0.34660888117351635
Actual: 0.3466090101461668, Predicted: 0.34660818143347394
Actual: 0.34660901014616374, Predicted: 0.3466081814334743
Actual: 0.34660901014616413, Predicted: 0.34660888117351635
Actual: 0.34660901014616374, Predicted: 0.3466090362655515
Actual: 0.34660901014616374, Predicted: 0.34660904840025

In [15]:
print(type(y_test))
print(type(predictions))


<class 'pandas.core.series.Series'>
<class 'numpy.ndarray'>


In [16]:
import pickle

# Load the saved model
