<a href="https://colab.research.google.com/github/linamarwabelkilani/modelisation-hydrologique/blob/main/Copie_de_nanouchz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#import dataset from github
import pandas as pd
# Source CSV (semicolon-separated) hosted on GitHub.
data_url = "https://raw.githubusercontent.com/linamarwabelkilani/modelisation-hydrologique/main/Dataset.csv"

# Malformed rows are skipped by the parser rather than aborting the load.
raw_df = pd.read_csv(data_url, on_bad_lines='skip', sep=";")

# Cast straight to the nullable 'string' dtype. Going through astype(str)
# first can turn a missing Station into the literal text "nan" (depending on
# the pandas version), which would then survive dropna() as a bogus station.
df = raw_df.astype({'Station': 'string', 'Date': 'string'})

# Dates are day-first; anything unparseable becomes NaT and is removed by the
# dropna() below together with rows missing a Station or a Precipitation.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
df = df.dropna()
print(df.dtypes)
df

Date             datetime64[ns]
Station                  string
Precipitation           float64
dtype: object


Unnamed: 0,Date,Station,Precipitation
0,1951-06-01,AIN BEYA OUED RHEZAL,0.0
1,1951-06-02,AIN BEYA OUED RHEZAL,0.0
2,1951-06-03,AIN BEYA OUED RHEZAL,0.0
3,1951-06-04,AIN BEYA OUED RHEZAL,0.0
4,1951-06-05,AIN BEYA OUED RHEZAL,0.0
...,...,...,...
2103688,2015-08-27,ZOUARINE GARE,3.0
2103689,2015-08-28,ZOUARINE GARE,7.0
2103690,2015-08-29,ZOUARINE GARE,0.0
2103691,2015-08-30,ZOUARINE GARE,0.0


In [12]:
import numpy as np
from scipy.interpolate import interp1d
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from pykrige import OrdinaryKriging

# Gap filling by 1-D interpolation over the known samples.
# The function name is kept for compatibility; the body only performs
# scipy `interp1d` interpolation, no probability weighting is involved.
def inverse_probability_weighting(x, y, kind):
    """Interpolate ``y`` over ``x`` from the samples where ``y`` is not NaN.

    Parameters
    ----------
    x : array-like
        Independent variable, one entry per sample.
    y : array-like of float
        Dependent variable; NaN marks a missing sample.
    kind : str
        Any ``kind`` accepted by ``scipy.interpolate.interp1d``
        ('linear', 'nearest', 'zero', 'quadratic', 'cubic', ...), or
        'spline', which is treated as an alias for 'cubic'.

    Returns
    -------
    numpy.ndarray
        The interpolant evaluated at every ``x``. Positions outside the range
        of the known samples (leading/trailing gaps) receive the value of the
        nearest known sample instead of raising.

    Raises
    ------
    ValueError
        If ``y`` has no known value, or if ``interp1d`` rejects the input
        (for example too few known samples for the requested ``kind``).
    """
    x = np.asarray(x)
    y = np.asarray(y, dtype=float)

    # Keep only the samples whose value is known.
    known_indices = ~np.isnan(y)
    x_known = x[known_indices]
    y_known = y[known_indices]
    if x_known.size == 0:
        raise ValueError("y contains no known (non-NaN) values to interpolate from")

    # 'spline' was always implemented as scipy's cubic interpolation.
    scipy_kind = 'cubic' if kind == 'spline' else kind

    # Outside the known range, hold the value found at the smallest / largest
    # known x. interp1d would otherwise raise for a gap at either end.
    edge_values = (y_known[np.argmin(x_known)], y_known[np.argmax(x_known)])

    interpolator = interp1d(
        x_known,
        y_known,
        kind=scipy_kind,
        bounds_error=False,
        fill_value=edge_values,
    )
    return interpolator(x)

# Unique station names
stations = df['Station'].unique()

# Interpolation kinds to compare
kind_types = ['linear', 'nearest', 'zero', 'quadratic', 'cubic', 'spline']

# NOTE(review): every kind is scored on the same known points it was fitted
# on. The recorded output shows MSE 0.0 / R^2 1.0 with 'linear' chosen for
# each station, which is what an exact interpolant produces; 'linear' wins
# only because it is tried first and the comparison is strict. A hold-out of
# known values would be needed for a meaningful ranking - TODO confirm intent.
# NOTE(review): `y` is a local copy; the filled values are not written back
# to `df`.

# For each station, pick the kind with the lowest MSE and fill the gaps with it
for station in stations:
    station_data = df[df['Station'] == station]
    # Row position within the station's records is the independent variable
    x = np.arange(len(station_data))
    # Explicit writable copy of the precipitation values
    y = station_data['Precipitation'].to_numpy(dtype=float, copy=True)

    # Masks are computed once per station and reused for scoring and filling
    missing = np.isnan(y)
    known = ~missing

    best_kind = None
    best_prediction = None
    best_mse = float('inf')
    best_mae = float('inf')
    best_r2 = -float('inf')

    # Evaluate every candidate kind
    for kind in kind_types:
        y_predicted = inverse_probability_weighting(x, y, kind)

        mse = mean_squared_error(y[known], y_predicted[known])
        mae = mean_absolute_error(y[known], y_predicted[known])
        r2 = r2_score(y[known], y_predicted[known])

        if mse < best_mse:
            best_kind = kind
            best_mse = mse
            best_mae = mae
            best_r2 = r2
            # Keep the winning prediction: `y_predicted` is overwritten by
            # the following kinds and ends up holding the last one tried.
            best_prediction = y_predicted

    print(f"Station: {station}")
    print(f"Best Kind: {best_kind}")

    # Fill the missing values with the prediction of the best kind
    if best_prediction is not None:
        y[missing] = best_prediction[missing]

    print("Metrics for the best kind:")
    print(f"MSE: {best_mse}")
    print(f"MAE: {best_mae}")
    print(f"R^2: {best_r2}")

Station: AIN BEYA OUED RHEZAL
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN DEBBA
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN JEMMALA
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN GUESIL 1
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN HAMRAYA
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN KERMA 1
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN MERJA
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN S'KOUM
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN SALLEM
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN TABIA
Best Kind: linear
Metrics for the best kind:
MSE: 0.0
MAE: 0.0
R^2: 1.0
Station: AIN TOUNGA SE
Best Kind: linear
Metrics for the best kind:


In [17]:
def _zero_percentage(precipitation):
    """Return the percentage of entries in ``precipitation`` that equal 0."""
    zero_count = (precipitation == 0).sum()
    return zero_count / len(precipitation) * 100


# One group of rows per station
station_groups = df.groupby('Station')

# (station name, percentage of zero-precipitation rows) for every station
results = [
    (station_name, _zero_percentage(station_data['Precipitation']))
    for station_name, station_data in station_groups
]

# Ascending by percentage of zeros
sorted_results = sorted(results, key=lambda entry: entry[1])

# Report each station with its percentage of zeros
for station, percentage in sorted_results:
    print(f"Station: {station} - Pourcentage de zéros: {percentage}%")

Station: TIBAR SM - Pourcentage de zéros: 73.48592985318108%
Station: BEN METIR 2 SM - Pourcentage de zéros: 74.07556756320818%
Station: FEIJA EL SM - Pourcentage de zéros: 75.04981672365864%
Station: KEF.B.I.R.H - Pourcentage de zéros: 75.70070876288659%
Station: AIN DEBBA - Pourcentage de zéros: 75.76343955977505%
Station: BARRAGE LAKMES - Pourcentage de zéros: 76.2273009735552%
Station: BARRAGE KASSEB - Pourcentage de zéros: 76.29959213941416%
Station: KEF CMA - Pourcentage de zéros: 76.34841909485431%
Station: SMADAH UCP - Pourcentage de zéros: 76.61702127659574%
Station: SK EL ARBA (JENDOUBA - Pourcentage de zéros: 77.09526019908195%
Station: OUED MLIZ INRAT - Pourcentage de zéros: 77.11069418386491%
Station: SLOUGUIA - Pourcentage de zéros: 77.2482651182552%
Station: BOU HEURTMA - Pourcentage de zéros: 77.3529411764706%
Station: GDOUD UCP - Pourcentage de zéros: 77.7001048584411%
Station: BOU SALEM DRE - Pourcentage de zéros: 78.01422349633387%
Station: KEF EN NESOUR - Pourcentag

In [18]:
# Report whether the "Precipitation" column contains any null entry or any
# entry equal to the empty string.
precipitation = df['Precipitation']
if precipitation.isnull().sum() != 0 or (precipitation == '').sum() != 0:
    print("Des valeurs manquantes sont présentes dans la colonne 'Precipitation'.")
else:
    print("Aucune valeur manquante dans la colonne 'Precipitation'.")

Aucune valeur manquante dans la colonne 'Precipitation'.
