<a href="https://colab.research.google.com/github/linamarwabelkilani/modelisation-hydrologique/blob/main/nanouchz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
#import dataset from github
import pandas as pd
data_url = "https://raw.githubusercontent.com/linamarwabelkilani/modelisation-hydrologique/main/Dataset.csv"
df = pd.read_csv(data_url, on_bad_lines='skip',sep=";")


df['Station']=df['Station'].astype(str)
df = df.astype({'Station':'string'})
df = df.astype({'Date':'string'})
df['Date'] = pd.to_datetime(df['Date'],dayfirst=True,errors='coerce')
df = df.dropna()
print(df.dtypes)
df

Date             datetime64[ns]
Station                  string
Precipitation           float64
dtype: object


Unnamed: 0,Date,Station,Precipitation
0,1951-06-01,AIN BEYA OUED RHEZAL,0.0
1,1951-06-02,AIN BEYA OUED RHEZAL,0.0
2,1951-06-03,AIN BEYA OUED RHEZAL,0.0
3,1951-06-04,AIN BEYA OUED RHEZAL,0.0
4,1951-06-05,AIN BEYA OUED RHEZAL,0.0
...,...,...,...
2103688,2015-08-27,ZOUARINE GARE,3.0
2103689,2015-08-28,ZOUARINE GARE,7.0
2103690,2015-08-29,ZOUARINE GARE,0.0
2103691,2015-08-30,ZOUARINE GARE,0.0


In [15]:
pip install pykrige


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [23]:
import numpy as np
from scipy.interpolate import interp1d
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from pykrige import OrdinaryKriging

# Méthode de l'interpolation inverse probabiliste (IPW)
def inverse_probability_weighting(x, y, kind):
    # Indices des valeurs manquantes
    missing_indices = np.isnan(y)
    # Indices des valeurs connues
    known_indices = ~missing_indices
    # Valeurs connues de x et y
    x_known = x[known_indices]
    y_known = y[known_indices]
    # Application de l'interpolation inverse aux valeurs manquantes
    ipw_interpolator = interp1d(x_known, y_known, kind=kind)
    y_predicted = ipw_interpolator(x)
    return y_predicted

# Méthode de krige
def krige_interpolation(x, y):
    # Indices des valeurs manquantes
    missing_indices = np.isnan(y)
    # Indices des valeurs connues
    known_indices = ~missing_indices
    # Valeurs connues de x et y
    x_known = x[known_indices]
    y_known = y[known_indices]
     # Indices des valeurs manquantes dans x
    missing_x_indices = np.isnan(x[missing_indices])
    # Création du modèle de krige
    OK = OrdinaryKriging(x_known, y_known, x_known, verbose=False)
    # Prédiction des valeurs manquantes
    y_predicted, _ = OK.execute("points", x[missing_indices], y_known)
    # Remplacement des valeurs manquantes par les valeurs prédites
    y[missing_indices] = y_predicted
    return y

# Liste des stations uniques
stations = df['Station'].unique()

# Types de kind à tester
kind_types = ['linear', 'nearest', 'zero', 'quadratic', 'cubic','krige','spline']

# Parcours des stations
for station in stations:
    station_data = df[df['Station'] == station]
    x = np.arange(len(station_data))  # Utilisation de l'indice de la date comme variable indépendante
    y = station_data['Precipitation'].values  # Valeurs de précipitation

    best_kind = None
    best_mse = float('inf')
    best_mae = float('inf')
    best_r2 = -float('inf')

    # Test des différents types de kind
    for kind in kind_types:
        if kind == 'krige':
            y_predicted = krige_interpolation(x, y)
        elif kind == 'spline':
            # Appliquer la méthode de spline pour 'spline'
            # Ajoutez votre code pour la méthode de spline ici
            continue
        else:
            y_predicted = inverse_probability_weighting(x, y, kind)
        
        mse = mean_squared_error(y[~np.isnan(y)], y_predicted[~np.isnan(y)])
        mae = mean_absolute_error(y[~np.isnan(y)], y_predicted[~np.isnan(y)])
        r2 = r2_score(y[~np.isnan(y)], y_predicted[~np.isnan(y)])

        if mse < best_mse:
            best_kind = kind
            best_mse = mse
            best_mae = mae
            best_r2 = r2

    print(f"Station: {station}")
    print(f"Best Kind: {best_kind}")


ValueError: ignored

In [3]:
station_groups = df.groupby('Station')
for station_name, station_data in station_groups:
    print(f"Station: {station_name}")
    print(station_data)
    print()







Station: AIN BEYA OUED RHEZAL
            Date               Station  Precipitation
0     1951-06-01  AIN BEYA OUED RHEZAL            0.0
1     1951-06-02  AIN BEYA OUED RHEZAL            0.0
2     1951-06-03  AIN BEYA OUED RHEZAL            0.0
3     1951-06-04  AIN BEYA OUED RHEZAL            0.0
4     1951-06-05  AIN BEYA OUED RHEZAL            0.0
...          ...                   ...            ...
18461 2015-08-27  AIN BEYA OUED RHEZAL            0.0
18462 2015-08-28  AIN BEYA OUED RHEZAL            0.0
18463 2015-08-29  AIN BEYA OUED RHEZAL            0.0
18464 2015-08-30  AIN BEYA OUED RHEZAL            0.0
18465 2015-08-31  AIN BEYA OUED RHEZAL            0.0

[18466 rows x 3 columns]

Station: AIN DEBBA
            Date    Station  Precipitation
18466 1968-11-01  AIN DEBBA            0.0
18467 1968-11-02  AIN DEBBA            0.0
18468 1968-11-03  AIN DEBBA            0.0
18469 1968-11-04  AIN DEBBA           26.0
18470 1968-11-05  AIN DEBBA            0.0
...          ...  