In [106]:
# Env: rive
# Cindy / Jade / Audre
import calendar
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import proplot as pplt # New plot library (https://proplot.readthedocs.io/en/latest/)
import tensorflow as tf # Machine learning
import xarray as xr
import xesmf as xe # For regridding (https://xesmf.readthedocs.io/en/latest/)
from scipy import stats

pplt.rc['savefig.dpi'] = 300 # 1200 is too big! #https://proplot.readthedocs.io/en/latest/basics.html#Creating-figures

In [107]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import MeanSquaredError, MeanAbsoluteError
import tensorflow.keras.backend as K
import pandas as pd
import numpy as np

# Chargement des librairies + données
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [108]:
# Names of the target columns in the water-quality table.
cible = "turb"
cible2 = "cod"  # dissolved organic carbon
cible3 = "chl_aa"  # chlorophyll
cibles = [cible, cible2, cible3]

echantillons = pd.read_csv(
    "../AtlasDeLEau/qualite_eau_complet.csv",
    dtype={'no_labo': 'string', 'no_station': 'string'},
)

# Parse the sampling date once and derive both calendar columns from it.
dates = pd.to_datetime(echantillons['date'])
qualite_eau = echantillons.assign(annee=dates.dt.year, mois=dates.dt.month)

# Keep June-September samples from 2016 onwards. The explicit .copy() makes the
# column assignments below write to an independent frame, not a view of it.
selection = qualite_eau['mois'].between(6, 9) & (qualite_eau['annee'] >= 2016)
qualite_eau = qualite_eau[selection].copy()

# One identifier (and one image file name) per station and year.
qualite_eau['ID'] = qualite_eau['no_station'].astype(str) + "_" + qualite_eau['annee'].astype(str)
qualite_eau['nom_fichier'] = qualite_eau['ID'] + ".png"

# Drop incomplete rows BEFORE standardising, so that the mean and variance are
# computed on exactly the rows that are kept.
qualite_eau = qualite_eau.dropna(subset=cibles)

# Keep the fitted scalers so predictions can be mapped back to the original
# units with scalers[name].inverse_transform(...).
# NOTE(review): the scalers are fitted on all rows, before the train/validation
# split done later in the notebook, so validation statistics influence the
# scaling of the training targets.
scalers = {}
for nom_cible in cibles:
    scalers[nom_cible] = StandardScaler()
    qualite_eau[nom_cible] = scalers[nom_cible].fit_transform(qualite_eau[[nom_cible]]).ravel()

In [89]:
# Display the values of the first target column (`cible`).
qualite_eau[cible]

19812    -0.224416
19813     0.038005
19814     0.084044
19819     0.130083
19820     1.373133
            ...   
132713    0.222161
132714    0.038005
132717    0.820666
132718    0.728589
132719   -0.026449
Name: turb, Length: 8965, dtype: float64

In [90]:
# Count the missing values left in the second target column (`cible2`).
qualite_eau[cible2].isna().sum()

0

In [92]:
# Display the values of the second target column (`cible2`).
qualite_eau[cible2]

19812     0.064883
19813    -0.181757
19814    -0.072139
19819    -0.044735
19820     1.544719
            ...   
132713   -0.866866
132714   -0.866866
132717    0.037478
132718   -0.126948
132719   -0.483205
Name: cod, Length: 8965, dtype: float64

In [91]:
# Count the missing values left in the third target column (`cible3`).
qualite_eau[cible3].isna().sum()

0

In [93]:
# Display the values of the third target column (`cible3`).
qualite_eau[cible3]

19812    -0.041733
19813    -0.059951
19814    -0.170915
19819    -0.054154
19820    -0.054154
            ...   
132713   -0.244616
132714   -0.262834
132717   -0.269459
132718   -0.300926
132719   -0.317488
Name: chl_aa, Length: 8965, dtype: float64

In [94]:
# Settings for the image generators and the train/validation split.
dossier_images = "../Imagerie/3km/"
colonne_noms_fichiers = "nom_fichier"
rescale_factor = 1/255
validation_fraction = 0.2
# Used as the generators' target_size and as the first two axes of the model input.
image_dimensions = (294, 201)
# Fixed seed so that the split is identical on every run. It was previously drawn
# at random and never recorded, so results could not be reproduced. The same
# value is reused as the `seed` of the image generators.
random_seed = 42

# NOTE(review): `nom_fichier` is built from station and year only, while the table
# keeps several months per year, so several rows can share one image. A row-level
# split can therefore place the same image in both sets; consider splitting by `ID`.
train_data, val_data = train_test_split(
    qualite_eau,
    test_size=validation_fraction,
    random_state=random_seed,
)

In [95]:
# Display the training subset produced by the split.
train_data

Unnamed: 0,no_labo,no_projet,no_station,date,heure,alc,cat,cf,chl_aa,clo,...,AS (mg/l),PHE (µg/l),TAN (mg/l),AG (mg/l),U (mg/l),P-T-66V (µg/l),annee,mois,ID,nom_fichier
129528,Q115855-50,210,04610001,2019-09-16T00:00:00Z,1030,,41.0,16.0,0.808719,,...,,,,,,,2019,9,04610001_2019,04610001_2019.png
68145,Q121269-08,230,03030323,2020-08-03T00:00:00Z,1030,,,6000.0,0.239819,,...,,,,,,,2020,8,03030323_2020,03030323_2020.png
131260,Q111755-06,210,03090003,2019-06-03T00:00:00Z,1615,,,62.0,-0.151869,,...,,,,,,,2019,6,03090003_2019,03090003_2019.png
130789,Q107230-40,210,03030041,2018-09-10T00:00:00Z,1400,,,94.0,-0.387048,,...,,,,,,,2018,9,03030041_2018,03030041_2018.png
65778,Q119552-75,210,02330001,2020-06-01T00:00:00Z,845,,11.0,64.0,-0.281052,,...,,,,,,,2020,6,02330001_2020,02330001_2020.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67324,Q112988-12,210,02270002,2019-07-08T00:00:00Z,915,,9.0,42.0,-0.343159,,...,,,,,,,2019,7,02270002_2019,02270002_2019.png
125797,Q089490-08,210,01150047,2016-07-05T00:00:00Z,740,,,56.0,-0.339847,,...,,,,,,,2016,7,01150047_2016,01150047_2016.png
63993,Q091617-90,210,04670004,2016-09-13T00:00:00Z,1505,,,650.0,-0.238819,,...,,,,,,,2016,9,04670004_2016,04670004_2016.png
60056,Q088600-58,210,03020333,2016-06-06T00:00:00Z,1100,,,250.0,-0.102184,,...,,,,,,,2016,6,03020333_2016,03020333_2016.png


In [96]:
# Display the validation subset produced by the split.
val_data

Unnamed: 0,no_labo,no_projet,no_station,date,heure,alc,cat,cf,chl_aa,clo,...,AS (mg/l),PHE (µg/l),TAN (mg/l),AG (mg/l),U (mg/l),P-T-66V (µg/l),annee,mois,ID,nom_fichier
67942,Q111680-75,210,03030031,2019-06-03T00:00:00Z,1022,,14.0,2200.0,1.065428,,...,,,,,,,2019,6,03030031_2019,03030031_2019.png
132584,Q106330-37,210,05050100,2018-08-14T00:00:00Z,1305,,,88.0,0.023687,,...,,,,,,,2018,8,05050100_2018,05050100_2018.png
94133,Q121292-04,210,02310056,2020-08-03T00:00:00Z,805,,8.7,490.0,-0.365517,,...,,,,,,,2020,8,02310056_2020,02310056_2020.png
94812,Q088789-01,210,05400006,2016-06-13T00:00:00Z,940,,,360.0,-0.242960,,...,,,,,,,2016,6,05400006_2016,05400006_2016.png
128062,Q106322-07,210,02240005,2018-08-14T00:00:00Z,800,,,50.0,-0.332394,,...,,,,,,,2018,8,02240005_2018,02240005_2018.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66967,Q114646-44,210,02340038,2019-08-14T00:00:00Z,1455,,17.0,8.0,-0.135307,,...,,,,,,,2019,8,02340038_2019,02340038_2019.png
65665,Q097161-87,210,05260015,2017-07-11T00:00:00Z,1107,,,2.0,-0.163463,,...,,,,,,,2017,7,05260015_2017,05260015_2017.png
99143,Q091469-10,210,05120021,2016-09-12T00:00:00Z,940,,,170.0,-0.299270,,...,,,,,,,2016,9,05120021_2016,05120021_2016.png
67950,Q121180-07,210,03030031,2020-08-03T00:00:00Z,855,,23.0,80.0,-0.104668,,...,,,,,,,2020,8,03030031_2020,03030031_2020.png


In [109]:
# Pixel values are multiplied by `rescale_factor` as each image is loaded.
datagen = ImageDataGenerator(rescale=rescale_factor)

# Settings shared by the training and validation generators; only the source
# dataframe differs between the two.
options_generateur = dict(
    directory=dossier_images,
    x_col=colonne_noms_fichiers,
    y_col=[cible, cible3],
    target_size=image_dimensions,
    # "raw" passes the y_col values through unchanged as a numeric array.
    # It replaces "other", which is a deprecated alias for the same mode.
    class_mode="raw",
    seed=random_seed,
)

training_generator = datagen.flow_from_dataframe(train_data, **options_generateur)
validation_generator = datagen.flow_from_dataframe(val_data, **options_generateur)

Found 7172 validated image filenames.
Found 1793 validated image filenames.


In [110]:
# Build the convolutional regression model.
#
# The output layer needs one unit per target column. The generators are created
# with y_col=[cible, cible3], i.e. two targets per image. With a single output
# unit, the MSE loss would silently broadcast a (batch, 1) prediction against
# (batch, 2) targets and fit one value to both targets at once.
nb_cibles = 2

model = Sequential([
    Conv2D(128, kernel_size=(3, 3), activation="relu", input_shape=(image_dimensions[0], image_dimensions[1], 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(4, 4)),
    Flatten(),
    Dense(64, activation="relu"),
    Dense(nb_cibles)  # linear activation: unbounded regression outputs
])

def r2(y_true, y_pred):
    """Coefficient of determination, usable as a Keras metric.

    Computes 1 - SS_res / SS_tot over every element of the given tensors,
    where SS_res is the sum of squared prediction errors and SS_tot is the
    sum of squared deviations of `y_true` from its overall mean. The backend
    epsilon is added to the denominator to avoid dividing by zero when
    `y_true` is constant.

    NOTE(review): as a metric this is presumably evaluated batch by batch, so
    the reported value would be an average of per-batch scores -- confirm.
    """
    erreurs = y_true - y_pred
    ecarts = y_true - K.mean(y_true)
    somme_residus = K.sum(K.square(erreurs))
    somme_totale = K.sum(K.square(ecarts))
    return 1 - somme_residus / (somme_totale + K.epsilon())

# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_56 (Conv2D)          (None, 292, 199, 128)     3584      
                                                                 
 max_pooling2d_56 (MaxPooli  (None, 146, 99, 128)      0         
 ng2D)                                                           
                                                                 
 conv2d_57 (Conv2D)          (None, 144, 97, 32)       36896     
                                                                 
 max_pooling2d_57 (MaxPooli  (None, 72, 48, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_58 (Conv2D)          (None, 70, 46, 32)        9248      
                                                                 
 max_pooling2d_58 (MaxPooli  (None, 17, 11, 32)      

In [111]:
# Configure training: Adam optimizer and mean-squared-error loss, while also
# tracking the mean absolute error and the custom r2 metric.
model.compile(
    loss="mse",
    optimizer="adam",
    metrics=["mae", r2],
)

In [112]:
# Train the model for 5 epochs, evaluating on the validation generator after
# each epoch. The returned object keeps the per-epoch loss and metric values.
history = model.fit(
    training_generator,
    epochs=5,
    validation_data=validation_generator,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Display the per-epoch values recorded during training.
history.history

In [None]:
# Learning curves: loss on the left panel, r2 metric on the right panel.
fig, axs = pplt.subplots(ncols=2, nrows=1, refwidth=4)

axs[0].plot(history.history['loss'], label='training')
axs[0].plot(history.history['val_loss'], label='validation')
axs[0].format(xlabel='Epoch', ylabel='Loss (MSE)')
# The line labels are only shown once a legend is drawn.
axs[0].legend()

axs[1].plot(history.history['r2'], label='training')
axs[1].plot(history.history['val_r2'], label='validation')
axs[1].format(xlabel='Epoch', ylabel='$r^2$')
axs[1].legend()

In [None]:
file_pi = 'qualite_eau_adam.pkl'

In [None]:
with open('/trainHistoryDict', 'wb') as file_pi:
    pickle.dump(history.history, file_pi)

In [None]:
with open('/trainHistoryDict', "rb") as file_pi:
    history = pickle.load(file_pi)