In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from keras_tuner import RandomSearch

2025-01-21 14:18:26.996538: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737469107.017318   22772 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737469107.023412   22772 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-21 14:18:27.046777: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# 0. Data

In [2]:
# Location of the ozone dataset, relative to the notebook.
file_path = "../data/ozone_complet.csv"

# The file is read as semicolon-delimited with a comma as the decimal mark.
df = pd.read_csv(
    file_path,
    decimal=",",
    sep=";",
)

In [None]:
# Display the freshly loaded DataFrame for a quick visual check.
df

Unnamed: 0,date,maxO3,T6,T9,T12,T15,T18,Ne6,Ne9,Ne12,...,Vdir9,Vvit9,Vdir12,Vvit12,Vdir15,Vvit15,Vdir18,Vvit18,Vx,maxO3v
0,19950401,47.6,10.1,11.6,13.3,13.6,12.2,8.0,8.0,8.0,...,290.0,4.0,300.0,4.0,340.0,4.0,20.0,4.0,-3.4641,62.2
1,19950402,56.2,9.5,9.4,13.8,17.4,16.3,8.0,8.0,7.0,...,160.0,2.0,180.0,3.0,110.0,1.0,350.0,2.0,0.0000,47.6
2,19950403,61.8,3.6,8.0,16.8,21.5,20.2,4.0,5.0,2.0,...,20.0,2.0,340.0,1.0,170.0,2.0,170.0,3.0,-0.3420,56.2
3,19950404,50.8,9.5,10.5,11.4,12.2,11.4,8.0,7.0,7.0,...,10.0,4.0,350.0,3.0,350.0,3.0,350.0,4.0,-0.5209,61.8
4,19950405,59.8,9.8,10.8,13.8,14.3,13.3,8.0,7.0,8.0,...,340.0,2.0,280.0,1.0,320.0,3.0,350.0,4.0,-0.9848,50.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1459,20020926,76.0,7.0,13.7,17.0,17.9,15.3,4.0,7.0,8.0,...,290.0,4.0,330.0,5.0,330.0,6.0,330.0,3.0,-2.5000,79.0
1460,20020927,79.0,10.2,11.5,18.3,20.0,17.1,9.0,9.0,2.0,...,70.0,2.0,120.0,1.0,90.0,3.0,100.0,2.0,0.8660,76.0
1461,20020928,91.0,6.2,14.1,19.8,21.1,18.1,0.0,0.0,1.0,...,120.0,3.0,120.0,4.0,120.0,4.0,100.0,2.0,3.4641,79.0
1462,20020929,89.0,8.1,14.8,20.4,22.0,18.4,4.0,5.0,5.0,...,120.0,4.0,130.0,5.0,130.0,4.0,140.0,1.0,3.8302,91.0


In [4]:
# Count the missing values in each column.
df.isna().sum()

date       0
maxO3     73
T6         1
T9         1
T12        1
T15        1
T18        1
Ne6        2
Ne9        2
Ne12       2
Ne15       2
Ne18       5
Vdir6      1
Vvit6      1
Vdir9      1
Vvit9      1
Vdir12     1
Vvit12     1
Vdir15     1
Vvit15     1
Vdir18     1
Vvit18     1
Vx         1
maxO3v    73
dtype: int64

In [5]:
# Remove every row that contains at least one missing value.
# `df` is rebound to the filtered copy rather than mutated with inplace=True:
# in-place mutation brings no performance benefit and makes notebook state
# harder to reason about when cells are re-run.
df = df.dropna()

In [6]:
# Convert every column to a numeric dtype; values that cannot be parsed
# are replaced by NaN because of errors='coerce'.
df = df.apply(pd.to_numeric, errors='coerce')

# Coercion can create NaN values that did not exist when missing rows were
# removed earlier, so drop incomplete rows again to keep NaN out of the
# scaler and the network.
df = df.dropna()

# Check the resulting dtypes and the remaining number of rows/columns.
print(df.dtypes)
print(df.shape)

date        int64
maxO3     float64
T6        float64
T9        float64
T12       float64
T15       float64
T18       float64
Ne6       float64
Ne9       float64
Ne12      float64
Ne15      float64
Ne18      float64
Vdir6     float64
Vvit6     float64
Vdir9     float64
Vvit9     float64
Vdir12    float64
Vvit12    float64
Vdir15    float64
Vvit15    float64
Vdir18    float64
Vvit18    float64
Vx        float64
maxO3v    float64
dtype: object
(1366, 24)


# 1. Pre-processing

In [None]:
# Target is the "maxO3v" column; every other column is used as a predictor.
# NOTE(review): this keeps the integer `date` column as a feature - confirm
# that is intended.
y = df["maxO3v"]
X = df.drop(columns=["maxO3v"])

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Hold out 10% of the rows as a test set (fixed random_state for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Standardize the features: statistics are fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

X shape: (1366, 23)
y shape: (1366,)


# 2. Modeling

In [8]:
# Model factory handed to Keras Tuner.
def build_model(hp):
    """Build and compile a dense regression network from sampled hyperparameters.

    Hyperparameters registered on ``hp``:
      - ``num_layers``: number of hidden Dense layers, between 1 and 5.
      - ``units_<i>``: width of hidden layer ``i``, from 8 to 128 in steps of 8.
      - ``activation``: 'relu', 'tanh' or 'sigmoid'.
      - ``lr``: Adam learning rate, log-sampled between 1e-4 and 1e-2.

    Args:
        hp: hyperparameter container supplied by the tuner.

    Returns:
        A compiled ``tf.keras.Sequential`` with a single linear output unit,
        MSE loss and an MAE metric.
    """
    network = tf.keras.Sequential()

    # Depth of the network is itself a tuned hyperparameter.
    depth = hp.Int('num_layers', min_value=1, max_value=5)
    for layer_idx in range(depth):
        width = hp.Int(f'units_{layer_idx}', min_value=8, max_value=128, step=8)
        # The same hyperparameter name is requested on every iteration.
        act_fn = hp.Choice('activation', values=['relu', 'tanh', 'sigmoid'])
        network.add(tf.keras.layers.Dense(units=width, activation=act_fn))

    # Output layer: one linear unit for regression.
    network.add(tf.keras.layers.Dense(1, activation='linear'))

    step_size = hp.Float('lr', min_value=1e-4, max_value=1e-2, sampling='log')
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=step_size),
        loss='mse',       # mean squared error is the training loss
        metrics=['mae'],  # mean absolute error is reported alongside it
    )
    return network

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialization and data shuffling are repeatable between runs (GPU kernels
# may still introduce small non-deterministic differences).
tf.keras.utils.set_random_seed(42)

# Tuner configuration.
# NOTE: when state already exists under directory/project_name the tuner
# reloads it and resumes ("Reloading Tuner from ..."); pass overwrite=True
# to RandomSearch to start a fresh search instead.
tuner = RandomSearch(
    build_model,
    objective='val_mae',  # minimize the validation mean absolute error
    max_trials=20,  # total number of hyperparameter configurations to try
    executions_per_trial=1,  # train each configuration a single time
    seed=42,  # reproducible hyperparameter sampling (same seed as the train/test split)
    directory='keras_tuner',
    project_name='optimal_layers_neurons'
)

# Run the hyperparameter search on the scaled training data.
tuner.search(
    X_train_scaled, y_train,
    epochs=50,
    batch_size=32,
    # 10% of the training data is held back for validation.
    validation_split=0.1,
    callbacks=[
        # Stop a trial once val_loss has not improved for 5 epochs and
        # roll the model back to its best weights.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
        ),
    ],
    verbose=1,
)

# Best hyperparameter set found by the tuner.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Print a summary of the winning configuration.
n_layers = best_hps.get('num_layers')
print("Meilleurs hyperparamètres trouvés :")
print(f"Nombre de couches : {n_layers}")
for layer_no in range(1, n_layers + 1):
    # Layers are stored 0-based (units_0, units_1, ...) but reported 1-based.
    layer_units = best_hps.get(f'units_{layer_no - 1}')
    print(f"  - Neurones dans la couche {layer_no} : {layer_units}")
print(f"Activation : {best_hps.get('activation')}")
print(f"Taux d'apprentissage : {best_hps.get('lr')}")

I0000 00:00:1737469109.303044   22772 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 719 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:86:00.0, compute capability: 7.5


Reloading Tuner from keras_tuner/optimal_layers_neurons/tuner0.json

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
4                 |2                 |num_layers
112               |24                |units_0
tanh              |tanh              |activation
0.0024629         |0.00083746        |lr
24                |8                 |units_1

Epoch 1/50


I0000 00:00:1737469111.354452   23072 service.cc:148] XLA service 0x7fbb14016090 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1737469111.354546   23072 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2025-01-21 14:18:31.393301: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
E0000 00:00:1737469111.533036   23072 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was compiled with: 9.3.0.  CuDNN library needs to have matching major version and equal or higher minor version. If using a binary install, upgrade your CuDNN library.  If building from sources, make sure the library loaded at runtime is compatible with the version specified during compile configuration.
E0000 00:00:1737469111.577632   23072 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was compiled with: 9

RuntimeError: Number of consecutive failures exceeded the limit of 3.
Traceback (most recent call last):
  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/hypermodel.py", line 149, in fit
    return model.fit(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/opt/conda/lib/python3.12/site-packages/tensorflow/python/eager/execute.py", line 59, in quick_execute
    except TypeError as e:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/opt/conda/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/opt/conda/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/opt/conda/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/opt/conda/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/opt/conda/lib/python3.12/asyncio/base_events.py", line 1986, in _run_once

  File "/opt/conda/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/tmp/ipykernel_22772/3022856722.py", line 32, in <module>

  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 234, in search

  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial

  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial

  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial

  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model

  File "/opt/conda/lib/python3.12/site-packages/keras_tuner/src/engine/hypermodel.py", line 149, in fit

  File "/opt/conda/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/conda/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/opt/conda/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/opt/conda/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

DNN library initialization failed. Look at the errors above for more details.
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_2178]
