In [1]:
import pandas as pd
import numpy as np
import gc
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Embedding, Concatenate, Dropout, BatchNormalization, Add, Activation
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder, StandardScaler


2025-12-11 22:42:28.340782: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-11 22:42:28.443622: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-11 22:42:32.786030: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [2]:

# Configuration
tf.random.set_seed(42)
np.random.seed(42)

# ==============================================================================
# 1. CHARGEMENT (Comme avant)
# ==============================================================================
print("Chargement des données...")
data = pd.read_csv('data/processed_data.csv')
data['date'] = pd.to_datetime(data['date'])

# Log-transform cible
data.loc[data['is_train'] == 1, 'sales'] = np.log1p(data.loc[data['is_train'] == 1, 'sales'])

if 'transactions' in data.columns:
    data.drop(columns=['transactions'], inplace=True)

ALL_COLS = [c for c in data.columns if c not in ['id', 'sales', 'is_train', 'date']]
CAT_COLS = ['store_nbr', 'family', 'city', 'state', 'type', 'cluster', 'month', 'dayofweek']
NUM_COLS = [c for c in ALL_COLS if c not in CAT_COLS]


Chargement des données...


In [None]:

# ==============================================================================
# 2. PREPROCESSING
# ==============================================================================
print("Preprocessing...")
label_encoders = {}
for col in CAT_COLS:
    le = LabelEncoder()
    data[col] = data[col].astype(str)
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

scaler = StandardScaler()
data[NUM_COLS] = scaler.fit_transform(data[NUM_COLS])
data[NUM_COLS] = data[NUM_COLS].fillna(0)

# Séparation
train_df = data[data['is_train'] == 1].copy()
test_df = data[data['is_train'] == 0].copy()

last_date = train_df['date'].max()
val_start = last_date - pd.DateOffset(days=15)
train_mask = train_df['date'] < val_start
val_mask = train_df['date'] >= val_start

def get_keras_data(df, num_cols, cat_cols):
    X_num = df[num_cols].values.astype('float32')
    X_cat = [df[c].values.astype('int32') for c in cat_cols]
    return X_num, X_cat

X_train_num, X_train_cat = get_keras_data(train_df[train_mask], NUM_COLS, CAT_COLS)
y_train = train_df.loc[train_mask, 'sales'].values.astype('float32')

X_val_num, X_val_cat = get_keras_data(train_df[val_mask], NUM_COLS, CAT_COLS)
y_val = train_df.loc[val_mask, 'sales'].values.astype('float32')

X_test_num, X_test_cat = get_keras_data(test_df, NUM_COLS, CAT_COLS)
test_ids = test_df['id'].values

del data, train_df, test_df
gc.collect()

# ==============================================================================
# 3. ARCHITECTURE RESNET TABULAIRE (ROBUSTE)
# ==============================================================================
print("Construction du modèle ResNet Tabulaire...")

def dense_residual_block(x, units, dropout_rate):
    """
    Bloc Résiduel : Input -> [Dense->BN->Act->Dropout->Dense->BN->Act->Dropout] + Input
    """
    shortcut = x
    
    # Si la dimension change, on projette le shortcut pour qu'il matche
    if x.shape[-1] != units:
        shortcut = Dense(units)(shortcut)
    
    # Couche 1
    x = Dense(units)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout_rate)(x)
    
    # Couche 2
    x = Dense(units)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout_rate)(x)
    
    # Addition (Skip Connection) : C'est ça qui rend le modèle "Deep" et robuste
    x = Add()([x, shortcut])
    return x

def build_resnet_model(num_features, cat_cols_info, label_encoders):
    inputs = []
    embeddings = []
    
    # --- Embeddings ---
    for col in cat_cols_info:
        vocab_size = len(label_encoders[col].classes_) + 1
        embed_dim = min(50, (vocab_size + 1) // 2)
        inp = Input(shape=(1,), name=f'input_{col}')
        inputs.append(inp)
        emb = Embedding(vocab_size, embed_dim)(inp)
        emb = Flatten()(emb)
        embeddings.append(emb)
    
    # --- Numériques ---
    input_num = Input(shape=(num_features,), name='input_numeric')
    inputs.append(input_num)
    
    # Fusion
    x = Concatenate()(embeddings + [input_num])
    
    # Projection initiale
    x = Dense(256)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    # --- Blocs Résiduels ---
    # On empile des blocs. Grâce au "Add", le gradient circule mieux.
    x = dense_residual_block(x, units=256, dropout_rate=0.2)
    x = dense_residual_block(x, units=128, dropout_rate=0.2)
    x = dense_residual_block(x, units=64, dropout_rate=0.1)
    
    # Output
    output = Dense(1, activation='linear', name='output')(x)
    
    model = Model(inputs=inputs, outputs=output)
    
    # Learning Rate un peu plus faible pour la stabilité
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005) 
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    
    return model

model = build_resnet_model(len(NUM_COLS), CAT_COLS, label_encoders)
# model.summary()

# ==============================================================================
# 4. ENTRAÎNEMENT
# ==============================================================================
print("\nDébut de l'entraînement ResNet...")

callbacks = [
    EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
]

train_inputs = X_train_cat + [X_train_num]
val_inputs = X_val_cat + [X_val_num]

history = model.fit(
    train_inputs, y_train,
    validation_data=(val_inputs, y_val),
    epochs=30,          # On peut aller plus loin car c'est plus stable
    batch_size=2048,
    callbacks=callbacks,
    verbose=1
)

# ==============================================================================
# 5. SOUMISSION
# ==============================================================================
test_inputs = X_test_cat + [X_test_num]
preds_log = model.predict(test_inputs, batch_size=2048).flatten()
preds = np.expm1(preds_log)
preds[preds < 0] = 0

submission = pd.DataFrame({'id': test_ids, 'sales': preds})
submission.to_csv('submission_resnet.csv', index=False)
print("Terminé ! Fichier 'submission_resnet.csv' généré.")

Preprocessing...
Construction du modèle ResNet Tabulaire...

Début de l'entraînement ResNet...


2025-12-11 22:43:03.438693: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/30
[1m1452/1452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 56ms/step - loss: 0.8950 - mae: 0.6252 - val_loss: 0.4191 - val_mae: 0.4821 - learning_rate: 5.0000e-04
Epoch 2/30
[1m1452/1452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 57ms/step - loss: 0.3433 - mae: 0.4021 - val_loss: 0.3873 - val_mae: 0.4475 - learning_rate: 5.0000e-04
Epoch 3/30
[1m1452/1452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 58ms/step - loss: 0.2643 - mae: 0.3503 - val_loss: 0.4425 - val_mae: 0.4271 - learning_rate: 5.0000e-04
Epoch 4/30
[1m1452/1452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 55ms/step - loss: 0.2269 - mae: 0.3224 - val_loss: 0.3762 - val_mae: 0.3846 - learning_rate: 5.0000e-04
Epoch 5/30
[1m1452/1452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 57ms/step - loss: 0.2058 - mae: 0.3047 - val_loss: 0.4257 - val_mae: 0.3935 - learning_rate: 5.0000e-04
Epoch 6/30
[1m1452/1452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 