<a href="https://www.kaggle.com/code/lukalafaye/compet-vfinal?scriptVersionId=201555437" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC
from tensorflow.keras import layers, models
from sklearn.metrics import f1_score

In [None]:
# Load the competition CSVs in a fixed order: training features, training
# labels, test features. header=None makes pandas treat the first row of each
# file as data rather than as column names.
train_features, train_labels, test_features = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        '/kaggle/input/compet/train_features.csv',
        '/kaggle/input/compet/train_labels.csv',
        '/kaggle/input/compet/test_features.csv',
    )
)

In [None]:
# Count the distinct label values. `train_labels` is a DataFrame, so
# `nunique()` returns one count per column (a Series), not a bare integer.
nombre_de_labels_uniques = train_labels.nunique()
# The message is user-facing French text: "Number of different labels".
print("Nombre de labels différents :", nombre_de_labels_uniques)

In [None]:
# One-hot encode the raw labels: view them as a single column, fit the
# encoder on that column, and convert the encoder output to a dense array.
encoder = OneHotEncoder()
train_labels_encoded = (
    encoder
    .fit_transform(train_labels.to_numpy().reshape(-1, 1))
    .toarray()
)

In [None]:
def create_cnn_model(input_shape, num_classes):
    """Build an uncompiled 1D-CNN classifier.

    The network stacks three ``Conv1D(kernel_size=3, relu)`` +
    ``MaxPooling1D(2)`` stages with 64, 128 and 256 filters, flattens the
    result, and feeds it through two ``Dense(relu)`` + ``Dropout(0.5)`` stages
    (512 and 256 units) into a softmax output layer.

    Args:
        input_shape: Shape of a single sample, excluding the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The assembled ``Sequential`` model. It is not compiled here; the
        caller chooses the optimizer, loss and metrics.
    """
    model = models.Sequential()

    # Declare the input with an explicit Input layer rather than passing
    # `input_shape=` to the first Conv1D, which recent Keras versions
    # discourage for Sequential models.
    model.add(layers.Input(shape=input_shape))

    # Convolutional feature extractor: filters double at each stage while
    # pooling halves the sequence length.
    for filters in (64, 128, 256):
        model.add(layers.Conv1D(filters, kernel_size=3, activation='relu'))
        model.add(layers.MaxPooling1D(pool_size=2))

    # Collapse the (steps, filters) feature map into one vector per sample.
    model.add(layers.Flatten())

    # Fully connected head with dropout after each hidden layer.
    for units in (512, 256):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.Dropout(0.5))

    # Output layer: one probability per class.
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

# Per-sample input shape handed to create_cnn_model.
# NOTE(review): assumes each sample has 1024 features treated as a
# single-channel sequence — confirm against the width of the feature CSVs.
input_shape = (1024, 1)
# Size of the softmax output layer.
# NOTE(review): hard-coded — confirm it matches the number of distinct labels.
num_classes = 100

In [None]:
# Hold out 20% of the labelled data for validation; the fixed seed keeps the
# split reproducible. No separate "final test" split is created, so only the
# train and validation arrays exist below.
X_train, X_val, y_train, y_val = train_test_split(
    train_features, train_labels, test_size=0.2, random_state=42
)

# Standardise the features. The scaler is fitted on the training split only
# and then re-used for the validation split so no validation statistics leak
# into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Compute "balanced" class weights from the training labels. The labels are
# flattened to 1-D first because compute_class_weight expects a 1-D array,
# while y_train is a one-column DataFrame.
train_label_values = np.asarray(y_train).ravel()
present_classes = np.unique(train_label_values)
class_weights = compute_class_weight(
    'balanced', classes=present_classes, y=train_label_values
)
# Key each weight by its actual class label (not by its position in the
# array) so the mapping stays correct even if some label is absent from the
# training split.
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}

# One-hot encode labels for the categorical cross-entropy loss.
num_classes = 100  # NOTE(review): hard-coded; confirm against the label set.
y_train_encoded = to_categorical(y_train, num_classes=num_classes)
y_val_encoded = to_categorical(y_val, num_classes=num_classes)

# Stop once val_loss has not improved for 3 epochs and roll back to the best
# weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Halve the learning rate when val_loss has not improved for 2 epochs.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-7)


def lr_schedule(epoch: int) -> float:
    """Return the learning rate for ``epoch`` (0-based).

    The rate is 1e-4 for epochs 0-5 and 5e-5 from epoch 6 onwards.
    """
    lr = 1e-4
    if epoch > 5:
        lr *= 0.5
    return lr


optimizer = Adam(learning_rate=lr_schedule(0))

# Create the CNN model
cnn_model = create_cnn_model(input_shape, num_classes)

# Compile the model. NOTE: the tracked metric is the area under the
# precision-recall curve; it is only *named* 'f1_score'. The name is kept
# unchanged so the keys in `history.history` and the training logs stay the
# same.
cnn_model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=[AUC(curve='PR', name='f1_score')],
)

# Fit the model with class weights, early stopping and learning-rate control.
# NOTE(review): LearningRateScheduler re-applies lr_schedule at the start of
# every epoch, which presumably overwrites any reduction ReduceLROnPlateau
# made at the end of the previous one — confirm which of the two is intended.
history = cnn_model.fit(
    X_train_scaled, y_train_encoded,
    epochs=20,
    validation_data=(X_val_scaled, y_val_encoded),
    batch_size=64,
    shuffle=True,
    class_weight=class_weights_dict,
    callbacks=[early_stopping, LearningRateScheduler(lr_schedule), reduce_lr]
)

In [None]:
"""
# Evaluate the model on the final test set
y_pred_final_test = cnn_model.predict(X_final_test_scaled)
y_pred_final_test_classes = np.argmax(y_pred_final_test, axis=1)
y_final_test_classes = np.argmax(y_final_test_encoded, axis=1)

# Calculate and print the F1 score
final_test_f1 = f1_score(y_final_test_classes, y_pred_final_test_classes, average='weighted')
print(f'Final Test F1 Score: {final_test_f1}')
"""

In [None]:
# Scale the competition test features with the already-fitted `scaler`
# (transform only, no re-fitting) so they match the preprocessing applied to
# the training data.
test_features_scaled = scaler.transform(test_features)

In [None]:
# Run the trained network on the scaled test features, then keep, for each
# row, the index of the highest-scoring output unit as the predicted class.
test_predictions = cnn_model.predict(test_features_scaled)
argmax_predictions = test_predictions.argmax(axis=1)

In [None]:
# Quick visual sanity check of the predicted class indices.
print(argmax_predictions)

In [None]:
# Assemble the submission table: a 0-based running ID next to the predicted
# class index of each test row.
submission = pd.DataFrame({
    'ID': range(len(argmax_predictions)),
    'Prediction': argmax_predictions,
})

In [None]:
# Write the submission file into the working directory, with a header row
# (ID, Prediction) and without the DataFrame index column.
submission.to_csv('sub_on_espere_80p.csv', index=False, header=True)