In [None]:
!pip install iterative-stratification

import pickle

import numpy as np
from iterstrat.ml_stratifiers import (
    MultilabelStratifiedKFold,
    MultilabelStratifiedShuffleSplit,
)
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    hamming_loss,
    precision_score,
    recall_score,
)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.tree import DecisionTreeClassifier

# 1. Load the feature array X and the label array y from the Kaggle input
#    dataset.
X = np.load('/kaggle/input/dataset-pre-traitement-sift-bovw-pca/X_pca.npy')
y = np.load('/kaggle/input/dataset-pre-traitement-sift-bovw-pca/y.npy')

# Hold out 20% of the samples for the final evaluation. The hold-out is
# stratified on the label matrix, like the cross-validation folds used for
# the grid search: a plain random split can leave rare labels unevenly
# represented (or absent) on one side of the split.
# NOTE(review): assumes y is a 2-D binary label-indicator matrix, which the
# multilabel-stratified K-fold used for cross-validation requires as well.
holdout_splitter = MultilabelStratifiedShuffleSplit(
    n_splits=1, test_size=0.2, random_state=42
)
train_idx, test_idx = next(holdout_splitter.split(X, y))
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]
# 2. Cross-validation strategy: 8 shuffled, multilabel-stratified folds with
#    a fixed seed so the folds are reproducible.
mskf = MultilabelStratifiedKFold(
    n_splits=8,
    random_state=42,
    shuffle=True,
)

# 3. One-vs-Rest model: the wrapper is given a seeded decision tree as its
#    base estimator.
base_estimator = DecisionTreeClassifier(random_state=42)
ovr_model = OneVsRestClassifier(estimator=base_estimator)

# 4. Hyperparameter grid for the grid search. The 'estimator__' prefix
#    addresses the parameters of the tree wrapped by the One-vs-Rest model.
param_grid = dict(
    estimator__max_depth=[10, 20, 30],
    estimator__min_samples_split=[2, 10],
    estimator__criterion=['gini', 'entropy'],
)

# 5. Grid search over every combination in param_grid, scored with the
#    sample-averaged F1 on the cross-validation folds defined by mskf.
print("Début de l'optimisation par Grid Search (k=8)...")
grid_search = GridSearchCV(
    estimator=ovr_model,
    param_grid=param_grid,
    scoring='f1_samples',
    cv=mskf,
    n_jobs=-1,  # -1 asks for all available processors
)

# Only the training portion is used for the search; the test set is kept
# aside for the final evaluation.
grid_search.fit(X_train, y_train)

# 6. Final model: the estimator refit by the grid search with the best
#    hyperparameter combination. Report what was selected so the run can be
#    reproduced without repeating the search.
best_model = grid_search.best_estimator_
print(f"Meilleurs hyperparamètres : {grid_search.best_params_}")
print(f"Meilleur F1 (validation croisée) : {grid_search.best_score_:.4f}")

# 7. Evaluation on the held-out test set.
y_pred = best_model.predict(X_test)

# With average='samples' a sample with no predicted (or no true) label makes
# the per-sample ratio undefined. zero_division=0 scores such samples as 0,
# which is the value the default setting uses too, but without emitting an
# UndefinedMetricWarning for each metric.
sample_avg = {'average': 'samples', 'zero_division': 0}

print("-" * 30)
print(f"Accuracy (Subset) : {accuracy_score(y_test, y_pred):.4f}")
print(f"Précision : {precision_score(y_test, y_pred, **sample_avg):.4f}")
print(f"Recall  : {recall_score(y_test, y_pred, **sample_avg):.4f}")
print(f"F1-Score : {f1_score(y_test, y_pred, **sample_avg):.4f}")
print(f"Hamming Loss      : {hamming_loss(y_test, y_pred):.4f}")