In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dataset-pre-traitement-sift-bovw-pca/y_labels.npy
/kaggle/input/dataset-pre-traitement-sift-bovw-pca/image_names.pkl
/kaggle/input/dataset-pre-traitement-sift-bovw-pca/label_names.pkl
/kaggle/input/dataset-pre-traitement-sift-bovw-pca/y.npy
/kaggle/input/dataset-pre-traitement-sift-bovw-pca/X_pca.npy


In [2]:
import numpy as np
import pickle
import time
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score, hamming_loss
)

In [3]:
# -------------------------------
# 1️ Chargement des données
# -------------------------------
X = np.load("/kaggle/input/dataset-pre-traitement-sift-bovw-pca/X_pca.npy")
y = np.load("/kaggle/input/dataset-pre-traitement-sift-bovw-pca/y.npy")

with open("/kaggle/input/dataset-pre-traitement-sift-bovw-pca/label_names.pkl", "rb") as f:
    label_names = pickle.load(f)

print(X.shape, y.shape)

(8091, 100) (8091, 495)


In [4]:
# -------------------------------
# 2️ MultinomialNB → valeurs ≥ 0
# -------------------------------
X = np.maximum(X, 0)

# -------------------------------
# 3️ Division Train / Test
# -------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f" X_train : {X_train.shape} | X_test : {X_test.shape}")
print(f" y_train : {y_train.shape} | y_test : {y_test.shape}")

 X_train : (6472, 100) | X_test : (1619, 100)
 y_train : (6472, 495) | y_test : (1619, 495)


In [5]:
# -------------------------------
# 4️ Configuration MultinomialNB
# -------------------------------
nb_model = OneVsRestClassifier(
    MultinomialNB()
)

param_grid = {
    "estimator__alpha": [
        1e-4, 1e-3, 1e-2, 0.1, 0.5, 1.0
    ]
}

grid_search = GridSearchCV(
    estimator=nb_model,
    param_grid=param_grid,
    scoring="f1_macro",   
    cv=8,
    verbose=2,
    n_jobs=1,
    refit=True
)

In [6]:
# -------------------------------
# 5️ Entraînement
# -------------------------------
print("\n DÉMARRAGE DE L'ENTRAÎNEMENT MULTINOMIAL NB")
start = time.time()

grid_search.fit(X_train, y_train)
best_nb = grid_search.best_estimator_

end = time.time()
print(f" Entraînement terminé en {end - start:.2f} secondes")

print(" Meilleur alpha :", grid_search.best_params_)


 DÉMARRAGE DE L'ENTRAÎNEMENT MULTINOMIAL NB
Fitting 8 folds for each of 6 candidates, totalling 48 fits
[CV] END ............................estimator__alpha=0.0001; total time=   4.5s
[CV] END ............................estimator__alpha=0.0001; total time=   2.2s
[CV] END ............................estimator__alpha=0.0001; total time=   2.3s
[CV] END ............................estimator__alpha=0.0001; total time=   2.3s
[CV] END ............................estimator__alpha=0.0001; total time=   2.3s
[CV] END ............................estimator__alpha=0.0001; total time=   2.2s
[CV] END ............................estimator__alpha=0.0001; total time=   2.2s
[CV] END ............................estimator__alpha=0.0001; total time=   2.5s
[CV] END .............................estimator__alpha=0.001; total time=   2.2s
[CV] END .............................estimator__alpha=0.001; total time=   2.2s
[CV] END .............................estimator__alpha=0.001; total time=   2.2s
[CV]

In [7]:
# -------------------------------
# 6️ Évaluation
# -------------------------------
y_pred = best_nb.predict(X_test)

metrics = {
    "Accuracy": accuracy_score(y_test, y_pred),
    "Precision_micro": precision_score(y_test, y_pred, average="micro", zero_division=0),
    "Recall_micro": recall_score(y_test, y_pred, average="micro", zero_division=0),
    "F1_macro": f1_score(y_test, y_pred, average="macro", zero_division=0),
    "F1_micro": f1_score(y_test, y_pred, average="micro", zero_division=0),
    "Hamming_Loss": hamming_loss(y_test, y_pred)
}

print("\n MÉTRIQUES DU MODÈLE Naive Bayes - MULTINOMIAL NB :")
for k, v in metrics.items():
    print(f"{k:20s} : {v:.4f}")


 MÉTRIQUES DU MODÈLE Naive Bayes - MULTINOMIAL NB :
Accuracy             : 0.0000
Precision_micro      : 0.0429
Recall_micro         : 0.2064
F1_macro             : 0.0273
F1_micro             : 0.0710
Hamming_Loss         : 0.0475
