# Learn++.NC - Test sur Nouveau Dataset

**Objectif:** Charger un modèle entraîné (.pkl) et l'évaluer sur un dataset totalement différent.

Compatible avec:
- `.pkl` SV Transfer (dict avec clf + scaler)
- `.pkl` LearnPPNC (objet classique)

---

In [None]:
# Notebook shell magic: quietly install Optuna into the runtime.
# NOTE(review): no visible cell imports optuna — confirm it is still needed
# (e.g. for unpickling the model) or drop this cell.
!pip install optuna -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from typing import Dict, List, Tuple
import pickle
import warnings
from google.colab import files

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Matplotlib style sheet applied to all figures drawn afterwards.
plt.style.use('seaborn-v0_8-whitegrid')
print("Imports OK")

---
# 1. Configuration
---

In [None]:
# Input columns read from the test CSV: the electrical speed followed by the
# I_*/V_* columns for groups M, P, B and suffixes a, b, c (19 names in total).
# The order is significant: the feature matrix is built by selecting these
# columns in this exact sequence.
FEATURE_COLS = ['Electrical speed [rad/s]'] + [
    f'{quantity}_{group}_{suffix}'
    for quantity in ('I', 'V')
    for group in ('M', 'P', 'B')
    for suffix in ('a', 'b', 'c')
]
# Name of the CSV column holding the true class of each sample.
LABEL_COL = 'Class label'

---
# 2. Charger le Modèle
---

In [None]:
# Load a trained model from an uploaded pickle and expose it through the
# globals used by the later cells: `scaler`, `all_classes`, `MODEL_TYPE`,
# and either `clf` (SV Transfer dict) or `model` (LearnPPNC object).
print("üìÅ CHARGER LE MOD√àLE (.pkl):")
uploaded = files.upload()
if not uploaded:
    # Presumably an empty mapping means the upload dialog was cancelled;
    # stop with an explicit message rather than an opaque IndexError.
    raise ValueError("Aucun fichier .pkl fourni : relancez la cellule et choisissez un fichier.")
pkl_name = next(iter(uploaded))

# SECURITY: pickle.load can execute arbitrary code contained in the file;
# only upload .pkl files that come from a trusted source.
# NOTE(review): restoring a pickled LearnPPNC object needs its class to be
# defined/importable in this session; no visible cell does so — confirm.
with open(pkl_name, 'rb') as f:
    saved = pickle.load(f)

# Detect the format
if isinstance(saved, dict) and 'clf' in saved:
    # SV Transfer format: a dict bundling the classifier, its scaler and metadata
    clf = saved['clf']
    scaler = saved['scaler']
    all_classes = np.array(saved['classes'])
    MODEL_TYPE = 'sv_transfer'
    print(f"\nMod√®le charg√©! (SV Transfer)")
    print(f"  M√©thode: {saved.get('method', 'SV Transfer')}")
    print(f"  Classes connues: {list(all_classes)}")
    print(f"  Accuracy (train): {saved['accuracy_train']:.4f}")
    print(f"  Hyperparam√®tres: C={saved['C']:.4f}, gamma={saved['gamma']:.6f}")
    print(f"  Support Vectors: {saved['n_sv']}")
else:
    # Classic LearnPPNC format: the pickled object itself is the model
    model = saved
    clf = None  # no standalone classifier in this format
    scaler = model.scaler
    all_classes = np.array(sorted(model.all_classes))
    MODEL_TYPE = 'learnpp'
    print(f"\nMod√®le charg√©! (LearnPPNC)")
    print(f"  Nombre d'experts: {len(model.experts)}")
    print(f"  Classes connues: {list(all_classes)}")
    print(f"  Hyperparam√®tres: C={model.C:.4f}, gamma={model.gamma:.6f}")
    # Experts are reported with a 1-based rank.
    for rank, expert in enumerate(model.experts, start=1):
        print(f"  Expert #{rank}: {expert['name']} - Accuracy: {expert['accuracy']:.4f} - SVs: {expert['n_sv']}")

---
# 3. Charger le Nouveau Dataset
---

In [None]:
# Load the evaluation CSV, split it into features/labels, print the class
# distribution and report any test class the loaded model has never seen.
print("üìÅ CHARGER LE NOUVEAU DATASET (.csv):")
uploaded = files.upload()
if not uploaded:
    # Presumably an empty mapping means the upload dialog was cancelled;
    # stop with an explicit message rather than an opaque IndexError.
    raise ValueError("Aucun fichier .csv fourni : relancez la cellule et choisissez un fichier.")
test_file = next(iter(uploaded))

df_test = pd.read_csv(test_file)
X_test = df_test[FEATURE_COLS].values
y_test = df_test[LABEL_COL].values

# One pass gives both the sorted distinct labels and their frequencies.
unique_labels, label_counts = np.unique(y_test, return_counts=True)

print(f"\nCharg√©: {len(y_test)} √©chantillons, {len(unique_labels)} classes")
print(f"Classes: {sorted(unique_labels)}")
print(f"\nDistribution:")
for c, n_samples in zip(unique_labels, label_counts):
    # int() keeps the 'd' format valid when the label column was parsed as
    # float (the plotting cells cast labels to int for the same reason).
    print(f"  Classe {int(c):2d} : {int(n_samples):4d} √©chantillons")

# Check compatibility with the classes known to the model
test_classes = sorted(unique_labels)
unknown = set(test_classes) - set(all_classes)
if unknown:
    print(f"\n‚ö†Ô∏è  Classes dans le test INCONNUES du mod√®le: {sorted(unknown)}")
else:
    print(f"\n‚úì Toutes les classes du test sont connues du mod√®le")

---
# 4. Évaluation
---

In [None]:
# Scale the test features with the scaler restored alongside the model.
X_test_sc = scaler.transform(X_test)

if MODEL_TYPE != 'sv_transfer':
    # LearnPPNC object: evaluate() is handed the raw (unscaled) features.
    # NOTE(review): presumably it applies its own scaling internally — confirm.
    res = model.evaluate(X_test, y_test)
    y_pred, confidence = res['y_pred'], res['confidence']
else:
    # SV Transfer: query the classifier directly; the confidence of a sample
    # is its highest class probability.
    y_pred = clf.predict(X_test_sc)
    y_proba = clf.predict_proba(X_test_sc)
    confidence = y_proba.max(axis=1)

acc = accuracy_score(y_test, y_pred)

banner = "=" * 60
print(banner)
print(f"  ACCURACY: {acc:.4f}")
print(banner)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

In [None]:
# Row-normalised confusion matrix, drawn as a heatmap and saved to PNG.
fig, ax = plt.subplots(figsize=(12, 10))

# Index the matrix on every class known to the model OR present in the test
# labels: confusion_matrix() ignores samples whose label is not listed in
# `labels`, so restricting it to the model's classes would silently drop the
# unknown-class samples and disagree with the accuracy shown in the title.
cm_labels = np.union1d(all_classes, np.unique(y_test))
cm = confusion_matrix(y_test, y_pred, labels=cm_labels)

# Normalise each row by its number of true samples; rows of classes absent
# from the test set stay at 0 instead of going through a 0/0 -> NaN step.
row_totals = cm.sum(axis=1, keepdims=True)
cm_norm = np.divide(cm, row_totals,
                    out=np.zeros(cm.shape, dtype=float),
                    where=row_totals > 0)

tick_labels = cm_labels.astype(int)
sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='Blues',
            xticklabels=tick_labels,
            yticklabels=tick_labels,
            ax=ax, vmin=0, vmax=1,
            cbar_kws={'label': 'Proportion'})

ax.set_xlabel('Classe Pr√©dite', fontsize=12)
ax.set_ylabel('Classe R√©elle', fontsize=12)
ax.set_title(f"Test sur Nouveau Dataset\nAccuracy: {acc:.2%}", fontsize=14, fontweight='bold')

plt.tight_layout()
plt.savefig('test_confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Per-class accuracy and per-class sample count on the test set, as two bar
# charts saved to PNG.

# Cover the classes known to the model AND the classes present in the test
# labels, so that test classes unknown to the model still appear in the
# test-set distribution instead of being left out.
plot_classes = np.union1d(all_classes, np.unique(y_test))

class_acc = []
class_count = []
for c in plot_classes:
    mask = y_test == c
    n_samples = int(np.sum(mask))
    class_count.append(n_samples)
    # A class with no test sample gets 0 so bar positions stay aligned.
    class_acc.append(np.mean(y_pred[mask] == c) if n_samples else 0)

positions = range(len(plot_classes))
tick_labels = [int(c) for c in plot_classes]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: accuracy per class, colour-coded by level (>0.9, >0.7, else).
ax = axes[0]
colors = ['green' if a > 0.9 else 'orange' if a > 0.7 else 'red' for a in class_acc]
bars = ax.bar(positions, class_acc, color=colors, edgecolor='black')
ax.axhline(acc, color='blue', ls='--', lw=2, label=f'Accuracy globale: {acc:.4f}')
ax.set_xticks(positions)
ax.set_xticklabels(tick_labels)
ax.set_xlabel('Classe')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy par Classe')
ax.set_ylim([0, 1.05])
ax.legend()

# Right panel: number of test samples per class.
ax = axes[1]
ax.bar(positions, class_count, color='steelblue', edgecolor='black')
ax.set_xticks(positions)
ax.set_xticklabels(tick_labels)
ax.set_xlabel('Classe')
ax.set_ylabel("Nombre d'√©chantillons")
ax.set_title('Distribution des Classes (Test)')

plt.tight_layout()
plt.savefig('test_class_performance.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Confidence-score histograms, split by whether the prediction was right,
# followed by the mean confidence of each group.
correct = y_test == y_pred
wrong = ~correct
n_correct, n_wrong = correct.sum(), wrong.sum()
mean_conf = np.mean(confidence)

# Settings shared by both histograms.
hist_style = dict(bins=20, alpha=0.7, edgecolor='black')

fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(confidence[correct], label=f'Correct (n={n_correct})', color='green', **hist_style)
ax.hist(confidence[wrong], label=f'Incorrect (n={n_wrong})', color='red', **hist_style)
ax.axvline(mean_conf, color='blue', ls='--', label=f'Moyenne: {mean_conf:.3f}')
ax.set_xlabel('Score de Confiance')
ax.set_ylabel('Nombre de pr√©dictions')
ax.set_title('Distribution des Scores de Confiance')
ax.legend()
plt.tight_layout()
plt.savefig('test_confidence.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"Confiance moyenne (correct): {np.mean(confidence[correct]):.4f}")
# The mean over the wrong predictions is only printed when there are any.
if n_wrong > 0:
    print(f"Confiance moyenne (incorrect): {np.mean(confidence[wrong]):.4f}")
else:
    print("Tout correct!")

---
# 5. Détail des Prédictions
---

In [None]:
# Detailed table for the first examples (at most 20)
n_show = min(20, len(y_test))
X_show_sc = X_test_sc[:n_show]

if MODEL_TYPE != 'sv_transfer':
    # LearnPPNC: start from the table returned by get_expert_predictions()
    # and append the true label, the final prediction and the confidence
    # already computed during evaluation.
    detail_df = model.get_expert_predictions(X_test[:n_show])
    detail_df['y_true'] = y_test[:n_show]
    detail_df['y_final'] = y_pred[:n_show]
    detail_df['confidence'] = confidence[:n_show]
    detail = detail_df
else:
    # SV Transfer: predict again on the displayed slice only.
    preds = clf.predict(X_show_sc)
    probas = clf.predict_proba(X_show_sc)
    confs = probas.max(axis=1)
    y_head = y_test[:n_show]
    detail = pd.DataFrame({
        'y_true': y_head.astype(int),
        'y_pred': preds.astype(int),
        'confidence': confs.round(4),
        'correct': preds == y_head,
    })

print(f"D√©tail des pr√©dictions ({n_show} premiers exemples):")
# Trailing bare expression so the notebook renders the table.
detail

---
# 6. Télécharger les Résultats
---

In [None]:
import os

# Offer each figure saved by the plotting cells for download, skipping any
# file that does not exist on disk.
result_pngs = ('test_confusion_matrix.png', 'test_class_performance.png', 'test_confidence.png')
for png_name in filter(os.path.exists, result_pngs):
    files.download(png_name)