SVM experts -- different types of voting systems

In [None]:
!pip install optuna -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from typing import Dict, List, Tuple
import pickle
import warnings
import optuna
from optuna.samplers import TPESampler
from google.colab import files

warnings.filterwarnings('ignore')


1. Learn++.NC


In [None]:
class LearnPPNC:
    """
    Learn++.NC-style incremental ensemble of RBF-kernel SVM experts.

    Every call to ``add_data`` trains one new ``SVC`` expert on the supplied
    batch and stores it together with its accuracy on that same batch.

    Voting: each expert predicts one class per sample and adds its training
    accuracy to that class; the class with the largest weighted vote wins.
    Confidence: accuracy-weighted average of the experts' ``predict_proba``
    outputs, maximised over classes.

    The feature scaler and the SVM hyper-parameters (C, gamma) are fixed by
    the first batch and reused unchanged for every later expert.
    """

    def __init__(self, optimize: bool = True, C: float = 10.0, gamma: float = 0.1,
                 n_trials: int = 50, cv_folds: int = 3, verbose: bool = True):
        """
        Args:
            optimize: If True, tune C and gamma with Optuna on the first batch.
            C: SVM regularisation parameter used when ``optimize`` is False.
            gamma: RBF kernel coefficient used when ``optimize`` is False.
            n_trials: Number of Optuna trials.
            cv_folds: Number of stratified CV folds used by the Optuna objective.
            verbose: If True, progress messages are printed.
        """
        self.optimize = optimize
        self.default_C = C
        self.default_gamma = gamma
        self.n_trials = n_trials
        self.cv_folds = cv_folds
        self.verbose = verbose
        self.C = C
        self.gamma = gamma
        self.scaler = None  # StandardScaler, fitted on the first batch only
        self.experts: List[Dict] = []
        self.all_classes: set = set()
        self.is_initialized = False
        self.optuna_study = None
        self.history: List[Dict] = []

    def _log(self, msg: str):
        """Print ``msg`` when verbose mode is enabled."""
        if self.verbose:
            print(msg)

    def _require_fitted(self):
        """Raise a clear error when prediction is requested before any expert exists."""
        if self.scaler is None or not self.experts:
            raise RuntimeError(
                "LearnPPNC has no expert yet: call add_data() before predicting."
            )

    @staticmethod
    def _class_columns(all_classes: np.ndarray, labels) -> Tuple[np.ndarray, np.ndarray]:
        """
        Map ``labels`` to their column index in the sorted array ``all_classes``.

        Returns:
            (cols, known): ``cols`` holds a column index per label and ``known``
            is a boolean mask that is False for labels absent from
            ``all_classes`` (those entries of ``cols`` must be ignored).
        """
        labels = np.asarray(labels)
        cols = np.searchsorted(all_classes, labels)
        # searchsorted returns len(all_classes) for labels above the maximum;
        # clip so the membership test below can index safely.
        cols = np.minimum(cols, len(all_classes) - 1)
        known = all_classes[cols] == labels
        return cols, known

    def _optimize_hyperparams(self, X: np.ndarray, y: np.ndarray):
        """
        Tune C and gamma with Optuna and store the best values on the instance.

        The objective is the mean stratified cross-validated accuracy of an
        RBF ``SVC`` with balanced class weights.

        Args:
            X: Scaled feature matrix of the first batch.
            y: Labels of the first batch.
        """
        self._log("OPTIMISATION OPTUNA")
        cv = StratifiedKFold(n_splits=self.cv_folds, shuffle=True, random_state=42)

        # Log-uniform search space for C and gamma; CV accuracy is maximised.
        def objective(trial):
            C = trial.suggest_float('C', 1e-1, 1e4, log=True)
            gamma = trial.suggest_float('gamma', 1e-4, 1e1, log=True)
            svc = SVC(C=C, gamma=gamma, kernel='rbf', class_weight='balanced')
            scores = cross_val_score(svc, X, y, cv=cv, scoring='accuracy', n_jobs=-1)
            return scores.mean()

        self.optuna_study = optuna.create_study(
            direction='maximize',
            sampler=TPESampler(seed=42)
        )
        self.optuna_study.optimize(
            objective,
            n_trials=self.n_trials,
            show_progress_bar=self.verbose
        )

        self.C = self.optuna_study.best_params['C']
        self.gamma = self.optuna_study.best_params['gamma']

        self._log(f"\nBest Datas :")
        self._log(f"  C = {self.C:.4f}")
        self._log(f"  gamma = {self.gamma:.6f}")
        self._log(f"  CV Accuracy = {self.optuna_study.best_value:.4f}")

    def add_data(self, X: np.ndarray, y: np.ndarray, name: str = None):
        """
        Train a new expert on the batch ``(X, y)`` and add it to the ensemble.

        On the first call the scaler is fitted on ``X`` and the
        hyper-parameters are chosen (Optuna search or the constructor
        defaults). Later calls reuse both unchanged.

        Args:
            X: Feature matrix (promoted to 2-D with ``np.atleast_2d``).
            y: Labels, one per row of ``X``.
            name: Optional expert name; defaults to ``Batch_<k>``.

        Returns:
            self, so that calls can be chained.

        Raises:
            ValueError: If ``X`` and ``y`` do not have the same number of samples.
        """
        X = np.atleast_2d(X)
        y = np.array(y)
        # Fail early: otherwise the mismatch only surfaces after the scaler
        # fit and the (potentially long) Optuna search.
        if X.shape[0] != len(y):
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {len(y)} labels."
            )
        name = name or f"Batch_{len(self.experts) + 1}"

        classes_in_batch = set(np.unique(y))
        new_classes = classes_in_batch - self.all_classes

        self._log(f"{name}")
        self._log(f"   samples: {len(y)}")
        self._log(f"   Class: {sorted(classes_in_batch)}")
        if new_classes:
            self._log(f"   new class: {sorted(new_classes)}")

        first_batch = not self.is_initialized
        if first_batch:
            self.scaler = StandardScaler()
            self.scaler.fit(X)

        # Scale once; the same scaled matrix feeds the search and the final fit.
        X_scaled = self.scaler.transform(X)

        if first_batch:
            if self.optimize:
                self._optimize_hyperparams(X_scaled, y)
            else:
                self.C = self.default_C
                self.gamma = self.default_gamma
                self._log(f"\nParameters: C={self.C}, gamma={self.gamma}")
            self.is_initialized = True

        # probability=True is required because voting relies on predict_proba.
        clf = SVC(
            C=self.C,
            gamma=self.gamma,
            kernel='rbf',
            class_weight='balanced',
            probability=True,
            random_state=42
        )
        clf.fit(X_scaled, y)

        # The expert's voting weight is its accuracy on its own training batch.
        accuracy = accuracy_score(y, clf.predict(X_scaled))
        n_sv = len(clf.support_)

        expert = {
            'clf': clf,
            'accuracy': accuracy,
            'classes': classes_in_batch,
            'name': name,
            'n_sv': n_sv,
            'n_samples': len(y)
        }
        self.experts.append(expert)
        self.all_classes.update(classes_in_batch)

        self.history.append({
            'name': name,
            'n_samples': len(y),
            'n_classes': len(classes_in_batch),
            'new_classes': list(new_classes),
            'accuracy': accuracy,
            'n_sv': n_sv,
            'expert_id': len(self.experts)
        })

        self._log(f"\n✓ Expert #{len(self.experts)} creaeed: {name}")
        self._log(f"   Accuracy (train): {accuracy:.4f}")
        return self

    def predict_with_details(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Predict with the weighted vote and return the intermediate quantities.

        Args:
            X: Feature matrix (promoted to 2-D with ``np.atleast_2d``).

        Returns:
            (y_pred, confidence, vote_matrix) where ``vote_matrix`` has one row
            per sample and one column per class (sorted class order),
            ``y_pred`` is the arg-max class of each row, and ``confidence`` is
            the maximum of the accuracy-weighted mean ``predict_proba``.

        Raises:
            RuntimeError: If no expert has been trained yet.
        """
        self._require_fitted()
        X = np.atleast_2d(X)
        X_scaled = self.scaler.transform(X)
        n_samples = X.shape[0]

        all_classes = np.array(sorted(self.all_classes))
        n_classes = len(all_classes)
        rows = np.arange(n_samples)

        vote_matrix = np.zeros((n_samples, n_classes))
        prob_matrix = np.zeros((n_samples, n_classes))
        total_weight = 0.0

        for expert in self.experts:
            clf = expert['clf']
            weight = expert['accuracy']
            total_weight += weight

            pred = clf.predict(X_scaled)
            proba = clf.predict_proba(X_scaled)

            # Hard vote: one weighted vote per sample for the predicted class.
            cols, known = self._class_columns(all_classes, pred)
            vote_matrix[rows[known], cols[known]] += weight

            # Soft vote: weighted probabilities, placed in the global columns
            # of the classes this expert was trained on.
            cols, known = self._class_columns(all_classes, clf.classes_)
            prob_matrix[:, cols[known]] += weight * proba[:, known]

        # All weights can be zero (every expert has 0 training accuracy);
        # leave the probabilities at zero instead of producing NaN.
        if total_weight > 0:
            prob_matrix /= total_weight
        y_pred = all_classes[np.argmax(vote_matrix, axis=1)]
        confidence = np.max(prob_matrix, axis=1)

        return y_pred, confidence, vote_matrix

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the class with the largest weighted vote for each sample."""
        y_pred, _, _ = self.predict_with_details(X)
        return y_pred

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """
        Return the vote matrix normalised so that each row sums to 1.

        Rows that received no vote weight are returned as all zeros.
        """
        _, _, vote_matrix = self.predict_with_details(X)
        totals = vote_matrix.sum(axis=1, keepdims=True)
        return np.divide(vote_matrix, totals,
                         out=np.zeros_like(vote_matrix), where=totals > 0)

    def evaluate(self, X: np.ndarray, y: np.ndarray) -> Dict:
        """
        Predict on ``X`` and compare with the true labels ``y``.

        Returns:
            Dict with keys 'accuracy', 'y_true', 'y_pred', 'confidence',
            'vote_matrix', 'confusion_matrix' and 'classes' (sorted array of
            every class seen so far, used as confusion-matrix label order).
        """
        # Store labels as an array so element-wise masks (y_true == c) work.
        y = np.asarray(y)
        y_pred, confidence, vote_matrix = self.predict_with_details(X)
        all_classes = np.array(sorted(self.all_classes))
        return {
            'accuracy': accuracy_score(y, y_pred),
            'y_true': y,
            'y_pred': y_pred,
            'confidence': confidence,
            'vote_matrix': vote_matrix,
            'confusion_matrix': confusion_matrix(y, y_pred, labels=all_classes),
            'classes': all_classes
        }

    def get_expert_predictions(self, X: np.ndarray) -> pd.DataFrame:
        """
        Return each expert's individual prediction and confidence.

        The frame has a 'sample_idx' column plus, for expert k (1-based),
        'expert_k_pred' and 'expert_k_conf' (its maximum class probability).

        Raises:
            RuntimeError: If no expert has been trained yet.
        """
        self._require_fitted()
        X_scaled = self.scaler.transform(np.atleast_2d(X))
        results = {'sample_idx': list(range(len(X_scaled)))}
        for i, expert in enumerate(self.experts):
            pred = expert['clf'].predict(X_scaled)
            proba = expert['clf'].predict_proba(X_scaled)
            results[f'expert_{i+1}_pred'] = pred
            results[f'expert_{i+1}_conf'] = np.max(proba, axis=1)
        return pd.DataFrame(results)

    def save(self, filepath: str):
        """Pickle the whole model (scaler, experts, history) to ``filepath``."""
        with open(filepath, 'wb') as f:
            pickle.dump(self, f)
        self._log(f"Model Saved: {filepath}")

    @staticmethod
    def load(filepath: str) -> 'LearnPPNC':
        """
        Load a model previously written by ``save``.

        WARNING: unpickling executes arbitrary code; only load files you trust.
        """
        with open(filepath, 'rb') as f:
            return pickle.load(f)


## 2. Fonctions de Visualisation

In [None]:
def plot_optuna(model):
    """
    Plot the convergence of the Optuna search stored on ``model``.

    Draws every trial value, the running best value and the final best value,
    saves the figure to 'optuna_results.png' and shows it. Does nothing when
    ``model.optuna_study`` is None (no search was run).
    """
    study = model.optuna_study
    if study is None:
        return

    trials_df = study.trials_dataframe()

    # A 1x1 grid makes plt.subplots return a single Axes object (not an
    # array), so it must be used directly rather than indexed.
    fig, ax = plt.subplots(1, 1, figsize=(15, 4))

    ax.plot(trials_df.index, trials_df['value'], 'b.', alpha=0.3, label='Trials')
    ax.plot(trials_df.index, trials_df['value'].cummax(), 'r-', lw=2, label='Best')
    ax.axhline(study.best_value, color='green', ls='--', label=f'Final: {study.best_value:.4f}')
    ax.set_xlabel('Trial')
    ax.set_ylabel('Accuracy')
    ax.set_title('Convergence Optuna')
    ax.legend()

    plt.tight_layout()
    plt.savefig('optuna_results.png', dpi=150, bbox_inches='tight')
    plt.show()


def plot_experts_summary(model):
    """
    Plot a three-panel summary of the experts held by ``model``.

    Panels: training accuracy, number of support vectors and number of
    training samples, one bar per expert (labelled E1, E2, ...). The figure
    is saved to 'experts_summary.png' and shown. Prints a message and returns
    when the model has no expert.
    """
    if not model.experts:
        print("Aucun expert.")
        return

    accuracies = [e['accuracy'] for e in model.experts]
    n_svs = [e['n_sv'] for e in model.experts]
    n_samples = [e['n_samples'] for e in model.experts]
    positions = range(len(model.experts))
    tick_labels = [f'E{i+1}' for i in positions]

    fig, axes = plt.subplots(1, 3, figsize=(14, 4))

    # Panel 1: training accuracy per expert, with the mean as a reference line.
    ax = axes[0]
    bars = ax.bar(positions, accuracies, color='steelblue', edgecolor='black')
    ax.axhline(np.mean(accuracies), color='red', ls='--', label=f'Moyenne: {np.mean(accuracies):.4f}')
    ax.set_xticks(positions)
    ax.set_xticklabels(tick_labels, rotation=0)
    ax.set_ylabel('Accuracy')
    ax.set_title('Accuracy par Expert')
    # Zoom on the top of the scale, but never cut off the lowest bar.
    ax.set_ylim([min(0.9, min(accuracies)-0.05), 1.0])
    ax.legend()
    for bar, acc in zip(bars, accuracies):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005, f'{acc:.3f}',
                ha='center', va='bottom', fontsize=9)

    # Panels 2 and 3: the grid reserves three axes and the counts are
    # collected above, so draw them instead of leaving two empty panels.
    count_panels = (
        (axes[1], n_svs, 'Support vectors', 'Support Vectors par Expert', 'darkorange'),
        (axes[2], n_samples, "Nombre d'échantillons", 'Échantillons par Expert', 'seagreen'),
    )
    for ax, values, ylabel, title, color in count_panels:
        bars = ax.bar(positions, values, color=color, edgecolor='black')
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels, rotation=0)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        for bar, value in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{value}',
                    ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.savefig('experts_summary.png', dpi=150, bbox_inches='tight')
    plt.show()


def plot_confusion_matrix(results, title='Confusion Matrix'):
    """
    Draw a row-normalised confusion-matrix heatmap.

    Args:
        results: Dict with keys 'confusion_matrix' (2-D count array),
            'classes' (array of labels, cast to int for the tick labels) and
            'accuracy' (shown in the title).
        title: First line of the figure title.

    The figure is saved to 'confusion_matrix.png' and shown.
    """
    fig, ax = plt.subplots(figsize=(12, 10))

    cm = results['confusion_matrix']
    classes = results['classes']

    # Normalise each row by its number of true samples. Classes with no true
    # sample have a zero row total: keep those rows at 0 instead of NaN.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_norm = np.divide(cm, row_totals,
                        out=np.zeros(cm.shape, dtype=float), where=row_totals > 0)

    sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='Blues',
                xticklabels=classes.astype(int),
                yticklabels=classes.astype(int),
                ax=ax, vmin=0, vmax=1,
                cbar_kws={'label': 'Proportion'})

    ax.set_xlabel('Prediction', fontsize=12)
    ax.set_ylabel('Real Class', fontsize=12)
    ax.set_title(f"{title}\nAccuracy: {results['accuracy']:.2%}", fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.savefig('confusion_matrix.png', dpi=150, bbox_inches='tight')
    plt.show()



def plot_class_performance(results):
    """
    Plot per-class accuracy and per-class sample counts side by side.

    Args:
        results: Dict with keys 'y_true', 'y_pred', 'classes' and 'accuracy'.

    Classes without any true sample are reported with accuracy 0 and count 0.
    The figure is saved to 'class_performance.png' and shown.
    """
    y_true, y_pred, classes = (results[key] for key in ('y_true', 'y_pred', 'classes'))

    class_acc, class_count = [], []
    for label in classes:
        members = y_true == label
        support = np.sum(members)
        if support > 0:
            class_acc.append(np.mean(y_pred[members] == label))
            class_count.append(support)
        else:
            class_acc.append(0)
            class_count.append(0)

    positions = range(len(classes))
    tick_labels = [int(c) for c in classes]

    fig, (ax_acc, ax_count) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: accuracy per class, colour-coded by quality band.
    colors = []
    for a in class_acc:
        if a > 0.9:
            colors.append('green')
        elif a > 0.7:
            colors.append('orange')
        else:
            colors.append('red')
    ax_acc.bar(positions, class_acc, color=colors, edgecolor='black')
    ax_acc.axhline(results['accuracy'], color='blue', ls='--', lw=2, label=f'Accuracy globale: {results["accuracy"]:.4f}')
    ax_acc.set_xticks(positions)
    ax_acc.set_xticklabels(tick_labels)
    ax_acc.set_xlabel('Classe')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.set_title('Accuracy par Classe')
    ax_acc.set_ylim([0, 1.05])
    ax_acc.legend()

    # Right panel: number of true samples per class.
    ax_count.bar(positions, class_count, color='steelblue', edgecolor='black')
    ax_count.set_xticks(positions)
    ax_count.set_xticklabels(tick_labels)
    ax_count.set_xlabel('Classe')
    ax_count.set_ylabel("Nombre d'échantillons")
    ax_count.set_title('Distribution des Classes (Test)')

    plt.tight_layout()
    plt.savefig('class_performance.png', dpi=150, bbox_inches='tight')
    plt.show()


## 3. Configuration

In [None]:
# Feature columns read from the CSV: the electrical speed followed by every
# <quantity>_<group>_<phase> combination, in I-then-V, M/P/B, a/b/c order.
FEATURE_COLS = ['Electrical speed [rad/s]'] + [
    f'{quantity}_{group}_{phase}'
    for quantity in ('I', 'V')
    for group in ('M', 'P', 'B')
    for phase in ('a', 'b', 'c')
]
LABEL_COL = 'Class label'


def load_file(filepath):
    """
    Read a CSV file and split it into features and labels.

    Args:
        filepath: Path of the CSV file; it must contain every column of
            FEATURE_COLS plus LABEL_COL.

    Returns:
        (X, y, df): feature array, label array and the full DataFrame.
    """
    df = pd.read_csv(filepath)
    X, y = df[FEATURE_COLS].values, df[LABEL_COL].values
    labels = np.unique(y)
    print(f"Chargé: {len(y)} échantillons, {len(labels)} classes")
    print(f"Classes: {sorted(labels)}")
    return X, y, df

---
# CHARGEMENT & SÉPARATION CONNUES / NOUVELLES
---

In [None]:
# Pick the CSV through the Colab upload widget and load the first uploaded file.
print("FICHIER:")
uploaded = files.upload()
file_name = list(uploaded)[0]
X_all, y_all, df = load_file(file_name)

# Number of samples per class, in ascending label order.
print(f"\nDistribution:")
class_labels, class_counts = np.unique(y_all, return_counts=True)
for c, count in zip(class_labels, class_counts):
    print(f"  Classes {c:2d} : {count:4d} samples")

In [None]:
# Classes seen by the first expert vs. classes introduced with the second one.
KNOWN_CLASSES = [1, 2, 3, 4, 5, 6, 7, 8]
NEW_CLASSES = [9, 10, 11, 12, 13, 14, 15, 16]

print(f"Classes known  : {KNOWN_CLASSES}")
print(f"Classes unknown: {NEW_CLASSES}")

# Partition the dataset by class group.
mask_known, mask_new = (np.isin(y_all, group) for group in (KNOWN_CLASSES, NEW_CLASSES))
X_known, y_known = X_all[mask_known], y_all[mask_known]
X_new, y_new = X_all[mask_new], y_all[mask_new]

# Same stratified 80/20 split (fixed seed) for both groups.
split_kwargs = dict(test_size=0.2, random_state=42)
X_known_train, X_known_test, y_known_train, y_known_test = train_test_split(
    X_known, y_known, stratify=y_known, **split_kwargs
)
X_new_train, X_new_test, y_new_train, y_new_test = train_test_split(
    X_new, y_new, stratify=y_new, **split_kwargs
)

# Combined test set covering both class groups.
X_test_all = np.concatenate([X_known_test, X_new_test], axis=0)
y_test_all = np.concatenate([y_known_test, y_new_test])

print(f"\nKnown - Train: {len(y_known_train)}, Test: {len(y_known_test)}")
print(f"New   - Train: {len(y_new_train)},   Test: {len(y_new_test)}")
print(f"Test total:    {len(y_test_all)}")

---
# APPRENTISSAGE INCRÉMENTAL
---

In [None]:
# Build the ensemble. With optimize=True, C and gamma are tuned by Optuna
# (100 trials, 3-fold CV) when the first batch is added.
model = LearnPPNC(optimize=True, n_trials=100, cv_folds=3, verbose=True)

# Without Optuna (faster):
# model = LearnPPNC(optimize=False, C=100, gamma=0.1, verbose=True)

In [None]:
# Train Expert 1 on the known-class training split. Being the first batch, it
# also fits the scaler and (optimize=True) runs the Optuna search.
model.add_data(X_known_train, y_known_train, name="Expert_1_Known")

In [None]:
# Show the convergence of the Optuna search run while adding the first batch.
plot_optuna(model)

In [None]:
# Evaluate the single-expert model on the known-class test split.
print("Test on E1")
results_e1 = model.evaluate(X_known_test, y_known_test)
print(f"\nACCURACY E1: {results_e1['accuracy']:.4f}")
# zero_division=0 reports 0 (instead of warning) for classes never predicted.
print(classification_report(results_e1['y_true'], results_e1['y_pred'], zero_division=0))

In [None]:
# Row-normalised confusion matrix of Expert 1 on the known-class test split.
plot_confusion_matrix(results_e1, 'Expert 1 - Classes Connues')

In [None]:
# Train Expert 2 on the new-class training split; it reuses the scaler and the
# C/gamma values fixed when the first batch was added.
model.add_data(X_new_train, y_new_train, name="Expert_2_New")

print(f"\nNombre d'experts: {len(model.experts)}")
print(f"Classes totales: {sorted(model.all_classes)}")

In [None]:
# Plot the per-expert summary figure (saved as experts_summary.png).
plot_experts_summary(model)

---
# COMPARAISON DES STRATÉGIES DE VOTE
---

In [None]:
# Method 1: the model's built-in accuracy-weighted hard vote, on the full test set.
banner = "=" * 60
print(banner)
print("MÉTHODE 1 : Vote Pondéré Basique")
print(banner)
print("Chaque expert vote pour sa prédiction, pondéré par son accuracy.")
print("Problème: Expert 1 vote AUSSI pour les classes 9-16 (qu'il ne connaît pas).")

results_basic = model.evaluate(X_test_all, y_test_all)
acc_basic, y_pred_basic = results_basic['accuracy'], results_basic['y_pred']

print(f"\nACCURACY = {acc_basic:.4f}")
print(classification_report(y_test_all, y_pred_basic, zero_division=0))

In [None]:
# Method 2 (DW-CAV): soft vote where each expert contributes its
# accuracy-weighted class probabilities, only for the classes it was trained on.
print("="*60)
print("MÉTHODE 2 : DW-CAV (Class-Aware Voting)")
print("="*60)
print("Chaque expert ne vote QUE pour les classes qu'il connaît.")
print("Expert 1 → vote sur [1-8], ignoré pour [9-16]")
print("Expert 2 → vote sur [9-16], ignoré pour [1-8]")

# Global class order and column lookup shared by the voting cells below.
ALL_CLASSES = np.array(sorted(model.all_classes))
n_classes = len(ALL_CLASSES)
class_to_idx = {c: i for i, c in enumerate(ALL_CLASSES)}
n_test = len(y_test_all)

X_test_sc = model.scaler.transform(X_test_all)

vote_dwcav = np.zeros((n_test, n_classes))

for exp in model.experts:
    clf = exp['clf']
    w = exp['accuracy']
    known = exp['classes']
    proba = clf.predict_proba(X_test_sc)

    # Accumulate one whole probability column at a time (all samples at once)
    # instead of looping over every sample in Python.
    for j, c in enumerate(clf.classes_):
        if c in class_to_idx and c in known:
            vote_dwcav[:, class_to_idx[c]] += w * proba[:, j]

y_pred_dwcav = ALL_CLASSES[np.argmax(vote_dwcav, axis=1)]
acc_dwcav = accuracy_score(y_test_all, y_pred_dwcav)

print(f"\nACCURACY = {acc_dwcav:.4f}")
print(classification_report(y_test_all, y_pred_dwcav, zero_division=0))

In [None]:
# Method 3: for every sample keep the prediction of the expert whose maximum
# class probability is highest.
print("="*60)
print("MÉTHODE 3 : Max-Confidence Selection")
print("="*60)
print("On prend la prédiction de l'expert LE PLUS CONFIANT.")
print("Un expert qui ne connaît pas la classe → confiance basse → ignoré.")

# One batched predict / predict_proba call per expert (rows: experts,
# columns: test samples) instead of one call per sample per expert.
expert_preds = np.array([exp['clf'].predict(X_test_sc) for exp in model.experts])
expert_confs = np.array([exp['clf'].predict_proba(X_test_sc).max(axis=1)
                         for exp in model.experts])

# argmax returns the first maximum, so ties go to the earliest expert.
best_expert = np.argmax(expert_confs, axis=0)
sample_idx = np.arange(n_test)
y_pred_maxconf = expert_preds[best_expert, sample_idx].astype(int)
conf_maxconf = expert_confs[best_expert, sample_idx]

acc_maxconf = accuracy_score(y_test_all, y_pred_maxconf)

print(f"\nACCURACY = {acc_maxconf:.4f}")
print(classification_report(y_test_all, y_pred_maxconf, zero_division=0))

---
# TRANSFERT DES SUPPORT VECTORS
**Principe:** On extrait les SVs de Expert 1 (résumé compact de classes 1-8) et on les ajoute aux données de Expert 2. Ainsi Expert 2 connaît les 16 classes.

---

In [None]:
print("+E2 Transfert SVs ")
print("="*60)

# Recover Expert 1's support vectors: support_ indexes the rows of the scaled
# training matrix the expert was fitted on.
expert1_clf = model.experts[0]['clf']
sv_indices = expert1_clf.support_
X_known_train_sc = model.scaler.transform(X_known_train)
X_sv1, y_sv1 = X_known_train_sc[sv_indices], y_known_train[sv_indices]

print(f"SVs from E1: {len(y_sv1)}")
print(f"Classes inside SVs: {sorted(np.unique(y_sv1))}")

# Training set of the transferred expert = old support vectors + new data.
X_new_train_sc = model.scaler.transform(X_new_train)
X_e2_transfer = np.concatenate([X_sv1, X_new_train_sc], axis=0)
y_e2_transfer = np.concatenate([y_sv1, y_new_train])

# Same hyper-parameters as the ensemble experts.
svc_params = dict(C=model.C, gamma=model.gamma, kernel='rbf',
                  class_weight='balanced', probability=True, random_state=42)
expert2_transfer = SVC(**svc_params)
expert2_transfer.fit(X_e2_transfer, y_e2_transfer)

train_pred_transfer = expert2_transfer.predict(X_e2_transfer)
acc2_tr_train = accuracy_score(y_e2_transfer, train_pred_transfer)
print(f"Train accuracy: {acc2_tr_train:.4f}, SVs: {len(expert2_transfer.support_)}")

# Evaluate the transferred expert alone on the full test set.
y_pred_transfer = expert2_transfer.predict(X_test_sc)
acc_transfer = accuracy_score(y_test_all, y_pred_transfer)

print(f"\nACCURACY = {acc_transfer:.4f}")
print(classification_report(y_test_all, y_pred_transfer, zero_division=0))

In [None]:

print("DW-CAV + Transfert SVs")

print("E1 vote 1-8, E2 1-16")

vote_dwcav_tr = np.zeros((n_test, n_classes))

# E1: accuracy-weighted probabilities, restricted to the known classes.
proba1 = expert1_clf.predict_proba(X_test_sc)
ec1 = expert1_clf.classes_
w1 = model.experts[0]['accuracy']
known_set = set(KNOWN_CLASSES)  # built once, not inside the loop
for j, c in enumerate(ec1):
    if c in class_to_idx and c in known_set:
        # Whole column at once: every test sample is updated in one operation.
        vote_dwcav_tr[:, class_to_idx[c]] += w1 * proba1[:, j]

# E2 (transferred expert): votes on every class it was trained on.
proba2 = expert2_transfer.predict_proba(X_test_sc)
ec2 = expert2_transfer.classes_
w2 = acc2_tr_train
for j, c in enumerate(ec2):
    if c in class_to_idx:
        vote_dwcav_tr[:, class_to_idx[c]] += w2 * proba2[:, j]

y_pred_dwcav_tr = ALL_CLASSES[np.argmax(vote_dwcav_tr, axis=1)]
acc_dwcav_tr = accuracy_score(y_test_all, y_pred_dwcav_tr)

print(f"\nACCURACY = {acc_dwcav_tr:.4f}")
print(classification_report(y_test_all, y_pred_dwcav_tr, zero_division=0))

---
# RÉSULTATS COMPARATIFS
---

In [None]:
# One entry per strategy: (display name, predictions, accuracy, bar colour).
methods = [
    ("Vote Basique",         y_pred_basic,    acc_basic,    '#E53935'),
    ("DW-CAV",               y_pred_dwcav,    acc_dwcav,    '#2196F3'),
    ("Max-Confidence",       y_pred_maxconf,  acc_maxconf,  '#4CAF50'),
    ("E2 transféré seul",    y_pred_transfer, acc_transfer, '#FF9800'),
    ("DW-CAV + Transfert",   y_pred_dwcav_tr, acc_dwcav_tr, "#891C9C"),
]

# Confusion matrices of three selected strategies, side by side.
top3 = [methods[k] for k in (1, 3, 4)]
tick_labels = ALL_CLASSES.astype(int)
boundary = len(KNOWN_CLASSES)  # position of the known/new separator lines
fig, axes = plt.subplots(1, 3, figsize=(24, 7))
for ax, (name, y_pred, acc, _) in zip(axes, top3):
    cm = confusion_matrix(y_test_all, y_pred, labels=ALL_CLASSES)
    # Row-normalise; rows without true samples give NaN, replaced by 0.
    cm_norm = np.nan_to_num(cm.astype('float') / cm.sum(axis=1, keepdims=True))
    sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='Blues',
                xticklabels=tick_labels,
                yticklabels=tick_labels,
                ax=ax, vmin=0, vmax=1, cbar=False, annot_kws={'size': 8})
    ax.set_xlabel('Prédite')
    ax.set_ylabel('Réelle')
    ax.set_title(f"{name}\nAccuracy: {acc:.2%}", fontsize=13, fontweight='bold')
    ax.axhline(y=boundary, color='red', lw=2, ls='--')
    ax.axvline(x=boundary, color='red', lw=2, ls='--')
plt.suptitle("Meilleures Stratégies — Learn++.NC", fontsize=15, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('confusion_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(18, 6))

names_short = ['Vote\nBasic', 'DW-CAV', 'Max-\nConf.', 'E2 transf.\nseul', 'DW-CAV\n+ Transfert']
accs_all = [m[2] for m in methods]
colors_all = [m[3] for m in methods]

# Left panel: global accuracy of each strategy.
ax = axes[0]
bars = ax.bar(names_short, accs_all, color=colors_all, edgecolor='black', width=0.55)
for bar, a in zip(bars, accs_all):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
            f'{a:.1%}', ha='center', va='bottom', fontsize=12, fontweight='bold')
ax.set_ylim([0, 1.15])
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_title('Accuracy Globale (16 classes)', fontsize=14, fontweight='bold')
ax.axhline(1.0, color='gray', ls='--', alpha=0.3)

# Right panel: accuracy on known vs. new classes for each strategy.
ax = axes[1]
# The masks depend only on the test labels, so compute them once.
mk = np.isin(y_test_all, KNOWN_CLASSES)
mn = np.isin(y_test_all, NEW_CLASSES)
acc_groups = [
    (accuracy_score(y_test_all[mk], y_pred[mk]),
     accuracy_score(y_test_all[mn], y_pred[mn]))
    for _, y_pred, _, _ in methods
]
known_accs = [a[0] for a in acc_groups]
new_accs = [a[1] for a in acc_groups]

x = np.arange(len(methods))  # one bar group per strategy
w = 0.3
bars1 = ax.bar(x - w/2, known_accs, w, label='known (1-8)', color='#2196F3', edgecolor='black')
bars2 = ax.bar(x + w/2, new_accs, w, label='unknown(9-16)', color='#FF9800', edgecolor='black')
for group_bars, group_accs in ((bars1, known_accs), (bars2, new_accs)):
    for bar, a in zip(group_bars, group_accs):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                f'{a:.1%}', ha='center', va='bottom', fontsize=9, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(names_short, fontsize=9)
ax.set_ylim([0, 1.15])
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_title('known vs unknown classes', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)

plt.tight_layout()
plt.savefig('accuracy_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(18, 6))

# Grouped bars: one group per class, one bar per strategy.
bar_width = 0.16
class_positions = np.arange(n_classes)
for idx, (name, y_pred, acc, col) in enumerate(methods):
    class_accs = []
    for c in ALL_CLASSES:
        in_class = y_test_all == c
        # Classes absent from the test set are reported with accuracy 0.
        if np.sum(in_class) > 0:
            class_accs.append(np.mean(y_pred[in_class] == c))
        else:
            class_accs.append(0)
    # (idx - 2) centres the five bars on the class tick.
    ax.bar(class_positions + (idx - 2) * bar_width, class_accs, width=bar_width,
           label=f'{name} ({acc:.1%})', color=col, edgecolor='black', alpha=0.85)

ax.set_xticks(range(n_classes))
ax.set_xticklabels([int(c) for c in ALL_CLASSES])
ax.set_xlabel('Classe', fontsize=12)
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_title('Accuracy par Classe — 5 Stratégies', fontsize=14, fontweight='bold')
ax.set_ylim([0, 1.15])
ax.legend(fontsize=9, loc='lower right')

# Separator and captions between the two class groups.
ax.axvline(x=7.5, color='red', ls='--', lw=2, alpha=0.5)
for x_pos, caption, caption_color in ((3.5, 'CONNUES', '#2196F3'), (11.5, 'NOUVELLES', '#FF9800')):
    ax.text(x_pos, 1.08, caption, ha='center', fontsize=12, color=caption_color, fontweight='bold')

plt.tight_layout()
plt.savefig('per_class_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

---
# SAUVEGARDE
---

In [None]:
# Save the BEST model = transferred expert (97.7%).
# NOTE(review): the 97.7% figure is hard-coded from an earlier run and is not
# recomputed here — confirm against acc_transfer.
# Saved content: the SVM, the scaler and the metadata.
best_model = {
    'clf': expert2_transfer,
    'scaler': model.scaler,
    'classes': sorted(model.all_classes),
    'accuracy_train': acc2_tr_train,
    'C': model.C,
    'gamma': model.gamma,
    'method': 'SV Transfer',
    'n_sv': len(expert2_transfer.support_),
}

with open('learnpp_sv_transfer.pkl', 'wb') as f:
    pickle.dump(best_model, f)

print(f"Modèle sauvegardé: learnpp_sv_transfer.pkl")
print(f"  Méthode: SV Transfer")
print(f"  Classes: {best_model['classes']}")
print(f"  Accuracy train: {best_model['accuracy_train']:.4f}")
print(f"  SVs: {best_model['n_sv']}")

# Trigger the browser download of the pickle from the Colab runtime.
files.download('learnpp_sv_transfer.pkl')

In [None]:
import os

# Figures this notebook may have written; only those that exist are downloaded.
generated_figures = (
    'optuna_results.png', 'experts_summary.png', 'confusion_matrix.png',
    'confusion_comparison.png', 'accuracy_comparison.png', 'per_class_comparison.png',
    'confidence_distribution.png', 'vote_examples.png', 'class_performance.png',
)
for figure_path in generated_figures:
    if not os.path.exists(figure_path):
        continue
    files.download(figure_path)