Imports

In [68]:
import numpy as np
np.set_printoptions(threshold=10000,suppress=True)
import pandas as pd
import warnings
import matplotlib
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

## Données
Charger les données

In [69]:
def load_data(path, data_dir='data'):
    """Load a header-less, whitespace-separated dataset and split features from labels.

    Parameters
    ----------
    path : str
        File name (or relative path) of the dataset inside ``data_dir``.
    data_dir : str, default 'data'
        Directory that contains the dataset files. The default keeps the
        previous behaviour of reading from ``data/``.

    Returns
    -------
    X : numpy.ndarray
        All columns of the file except the last one.
    y : numpy.ndarray
        The last column of the file.

    Notes
    -----
    The shapes of ``X`` and ``y`` are printed as a side effect.
    """
    import os  # local import so the cell does not depend on another cell for it

    # os.path.join avoids building the path by hand-concatenating separators.
    data = pd.read_csv(os.path.join(data_dir, path), header=None, sep=r"\s+")
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values
    print("X shape:", X.shape)
    print("y shape:", y.shape)
    return X, y

# Load the iris dataset; X and y are reused by the following cells.
X, y = load_data('iris.txt')

# Show the first row of X and the first entry of y
print(X[:1])
print(y[:1])



X shape: (150, 4)
y shape: (150,)
[[5.1 3.5 1.4 0.2]]
[1]


Découpage des données en base d'apprentissage et base de test

In [70]:
from sklearn.model_selection import train_test_split

# Hold out one third of the samples as the test set; the split is seeded
# (random_state=42) and stratified on y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=42, stratify=y)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (100, 4)
X_test shape: (50, 4)


## Perceptron Multi-classe

In [71]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

# Baseline: scikit-learn Perceptron fitted on the raw (unscaled) training features.
perceptron = Perceptron(max_iter=1000, eta0=0.1, random_state=42)
perceptron.fit(X_train, y_train)

# Predictions on the held-out test split; reused by the metrics cell below.
y_pred = perceptron.predict(X_test)

# Overall accuracy on the test split, printed with two decimals.
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.2f}")


Accuracy: 0.80


In [72]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

def _bold(txt: str) -> str:
    return f"\033[1m{txt}\033[0m"

def display_metrics(y_test, y_pred, title: str | None = None):
    """Print the confusion matrix, overall accuracy and per-class precision/recall.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same length as ``y_test``.
    title : str or None, optional
        When given (and non-empty), printed in bold before the metrics.

    Notes
    -----
    Per-class lines are tagged with the actual class label rather than a
    positional index, so the report stays correct when labels are not
    exactly 1..n (for example a label set with a gap, or labels 0/1).
    """
    if title:
        print(_bold(title))

    # Sorted union of the labels seen in either vector. Passing it explicitly
    # to every metric guarantees that matrix rows/columns and the per-class
    # scores all follow this single ordering.
    labels = np.union1d(y_test, y_pred)

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    print("Matrice de confusion:")
    print(cm)

    # Overall accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nAccuracy globale: {accuracy:.4f}")

    # Precision for each class (0 when a class is never predicted)
    precision = precision_score(y_test, y_pred, labels=labels, average=None, zero_division=0)
    print("\nPrécision par classe:")
    for label, p in zip(labels, precision):
        print(f"  Classe {label}: {p:.4f}")

    # Recall for each class (0 when a class has no true samples)
    recall = recall_score(y_test, y_pred, labels=labels, average=None, zero_division=0)
    print("\nRappel par classe:")
    for label, r in zip(labels, recall):
        print(f"  Classe {label}: {r:.4f}")

# Detailed metrics for the perceptron predictions (y_pred) computed in the previous cell.
display_metrics(y_test, y_pred)

Matrice de confusion:
[[15  1  0]
 [ 0  8  9]
 [ 0  0 17]]

Accuracy globale: 0.8000

Précision par classe:
  Classe 1: 1.0000
  Classe 2: 0.8889
  Classe 3: 0.6538

Rappel par classe:
  Classe 1: 0.9375
  Classe 2: 0.4706
  Classe 3: 1.0000


## Perceptron Multi-Couche

In [73]:
from sklearn.neural_network import MLPClassifier

# MLP configured with hidden_layer_sizes=(3,), seeded for reproducibility.
mlp = MLPClassifier(hidden_layer_sizes=(3,), max_iter=1000, random_state=42,  alpha=0.001)

# Fitted on the raw (unscaled) training features.
mlp.fit(X_train, y_train)

y_pred_mlp = mlp.predict(X_test)

display_metrics(y_test, y_pred_mlp)

Matrice de confusion:
[[16  0  0]
 [ 0 16  1]
 [ 0  0 17]]

Accuracy globale: 0.9800

Précision par classe:
  Classe 1: 1.0000
  Classe 2: 1.0000
  Classe 3: 0.9444

Rappel par classe:
  Classe 1: 1.0000
  Classe 2: 0.9412
  Classe 3: 1.0000


Version normalisée

In [74]:
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics are used during training.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit an unfitted copy with the same hyper-parameters (and seed) instead of
# refitting `mlp` in place: `mlp` keeps meaning "the model trained on raw
# features", whatever order the cells are re-run in.
mlp_scaled = clone(mlp)
mlp_scaled.fit(X_train_scaled, y_train)

y_pred_mlp_scaled = mlp_scaled.predict(X_test_scaled)

display_metrics(y_test, y_pred_mlp_scaled)

Matrice de confusion:
[[16  0  0]
 [ 1 15  1]
 [ 0  3 14]]

Accuracy globale: 0.9000

Précision par classe:
  Classe 1: 0.9412
  Classe 2: 0.8333
  Classe 3: 0.9333

Rappel par classe:
  Classe 1: 1.0000
  Classe 2: 0.8824
  Classe 3: 0.8235


## Application à tous les datasets

In [75]:
def pipeline(path, seed=42):
    """Compare a Perceptron and an MLP on one dataset, with and without scaling.

    The dataset is loaded with ``load_data``, split into train/test
    (one third held out, stratified on the labels, seeded with ``seed``),
    and each model is fitted twice: once on the raw features and once on
    features standardised with a scaler fitted on the training split.
    Metrics for every (model, variant) pair are printed; nothing is returned.

    Parameters
    ----------
    path : str
        Dataset file name, forwarded to ``load_data``.
    seed : int, default 42
        Seed used for the split and for both estimators.
    """
    X, y = load_data(path)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=1/3, random_state=seed, stratify=y
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # (title, training features, test features) — raw first, then standardised.
    variants = (
        ("Non normalisé", X_train, X_test),
        ("Normalisé", X_train_scaled, X_test_scaled),
    )

    # (heading, underline, estimator) — single-layer perceptron first, then the MLP.
    experiments = (
        (
            "\nPerceptron:",
            "------------",
            Perceptron(max_iter=1000, eta0=0.1, random_state=seed),
        ),
        (
            "\nPerceptron Multi-Couche:",
            "-------------------------",
            MLPClassifier(hidden_layer_sizes=(3,), max_iter=1000, random_state=seed, alpha=0.001),
        ),
    )

    for heading, underline, estimator in experiments:
        print(_bold(heading))
        print(underline)
        # The same estimator object is refitted for each variant.
        for variant_title, train_features, test_features in variants:
            estimator.fit(train_features, y_train)
            predictions = estimator.predict(test_features)
            display_metrics(y_test, predictions, title=variant_title)

In [76]:
# Run the full comparison on every dataset; each name is turned into
# "<name>.txt" and handed to pipeline(), preceded by a bold banner.
datasources = ["iris", "glass", "Lsun", "Wave", "breast-cancer-wisconsin"]
for ds in datasources :
    print("-------------")
    print(_bold(ds))
    print("-------------")
    pipeline(ds + ".txt")

-------------
[1miris[0m
-------------
X shape: (150, 4)
y shape: (150,)
[1m
Perceptron:[0m
------------
[1mNon normalisé[0m
Matrice de confusion:
[[15  1  0]
 [ 0  8  9]
 [ 0  0 17]]

Accuracy globale: 0.8000

Précision par classe:
  Classe 1: 1.0000
  Classe 2: 0.8889
  Classe 3: 0.6538

Rappel par classe:
  Classe 1: 0.9375
  Classe 2: 0.4706
  Classe 3: 1.0000
[1mNormalisé[0m
Matrice de confusion:
[[15  1  0]
 [ 5  7  5]
 [ 0  2 15]]

Accuracy globale: 0.7400

Précision par classe:
  Classe 1: 0.7500
  Classe 2: 0.7000
  Classe 3: 0.7500

Rappel par classe:
  Classe 1: 0.9375
  Classe 2: 0.4118
  Classe 3: 0.8824
[1m
Perceptron Multi-Couche:[0m
-------------------------
[1mNon normalisé[0m
Matrice de confusion:
[[16  0  0]
 [ 0 16  1]
 [ 0  0 17]]

Accuracy globale: 0.9800

Précision par classe:
  Classe 1: 1.0000
  Classe 2: 1.0000
  Classe 3: 0.9444

Rappel par classe:
  Classe 1: 1.0000
  Classe 2: 0.9412
  Classe 3: 1.0000
[1mNormalisé[0m
Matrice de confusion:
[[1

## Bagging

In [78]:
from sklearn.pipeline import make_pipeline
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler, LabelEncoder

def _majority_vote(pred_matrix_int: np.ndarray, n_classes: int) -> np.ndarray:
    """
    pred_matrix_int: shape (K, n_samples), valeurs entières 0..n_classes-1
    Retour: shape (n_samples,)
    """
    # Pour chaque colonne (un échantillon), on prend l'argmax du bincount
    # Simple et efficace sans dépendances.
    K, n = pred_matrix_int.shape
    out = np.empty(n, dtype=int)
    for j in range(n):
        counts = np.bincount(pred_matrix_int[:, j], minlength=n_classes)
        out[j] = int(np.argmax(counts))
    return out


def bagging_mlp_pipeline(path, K=10, seed=42):
    """
    Train a bagged ensemble of MLPs on one dataset and evaluate it on a test split.

    a) draws K bootstrap samples from the training split
    b) trains one (scaler + MLP) pipeline on each bootstrap sample
    c) aggregates the test predictions by majority vote
    d) prints the metrics of the aggregated model on the test split

    Parameters
    ----------
    path : str
        Dataset file name, forwarded to ``load_data``.
    K : int, default 10
        Number of bootstrap samples, i.e. number of models in the ensemble.
    seed : int, default 42
        Seed of the train/test split; bootstrap sample ``k`` and its MLP
        both use ``seed + k``.

    Returns
    -------
    dict
        ``"models"``: list of the K fitted pipelines,
        ``"y_test"``: true labels of the test split,
        ``"y_pred"``: labels predicted by the majority vote,
        ``"accuracy"``: accuracy of the aggregated model on the test split.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 1 (there would be nothing to vote on).
    """
    if K < 1:
        raise ValueError(f"K must be at least 1, got {K}")

    X, y = load_data(path)

    # Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=1/3, random_state=seed, stratify=y
    )

    # Encode the labels as 0..n_classes-1 so the vote (based on np.bincount)
    # works whatever the original label values are, strings included.
    le = LabelEncoder()
    y_train_int = le.fit_transform(y_train)
    n_classes = len(le.classes_)

    # Fitted models and their test predictions
    models = []
    preds = []

    print(_bold("\nBagging MLP"))
    print("-----------")
    print(f"K (bootstrap samples): {K}\n")

    for k in range(K):
        # a) Bootstrap sample of the training split (same size, with replacement)
        Xb, yb_int = resample(
            X_train, y_train_int,
            replace=True,
            n_samples=len(X_train),
            random_state=seed + k
        )

        # b) One MLP per bootstrap sample; the scaler sits inside the pipeline
        #    so it is fitted on that bootstrap sample only.
        model = make_pipeline(
            StandardScaler(),
            MLPClassifier(
                hidden_layer_sizes=(3,),
                max_iter=1000,
                random_state=seed + k,
                alpha=0.001
            )
        )
        model.fit(Xb, yb_int)
        models.append(model)

        # Predictions of this model on the test split
        preds.append(model.predict(X_test))

    preds = np.asarray(preds)  # shape (K, n_test)

    # c) Aggregated model H (majority vote), mapped back to the original labels
    y_pred_int = _majority_vote(preds, n_classes=n_classes)
    y_pred = le.inverse_transform(y_pred_int)

    # d) Evaluation on the test split
    display_metrics(y_test, y_pred, title="Aggregated model H (majority vote)")

    # Hand the ensemble and its predictions back so callers can reuse them.
    return {
        "models": models,
        "y_test": y_test,
        "y_pred": y_pred,
        "accuracy": accuracy_score(y_test, y_pred),
    }


In [84]:
# Bagging on iris with K=10 bootstrap models; the return value is kept in `res`.
res = bagging_mlp_pipeline("iris.txt", K=10)

X shape: (150, 4)
y shape: (150,)
[1m
Bagging MLP[0m
-----------
K (bootstrap samples): 10

[1mAggregated model H (majority vote)[0m
Matrice de confusion:
[[16  0  0]
 [ 0 16  1]
 [ 0  3 14]]

Accuracy globale: 0.9200

Précision par classe:
  Classe 1: 1.0000
  Classe 2: 0.8421
  Classe 3: 0.9333

Rappel par classe:
  Classe 1: 1.0000
  Classe 2: 0.9412
  Classe 3: 0.8235
