In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

from EVO import LogisticRegression, EvolutionOptimizer



In [12]:
from torchvision import datasets, transforms
from torch.utils.data import Subset
import torch
import random

# Set random seed for reproducibility
random.seed(42)
torch.manual_seed(42)

# --- Load and preprocess MNIST (binary classification: 0 vs not-0) ---
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))  # flatten 28x28 to 784
])

mnist_train = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# Binary classification: 0 vs not-0
binary_targets = torch.tensor([1.0 if label == 0 else 0.0 for _, label in mnist_train])
filtered_indices = [i for i, t in enumerate(binary_targets) if t == 1.0 or t == 0.0]

# Shuffle indices and split into train and validation
random.shuffle(filtered_indices)
split_idx = int(0.8 * len(filtered_indices))  # 80% train, 20% val
train_indices = filtered_indices[:split_idx]
val_indices = filtered_indices[split_idx:]

# --- Training Set ---
train_subset = Subset(mnist_train, train_indices)
X_train = torch.stack([train_subset[i][0] for i in range(len(train_subset))])
y_train = torch.tensor([1.0 if train_subset[i][1] == 0 else 0.0 for i in range(len(train_subset))])
X_train = torch.cat([X_train, torch.ones(X_train.size(0), 1)], dim=1)  # add bias

# --- Validation Set ---
val_subset = Subset(mnist_train, val_indices)
X_val = torch.stack([val_subset[i][0] for i in range(len(val_subset))])
y_val = torch.tensor([1.0 if val_subset[i][1] == 0 else 0.0 for i in range(len(val_subset))])
X_val = torch.cat([X_val, torch.ones(X_val.size(0), 1)], dim=1)  # add bias

In [22]:
def population_majority_vote_accuracy(model, X, y):
    """
    For each sample, collect predictions from all individuals in the population,
    take the majority vote, and compare to ground truth labels.
    """
    n = X.size(0)
    votes = torch.zeros((len(model.population), n))

    for i, w in enumerate(model.population):
        logits = X @ w
        preds = (logits > 0).float()
        votes[i] = preds

    # Sum over voters and apply majority rule (>50%)
    majority_preds = (votes.mean(dim=0) > 0.5).float()
    accuracy = (majority_preds == y).float().mean().item() * 100
    return accuracy

def average_pairwise_distance(population):
    n = len(population)
    total_dist = 0
    count = 0

    for i in range(n):
        for j in range(i + 1, n):
            dist = torch.norm(population[i] - population[j])
            total_dist += dist
            count += 1

    return total_dist / count if count > 0 else 0

In [36]:
# --- Train with Evolutionary Optimizer ---
model = LogisticRegression()
optimizer = EvolutionOptimizer(model)
optimizer.set_population_size(100)
optimizer.set_mutation_rate(1)
optimizer.set_mutation_intensity(1)

best_accs = []
majority_accs = []
losses = []

for epoch in range(150):
    optimizer.step(X_train, y_train)

    # Accuracy from best model (model.w)
    y_hat_best = model.predict(X_train)
    acc_best = (y_hat_best == y_train).float().mean().item() * 100

    # Accuracy from majority vote
    acc_majority = population_majority_vote_accuracy(model, X, y)

    # Loss of best model
    current_loss = model.loss(X, y).item()

    # Store values
    best_accs.append(acc_best)
    majority_accs.append(acc_majority)
    losses.append(current_loss)

    print(f"Epoch {epoch+1:02d} - Loss: {current_loss:.4f} | Best Acc: {acc_best:.2f}% | Majority Vote Acc: {acc_majority:.2f}%")





Epoch 01 - Loss: 14.0789 | Best Acc: 9.85% | Majority Vote Acc: 9.55%
Epoch 02 - Loss: 10.8500 | Best Acc: 18.52% | Majority Vote Acc: 9.55%
Epoch 03 - Loss: 10.6503 | Best Acc: 22.20% | Majority Vote Acc: 9.55%
Epoch 04 - Loss: 6.1940 | Best Acc: 45.61% | Majority Vote Acc: 9.55%
Epoch 05 - Loss: 3.7000 | Best Acc: 58.82% | Majority Vote Acc: 10.45%
Epoch 06 - Loss: 1.9633 | Best Acc: 76.17% | Majority Vote Acc: 19.75%
Epoch 07 - Loss: 1.2230 | Best Acc: 85.59% | Majority Vote Acc: 39.90%
Epoch 08 - Loss: 1.5674 | Best Acc: 84.43% | Majority Vote Acc: 71.40%
Epoch 09 - Loss: 1.0500 | Best Acc: 89.27% | Majority Vote Acc: 87.90%
Epoch 10 - Loss: 1.0478 | Best Acc: 90.05% | Majority Vote Acc: 92.15%
Epoch 11 - Loss: 1.0084 | Best Acc: 91.97% | Majority Vote Acc: 91.90%
Epoch 12 - Loss: 0.9259 | Best Acc: 91.97% | Majority Vote Acc: 90.85%
Epoch 13 - Loss: 0.8724 | Best Acc: 92.35% | Majority Vote Acc: 91.50%
Epoch 14 - Loss: 0.6550 | Best Acc: 94.48% | Majority Vote Acc: 92.55%
Epoch 15

In [37]:
diversity = average_pairwise_distance(model.population)
print(f"Diversity: {diversity:.4f}")

# Accuracy of the best individual model
y_pred_best = model.predict(X_val)
acc_best = (y_pred_best == y_val).float().mean().item() * 100

# Accuracy of the ensemble via majority vote
acc_majority = population_majority_vote_accuracy(model, X_val, y_val)

# Display the results
print(f"Validation Accuracy - Best Model: {acc_best:.2f}%")
print(f"Validation Accuracy - Majority Vote: {acc_majority:.2f}%")


Diversity: 245.8089
Validation Accuracy - Best Model: 98.83%
Validation Accuracy - Majority Vote: 99.05%
