<a href="https://colab.research.google.com/github/nikalitt1/Clementine-Agriculture/blob/main/Population_based_MLP_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import Omniglot
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
import random

# ====== Setup ======
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# ====== Load Omniglot Dataset ======
transform = transforms.Compose([
    transforms.Resize((28, 28)),  # Resize images to 28x28
    transforms.ToTensor(),        # Convert PIL image to tensor scaled [0,1]
])

train_dataset = Omniglot(root='./data', background=True, download=True, transform=transform)
test_dataset = Omniglot(root='./data', background=False, download=True, transform=transform)

full_data = train_dataset + test_dataset

# Extract data and labels into numpy arrays
X = []
y = []
for img, label in full_data:
    X.append(img.view(-1).numpy())  # flatten 28x28 -> 784 vector
    y.append(label)

X = np.stack(X)
y = np.array(y)

max_classes = 50
mask = y < max_classes
X = X[mask]
y = y[mask]

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train/val/test split: 70/15/15 stratified
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y_encoded, test_size=0.15, random_state=42, stratify=y_encoded)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.1765, random_state=42, stratify=y_trainval)

# Convert to tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.long)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.long)

batch_size = 64
train_loader = DataLoader(torch.utils.data.TensorDataset(X_train_t, y_train_t), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(torch.utils.data.TensorDataset(X_val_t, y_val_t), batch_size=batch_size)
test_loader = DataLoader(torch.utils.data.TensorDataset(X_test_t, y_test_t), batch_size=batch_size)

# ====== Define MLP Model ======
class OmniglotMLP(nn.Module):
    def __init__(self, input_size=28*28, num_classes=max_classes):
        super(OmniglotMLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )
    def forward(self, x):
        return self.net(x)

# ====== Evaluation function ======
def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for xb, yb in dataloader:
            xb, yb = xb.to(device), yb.to(device)
            xb = xb.view(xb.size(0), -1)  # Flatten
            logits = model(xb)
            preds = torch.argmax(logits, dim=1)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(yb.cpu().numpy())
    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)
    return accuracy_score(all_targets, all_preds)

# ====== Training function for one epoch ======
def train_one_epoch(model, optimizer, criterion, dataloader, model_id=None, epoch_num=None, log_every=100):
    model.train()
    running_loss = 0.0
    for batch_idx, (xb, yb) in enumerate(dataloader, 1):
        xb, yb = xb.to(device), yb.to(device)
        xb = xb.view(xb.size(0), -1)  # Flatten
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % log_every == 0:
            avg_loss = running_loss / log_every
            running_loss = 0.0
            prefix = f"Model {model_id} - Epoch {epoch_num}" if model_id is not None and epoch_num is not None else ""
            print(f"{prefix} - Batch {batch_idx}/{len(dataloader)} - Loss: {avg_loss:.4f}")

# ====== Evolutionary Training with Elitism ======
def evolution_training_until_one(
    population_size=10,
    survival_rate=0.6,
    epochs_per_gen=10,
    log_every_batches=100
):
    population = []
    optimizers = []
    criterion = nn.CrossEntropyLoss()

    # Initialize population and optimizers
    for _ in range(population_size):
        model = OmniglotMLP().to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        population.append(model)
        optimizers.append(optimizer)

    generation = 0
    best_model = None
    best_optimizer = None
    best_score = -float('inf')

    while len(population) > 1:
        generation += 1
        print(f"\nðŸŒ± Generation {generation} | Population size: {len(population)}")

        # Train all models in population
        for i, (model, optimizer) in enumerate(zip(population, optimizers)):
            print(f"  - Training model {i+1}/{len(population)}")
            for epoch in range(1, epochs_per_gen + 1):
                train_one_epoch(model, optimizer, criterion, train_loader, model_id=i+1, epoch_num=epoch, log_every=log_every_batches)

        # Evaluate all models
        scores = [evaluate_model(m, val_loader) for m in population]
        sorted_indices = np.argsort(scores)[::-1]  # Descending order

        # Update best model (elitism)
        if scores[sorted_indices[0]] > best_score:
            best_score = scores[sorted_indices[0]]
            best_model = population[sorted_indices[0]]
            best_optimizer = optimizers[sorted_indices[0]]
            print(f"  ðŸŽ‰ New best model with accuracy: {best_score:.4f}")

        # Survival selection
        survivors = max(1, int(len(population) * survival_rate))
        population = [population[i] for i in sorted_indices[:survivors]]
        optimizers = [optimizers[i] for i in sorted_indices[:survivors]]

        # Add best model back if lost (elitism)
        if best_model not in population:
            population.append(best_model)
            optimizers.append(best_optimizer)
            print("  ðŸ”„ Best model preserved with elitism")

        print(f"  âœ… Best Acc This Gen: {scores[sorted_indices[0]]:.4f} | Worst: {scores[sorted_indices[-1]]:.4f}")

    # Save best model
    torch.save(best_model.state_dict(), "best_omniglot_model.pth")
    return best_model

# ====== Run Evolution Training ======
best_model = evolution_training_until_one(
    population_size=1000,    # You can adjust this as needed
    survival_rate=0.6,
    epochs_per_gen=10,
    log_every_batches=100
)

# ====== Final Evaluation ======
best_model.eval()
test_accuracy = evaluate_model(best_model, test_loader)
print(f"\nðŸŽ¯ Final test accuracy on Omniglot: {test_accuracy * 100:.2f}%")



ðŸŒ± Generation 1 | Population size: 1000
  - Training model 1/1000
  - Training model 2/1000
  - Training model 3/1000
  - Training model 4/1000
  - Training model 5/1000
  - Training model 6/1000
  - Training model 7/1000
  - Training model 8/1000
  - Training model 9/1000
  - Training model 10/1000
  - Training model 11/1000
  - Training model 12/1000
  - Training model 13/1000
  - Training model 14/1000
  - Training model 15/1000
  - Training model 16/1000
  - Training model 17/1000
  - Training model 18/1000
  - Training model 19/1000
  - Training model 20/1000
  - Training model 21/1000
  - Training model 22/1000
  - Training model 23/1000
  - Training model 24/1000
  - Training model 25/1000
  - Training model 26/1000
  - Training model 27/1000
  - Training model 28/1000
  - Training model 29/1000
  - Training model 30/1000
  - Training model 31/1000
  - Training model 32/1000
  - Training model 33/1000
  - Training model 34/1000
  - Training model 35/1000
  - Training model 36

In [49]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import Omniglot
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resize transform: resize to 28x28 + ToTensor()
transform = transforms.Compose([
    transforms.Resize((28, 28)),  # Resize images to 28x28
    transforms.ToTensor(),        # Convert PIL image to tensor and scale to [0,1]
])

# Load Omniglot dataset (background=True for training, False for testing)
train_dataset = Omniglot(root='./data', background=True, download=True, transform=transform)
test_dataset = Omniglot(root='./data', background=False, download=True, transform=transform)

# Combine train and test for one dataset
full_data = train_dataset + test_dataset

# Extract data and labels into numpy arrays
X = []
y = []
for img, label in full_data:
    X.append(img.view(-1).numpy())  # flatten 28x28 -> 784
    y.append(label)

X = np.stack(X)
y = np.array(y)

# Limit to first 50 classes to keep problem manageable
max_classes = 50
mask = y < max_classes
X = X[mask]
y = y[mask]

# Encode labels 0..49
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train/val/test split: 70% train, 15% val, 15% test
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y_encoded, test_size=0.15, random_state=42, stratify=y_encoded
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.1765, random_state=42, stratify=y_trainval
)

# Convert to tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.long)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.long)

# Create dataloaders
batch_size = 64
train_loader = DataLoader(torch.utils.data.TensorDataset(X_train_t, y_train_t), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(torch.utils.data.TensorDataset(X_val_t, y_val_t), batch_size=batch_size)
test_loader = DataLoader(torch.utils.data.TensorDataset(X_test_t, y_test_t), batch_size=batch_size)

# Define MLP model with input size 784 (28x28) and 50 classes
class OmniglotMLP(nn.Module):
    def __init__(self, input_size=28*28, num_classes=max_classes):
        super(OmniglotMLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(128, num_classes)
        )
    def forward(self, x):
        return self.net(x)

# Evaluation function
def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for xb, yb in dataloader:
            xb, yb = xb.to(device), yb.to(device)
            xb = xb.view(xb.size(0), -1)  # Flatten
            logits = model(xb)
            preds = torch.argmax(logits, dim=1)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(yb.cpu().numpy())
    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)
    acc = accuracy_score(all_targets, all_preds)
    return acc

# Training function for one epoch
def train_one_epoch(model, optimizer, criterion, dataloader):
    model.train()
    running_loss = 0
    for xb, yb in dataloader:
        xb, yb = xb.to(device), yb.to(device)
        xb = xb.view(xb.size(0), -1)  # Flatten
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * xb.size(0)
    return running_loss / len(dataloader.dataset)

# Instantiate model, loss, optimizer
model = OmniglotMLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Optional: Enable cudnn benchmark for performance if CUDA
if device.type == 'cuda':
    torch.backends.cudnn.benchmark = True

# Training loop
num_epochs = 150
for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, optimizer, criterion, train_loader)
    if (epoch + 1) % 5 == 0 or epoch == 0:
        val_acc = evaluate_model(model, val_loader)
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Accuracy: {val_acc:.4f}")

# Final evaluation on test set
test_acc = evaluate_model(model, test_loader)
print(f"\nFinal Test Accuracy: {test_acc:.4f}")


Epoch 1/150 - Train Loss: 3.9461 - Val Accuracy: 0.0266
Epoch 5/150 - Train Loss: 3.9162 - Val Accuracy: 0.0199
Epoch 10/150 - Train Loss: 3.9137 - Val Accuracy: 0.0199
Epoch 15/150 - Train Loss: 3.9105 - Val Accuracy: 0.0166
Epoch 20/150 - Train Loss: 3.8874 - Val Accuracy: 0.0233
Epoch 25/150 - Train Loss: 3.8297 - Val Accuracy: 0.0465
Epoch 30/150 - Train Loss: 3.7909 - Val Accuracy: 0.0498
Epoch 35/150 - Train Loss: 3.7763 - Val Accuracy: 0.0465
Epoch 40/150 - Train Loss: 3.7469 - Val Accuracy: 0.0465
Epoch 45/150 - Train Loss: 3.7436 - Val Accuracy: 0.0399
Epoch 50/150 - Train Loss: 3.7301 - Val Accuracy: 0.0498
Epoch 55/150 - Train Loss: 3.7450 - Val Accuracy: 0.0532
Epoch 60/150 - Train Loss: 3.7190 - Val Accuracy: 0.0764
Epoch 65/150 - Train Loss: 3.7264 - Val Accuracy: 0.0797
Epoch 70/150 - Train Loss: 3.6988 - Val Accuracy: 0.0532
Epoch 75/150 - Train Loss: 3.6832 - Val Accuracy: 0.0631
Epoch 80/150 - Train Loss: 3.6974 - Val Accuracy: 0.0864
Epoch 85/150 - Train Loss: 3.6649

In [48]:
import math

def generations_to_one(pop_size, survival_rate):
    # Calculate how many generations until population shrinks to 1 or less
    return 1 + math.ceil(math.log(1 / pop_size) / math.log(survival_rate))

def total_epochs_final_model(pop_size, survival_rate, epochs_per_gen):
    G = generations_to_one(pop_size, survival_rate)
    total = G * epochs_per_gen
    return total, G

# Example usage:
pop_size = 1000
survival_rate = 0.6
epochs_per_gen = 10  # for example


total, generations = total_epochs_final_model(pop_size, survival_rate, epochs_per_gen)
print(f"Generations: {generations}, Total epochs for final model: {total}")


Generations: 15, Total epochs for final model: 150
