<a href="https://colab.research.google.com/github/nikalitt1/Clementine-Agriculture/blob/main/Population_based_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import Omniglot
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resize transform: resize to 28x28 + ToTensor()
transform = transforms.Compose([
    transforms.Resize((28, 28)),  # Resize images to 28x28
    transforms.ToTensor(),        # Convert PIL image to tensor and scale to [0,1]
])

# Load Omniglot dataset (background=True for training, False for testing)
train_dataset = Omniglot(root='./data', background=True, download=True, transform=transform)
test_dataset = Omniglot(root='./data', background=False, download=True, transform=transform)

# Combine train and test for one dataset
full_data = train_dataset + test_dataset

# Extract data and labels into numpy arrays
X = []
y = []
for img, label in full_data:
    X.append(img.numpy())  # Keep as image tensor (1, 28, 28)
    y.append(label)

X = np.stack(X)
y = np.array(y)

# Limit to first 50 classes to keep problem manageable
max_classes = 200
mask = y < max_classes
X = X[mask]
y = y[mask]

# Encode labels 0..49
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train/val/test split: 70% train, 15% val, 15% test
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y_encoded, test_size=0.15, random_state=42, stratify=y_encoded
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.1765, random_state=42, stratify=y_trainval
)

# Convert to tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.long)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.long)

# Create dataloaders
batch_size = 64
train_loader = DataLoader(torch.utils.data.TensorDataset(X_train_t, y_train_t), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(torch.utils.data.TensorDataset(X_val_t, y_val_t), batch_size=batch_size)
test_loader = DataLoader(torch.utils.data.TensorDataset(X_test_t, y_test_t), batch_size=batch_size)

# ====== Define CNN Model (unchanged) ======
class OmniglotCNN(nn.Module):
    def __init__(self, num_classes=max_classes):
        super(OmniglotCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),  # input channel=1 (grayscale)
            nn.ReLU(),
            nn.MaxPool2d(2),  # 28x28 -> 14x14
            nn.Dropout(0.25),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 14x14 -> 7x7
            nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# ====== Evaluation function (unchanged) ======
def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for xb, yb in dataloader:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            preds = torch.argmax(logits, dim=1)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(yb.cpu().numpy())
    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)
    acc = accuracy_score(all_targets, all_preds)
    return acc

# ====== Training function for one epoch (adapted for population training) ======
def train_one_epoch(model, optimizer, criterion, dataloader, model_id=None, epoch_num=None, log_every=10):
    model.train()
    running_loss = 0
    print(f"Model {model_id} - Starting Epoch {epoch_num}")
    for batch_idx, (xb, yb) in enumerate(dataloader, 1):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % log_every == 0:
            avg_loss = running_loss / log_every
            running_loss = 0
            print(f"Model {model_id} - Epoch {epoch_num} - Batch {batch_idx}/{len(dataloader)} - Loss: {avg_loss:.4f}")
    print(f"Model {model_id} - Finished Epoch {epoch_num}")

# ====== Evolutionary Training loop (population based) ======
def evolution_training_until_one(population_size=10, survival_rate=0.6, epochs_per_gen=10, log_every_batches=10):
    population, optimizers = [], []
    criterion = nn.CrossEntropyLoss()
    for _ in range(population_size):
        model = OmniglotCNN().to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        population.append(model)
        optimizers.append(optimizer)

    best_model, best_optimizer, best_score = None, None, -float('inf')
    generation = 0

    while len(population) > 1:
        generation += 1
        print(f"\nðŸŒ± Generation {generation} | Population size: {len(population)}")

        for i, (model, optimizer) in enumerate(zip(population, optimizers)):
            for epoch in range(1, epochs_per_gen + 1):
                train_one_epoch(model, optimizer, criterion, train_loader,
                                model_id=i+1, epoch_num=epoch, log_every=log_every_batches)
            print(f"Model {i+1} done training for {epochs_per_gen} epochs in generation {generation}")

        scores = [evaluate_model(m, val_loader) for m in population]
        sorted_indices = np.argsort(scores)[::-1]

        if scores[sorted_indices[0]] > best_score:
            best_score = scores[sorted_indices[0]]
            best_model = population[sorted_indices[0]]
            best_optimizer = optimizers[sorted_indices[0]]
            print(f"  ðŸŽ‰ New best model with accuracy: {best_score:.4f}")

        survivors = max(1, int(len(population) * survival_rate))
        population = [population[i] for i in sorted_indices[:survivors]]
        optimizers = [optimizers[i] for i in sorted_indices[:survivors]]

        if best_model not in population:
            population.append(best_model)
            optimizers.append(best_optimizer)
            print("  ðŸ”„ Best model preserved with elitism")

        print(f"  âœ… Best Acc This Gen: {scores[sorted_indices[0]]:.4f} | Worst: {scores[sorted_indices[-1]]:.4f}")

    torch.save(best_model.state_dict(), "best_omniglot_cnn_model.pth")
    return best_model

# ====== Run Training ======
best_model = evolution_training_until_one(
    population_size=200,
    survival_rate=0.6,
    epochs_per_gen=10,
    log_every_batches=10
)

# ====== Final Evaluation ======
test_acc = evaluate_model(best_model, test_loader) * 100
print(f"\nFinal Test Accuracy: {test_acc:.4f} %")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Model 8 - Finished Epoch 7
Model 8 - Starting Epoch 8
Model 8 - Epoch 8 - Batch 10/88 - Loss: 2.2841
Model 8 - Epoch 8 - Batch 20/88 - Loss: 2.1546
Model 8 - Epoch 8 - Batch 30/88 - Loss: 2.1479
Model 8 - Epoch 8 - Batch 40/88 - Loss: 2.2014
Model 8 - Epoch 8 - Batch 50/88 - Loss: 2.2855
Model 8 - Epoch 8 - Batch 60/88 - Loss: 2.2039
Model 8 - Epoch 8 - Batch 70/88 - Loss: 2.1026
Model 8 - Epoch 8 - Batch 80/88 - Loss: 2.2541
Model 8 - Finished Epoch 8
Model 8 - Starting Epoch 9
Model 8 - Epoch 9 - Batch 10/88 - Loss: 2.1639
Model 8 - Epoch 9 - Batch 20/88 - Loss: 2.1646
Model 8 - Epoch 9 - Batch 30/88 - Loss: 2.1701
Model 8 - Epoch 9 - Batch 40/88 - Loss: 2.1713
Model 8 - Epoch 9 - Batch 50/88 - Loss: 2.2274
Model 8 - Epoch 9 - Batch 60/88 - Loss: 2.2529
Model 8 - Epoch 9 - Batch 70/88 - Loss: 2.1996
Model 8 - Epoch 9 - Batch 80/88 - Loss: 2.0628
Model 8 - Finished Epoch 9
Model 8 - Starting Epoch 10
Model 8 - Epoch 10 -

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import Omniglot
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resize transform: resize to 28x28 + ToTensor()
transform = transforms.Compose([
    transforms.Resize((28, 28)),  # Resize images to 28x28
    transforms.ToTensor(),        # Convert PIL image to tensor and scale to [0,1]
])

# Load Omniglot dataset (background=True for training, False for testing)
train_dataset = Omniglot(root='./data', background=True, download=True, transform=transform)
test_dataset = Omniglot(root='./data', background=False, download=True, transform=transform)

# Combine train and test for one dataset
full_data = train_dataset + test_dataset

# Extract data and labels into numpy arrays
X = []
y = []
for img, label in full_data:
    X.append(img.numpy())  # Keep as image tensor (1, 28, 28)
    y.append(label)

X = np.stack(X)
y = np.array(y)

# Limit to first 50 classes to keep problem manageable
max_classes = 200
mask = y < max_classes
X = X[mask]
y = y[mask]

# Encode labels 0..49
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train/val/test split: 70% train, 15% val, 15% test
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y_encoded, test_size=0.15, random_state=42, stratify=y_encoded
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.1765, random_state=42, stratify=y_trainval
)

# Convert to tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.long)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.long)

# Create dataloaders
batch_size = 64
train_loader = DataLoader(torch.utils.data.TensorDataset(X_train_t, y_train_t), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(torch.utils.data.TensorDataset(X_val_t, y_val_t), batch_size=batch_size)
test_loader = DataLoader(torch.utils.data.TensorDataset(X_test_t, y_test_t), batch_size=batch_size)

# ====== Define CNN Model ======
class OmniglotCNN(nn.Module):
    def __init__(self, num_classes=max_classes):
        super(OmniglotCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),  # input channel=1 (grayscale)
            nn.ReLU(),
            nn.MaxPool2d(2),  # 28x28 -> 14x14
            nn.Dropout(0.25),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 14x14 -> 7x7
            nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Evaluation function
def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for xb, yb in dataloader:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)  # no flatten before CNN
            preds = torch.argmax(logits, dim=1)
            all_preds.append(preds.cpu().numpy())
            all_targets.append(yb.cpu().numpy())
    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)
    acc = accuracy_score(all_targets, all_preds)
    return acc

# Training function for one epoch
def train_one_epoch(model, optimizer, criterion, dataloader):
    model.train()
    running_loss = 0
    for xb, yb in dataloader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * xb.size(0)
    return running_loss / len(dataloader.dataset)

# Instantiate model, loss, optimizer
model = OmniglotCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Optional: Enable cudnn benchmark for performance if CUDA
if device.type == 'cuda':
    torch.backends.cudnn.benchmark = True

# Training loop
num_epochs = 120
for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, optimizer, criterion, train_loader)
    if (epoch + 1) % 5 == 0 or epoch == 0:
        val_acc = evaluate_model(model, val_loader)
        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Accuracy: {val_acc:.4f}")

# Final evaluation on test set
test_acc = evaluate_model(model, test_loader) * 100
print(f"\nFinal Test Accuracy: {test_acc:.4f} %")


Epoch 1/120 - Train Loss: 5.3065 - Val Accuracy: 0.0050
Epoch 5/120 - Train Loss: 5.2999 - Val Accuracy: 0.0050
Epoch 10/120 - Train Loss: 5.2998 - Val Accuracy: 0.0050
Epoch 15/120 - Train Loss: 5.2994 - Val Accuracy: 0.0050
Epoch 20/120 - Train Loss: 5.2992 - Val Accuracy: 0.0050
Epoch 25/120 - Train Loss: 5.3003 - Val Accuracy: 0.0050
Epoch 30/120 - Train Loss: 5.2992 - Val Accuracy: 0.0050
Epoch 35/120 - Train Loss: 5.2991 - Val Accuracy: 0.0050
Epoch 40/120 - Train Loss: 5.2991 - Val Accuracy: 0.0050
Epoch 45/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 50/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 55/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 60/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 65/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 70/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 75/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 80/120 - Train Loss: 5.2990 - Val Accuracy: 0.0050
Epoch 85/120 - Train Loss: 5.2990

In [6]:
import math

def generations_to_one(pop_size, survival_rate):
    # Calculate how many generations until population shrinks to 1 or less
    return 1 + math.ceil(math.log(1 / pop_size) / math.log(survival_rate))

def total_epochs_final_model(pop_size, survival_rate, epochs_per_gen):
    G = generations_to_one(pop_size, survival_rate)
    total = G * epochs_per_gen
    return total, G

# Example usage:
pop_size = 200
survival_rate = 0.6
epochs_per_gen = 10  # for example


total, generations = total_epochs_final_model(pop_size, survival_rate, epochs_per_gen)
print(f"Generations: {generations}, Total epochs for final model: {total}")


Generations: 12, Total epochs for final model: 120


In [62]:
import torch
torch.cuda.empty_cache()