In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import random
from datasets import load_dataset
import copy


In [2]:
import sys
import os

# Get the absolute path to the parent directory of 'src'
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
sys.path.append(project_root)

from src.algos import BP
from src.nets import MLP, LSTM
from src.train import train_model
from src.utils import create_binary_task

NameError: name 'Optional' is not defined

In [34]:
from datasets import load_dataset
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Load tiny-imagenet from Hugging Face
ds = load_dataset("zh-plus/tiny-imagenet")

# Set random seed for reproducibility
torch.manual_seed(42)

# Create a PyTorch Dataset wrapper for Hugging Face dataset
class TinyImageNetDataset(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform
        self.targets = [item['label'] for item in self.dataset]

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        image = item['image']
        label = item['label']
        
        if self.transform:
            image = self.transform(image)
            
        return image, label

# Tiny ImageNet preprocessing - adjusted for 64x64 images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                       std=[0.229, 0.224, 0.225])
])

transform = None

# Create train and test datasets
train_dataset = TinyImageNetDataset(ds['train'], transform=transform)
test_dataset = TinyImageNetDataset(ds['valid'], transform=transform)

# Create data loaders
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False
)

In [25]:

# Define hyperparameters
input_size = 64 * 64 * 3  # ImageNet images are 224x224x3
hidden_size = 256  # Increased for more complex data
num_classes = 2  # Binary classification
num_epochs = 10  # Reduced epochs due to larger dataset
batch_size = 128
learning_rate = 0.001
dropout_rate = 0.2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [26]:

# Initialize models for binary classification
mlp_model = MLP(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    dropout_rate=dropout_rate
).to(device)

lstm_model = LSTM(
    input_size=64 * 3,  # Each row of the image * RGB channels
    hidden_size=hidden_size,
    num_layers=1,
    num_classes=num_classes,
    dropout_rate=dropout_rate
).to(device)

bp_mlp = BP(
    net=mlp_model,
    step_size=0.001,
    loss='nll',
    opt='adam',
    weight_decay=0.0,
    to_perturb=False,
    perturb_scale=0.1,
    device=str(device)
)

bp_lstm = BP(
    net=lstm_model,
    step_size=0.001,
    loss='nll',
    opt='adam',
    weight_decay=0.0,
    to_perturb=False,
    perturb_scale=0.1,
    device=str(device)
)

bp_mlp_perturb = BP(
    net=copy.deepcopy(mlp_model),
    step_size=0.001,
    loss='nll',
    opt='adam',
    weight_decay=0.0,
    to_perturb=True,
    perturb_scale=0.1,
    device=str(device)
)

bp_lstm_perturb = BP(
    net=copy.deepcopy(lstm_model),
    step_size=0.001,
    loss='nll',
    opt='adam',
    weight_decay=0.0,
    to_perturb=True,
    perturb_scale=0.1,
    device=str(device)
)

experiments = [
    ('mlp', bp_mlp),
    ('lstm', bp_lstm),
    ('mlp_perturb', bp_mlp_perturb),
    ('lstm_perturb', bp_lstm_perturb)
]

In [31]:
# Create 1000 random class pairs from ImageNet's 1000 classes
all_classes = list(range(1000))  # ImageNet has 1000 classes
binary_tasks = []
random.seed(42)

while len(binary_tasks) < 1000:
    class1, class2 = random.sample(all_classes, 2)
    if (class1, class2) not in binary_tasks and (class2, class1) not in binary_tasks:
        binary_tasks.append((class1, class2))

# Shuffle the tasks
random.shuffle(binary_tasks)


In [32]:
import importlib
from src import utils  # Import the module
importlib.reload(utils)  # Force reload

# Now reimport the specific function
from src.utils import create_binary_task

In [35]:

# Rest of the training loop remains the same
train_logs = {}

for experiment_name, experiment in experiments:
    print(f"Training {experiment_name}...")
    all_train_losses = []
    all_test_accuracies = []
    task_boundaries = []
    
    for task_idx, (class1, class2) in enumerate(binary_tasks):
        print(f"\nStarting Binary Task {task_idx + 1}: Class {class1} vs Class {class2}")
        
        # Create binary datasets
        binary_train = create_binary_task(train_dataset, (class1, class2))
        binary_test = create_binary_task(test_dataset, (class1, class2))
        
        # Train the model on this binary task
        train_losses, test_accuracies = train_model(
            algo=experiment,
            train_data=binary_train,
            test_data=binary_test,
            num_epochs=num_epochs,
            device=device,
            batch_size=batch_size
        )
        
        all_train_losses.extend(train_losses)
        all_test_accuracies.extend(test_accuracies)
        task_boundaries.append(len(all_train_losses))
    
    train_logs[experiment_name] = {
        'all_train_losses': all_train_losses,
        'all_test_accuracies': all_test_accuracies,
        'task_boundaries': task_boundaries
    }

Training mlp...

Starting Binary Task 1: Class 906 vs Class 109


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.JpegImagePlugin.JpegImageFile'>

In [None]:
import os

# Ensure 'output' directory exists
output_dir = 'output'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    print(f"Directory '{output_dir}' was created.")

In [None]:
import pickle

# Save training logs for each experiment
for experiment_name, train_log in train_logs.items():
    # Create filename using experiment name
    log_filename = f"{output_dir}/imagenet_binary_{experiment_name}_logs.pkl"
    
    # Save logs using pickle
    with open(log_filename, 'wb') as f:
        pickle.dump(train_log, f)
    print(f"Training logs saved to: {log_filename}")


In [None]:
for experiment_name, train_log in train_logs.items():
    all_train_losses = train_log['all_train_losses']
    all_test_accuracies = train_log['all_test_accuracies']
    task_boundaries = train_log['task_boundaries']
    
    # Calculate accuracy boundaries
    # Each task runs for num_epochs, so boundaries occur every num_epochs
    accuracy_boundaries = [(i + 1) * num_epochs for i in range(len(task_boundaries) - 1)]

    # 1. Combined Plot: Training Loss and Test Accuracy
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))
    
    # Training loss
    ax1.plot(all_train_losses)
    ax1.set_title(f'{experiment_name.upper()} - Training Loss Across Binary Tasks', fontsize=14)
    ax1.set_xlabel('Training Steps', fontsize=12)
    ax1.set_ylabel('Loss', fontsize=12)
    for boundary in task_boundaries:
        ax1.axvline(x=boundary, color='red', linestyle='--', alpha=0.5)

    # Test accuracy
    ax2.plot(all_test_accuracies)
    ax2.set_title(f'{experiment_name.upper()} - Test Accuracy Across Binary Tasks', fontsize=14)
    ax2.set_xlabel('Epochs', fontsize=12)
    ax2.set_ylabel('Accuracy (%)', fontsize=12)
    for boundary in accuracy_boundaries:
        ax2.axvline(x=boundary, color='red', linestyle='--', alpha=0.5)

    plt.tight_layout()
    combined_path = f"{output_dir}/imagenet_binary_{experiment_name}_combined.png"
    plt.savefig(combined_path)
    print(f"Combined figure saved to: {combined_path}")
    plt.show()

    # 2. Separate Plot: Training Loss
    plt.figure(figsize=(8, 6))
    plt.plot(all_train_losses)
    plt.title(f'{experiment_name.upper()} - Training Loss', fontsize=14)
    plt.xlabel('Training Steps', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    for boundary in task_boundaries:
        plt.axvline(x=boundary, color='red', linestyle='--', alpha=0.5)
    plt.tight_layout()
    loss_path = f"{output_dir}/imagenet_binary_{experiment_name}_loss.png"
    plt.savefig(loss_path)
    print(f"Loss figure saved to: {loss_path}")
    plt.show()

    # 3. Separate Plot: Test Accuracy
    plt.figure(figsize=(8, 6))
    plt.plot(all_test_accuracies)
    plt.title(f'{experiment_name.upper()} - Test Accuracy', fontsize=14)
    plt.xlabel('Epochs', fontsize=12)
    plt.ylabel('Accuracy (%)', fontsize=12)
    for boundary in accuracy_boundaries:
        plt.axvline(x=boundary, color='red', linestyle='--', alpha=0.5)
    plt.tight_layout()
    accuracy_path = f"{output_dir}/imagenet_binary_{experiment_name}_accuracy.png"
    plt.savefig(accuracy_path)
    print(f"Accuracy figure saved to: {accuracy_path}")
    plt.show()