***Challenge 1***

Here the goal is to train on 25 samples. In this preliminary testbed the evaluation will be done on a 2000 sample validation set. Note in the end the final evaluation will be done on the full CIFAR-10 test set as well as potentially a separate dataset. The validation samples here should not be used for training in any way, the final evaluation will provide only random samples of 25 from a datasource that is not the CIFAR-10 training data. 

Feel free to modify this testbed to your liking, including the normalization transformations etc. Note however the final evaluation testbed will have a rigid set of components where you will need to place your answer. The only constraint is the data. Refer to the full project instructions for more information.


Setup training functions. Again you are free to fully modify this testbed in your prototyping within the constraints of the data used. You can use tools outside of pytorch for training models if desired as well although the torchvision dataloaders will still be useful for interacting with the cifar-10 dataset. 

In [1]:
import torch
import torch.nn as nn 
import torch.nn.functional as F

In [2]:
def train(model, device, train_loader, optimizer, epoch, display=True):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        # loss = model.weight_regularization_loss()
        loss.backward()
        optimizer.step()
    if display:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, size_average=False).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

In [3]:
import random
import torch.nn as nn

class EvoDCNN(nn.Module):
    def __init__(self, num_layers, num_filters, filter_size, activation_func, pooling_func, hidden_size, initialization, dropout_value, short_connection, batch_norm):
        super(EvoDCNN, self).__init__()
        
        layers = []
        in_channels = 3 
        self.short_connection= short_connection
        
        # define the layers of the CNN based on the hyperparameters
        for i in range(num_layers):
            layer = nn.Conv2d(in_channels, num_filters, filter_size)
            initialization(layer.weight)
            layers.append(layer)
            if batch_norm:
              layers.append(nn.BatchNorm2d(num_filters))
            layers.append(activation_func())
            layers.append(nn.Dropout(p=dropout_value))
            # layers.append(pooling_func(filter_size))
            in_channels = num_filters
        
        self.cnn_layers = nn.Sequential(*layers)
        # print( 'num_layers:', num_layers,'num_filters',num_filters,'filter_size:',filter_size,'activation_func:',activation_func,'pooling_func:',pooling_func)
        self.fc_layer = nn.Linear(hidden_size[0]*hidden_size[1]*hidden_size[2], 10) 
        
    def forward(self, x):
        identity = x # save the input tensor for the short connection
        x = self.cnn_layers(x)
        # print('shape ',x.shape[1],x.shape[2],x.shape[3])
        x = x.view(x.size(0), -1) # flatten the output of the CNN layers
        # r = x.shape[1]#*x.shape[2]#*x.shape[3]
        # x = nn.Linear(r, 10) 
        x = self.fc_layer(x)
        if self.short_connection: # add the input tensor to the output of the convolutional block
            identity = identity.view(identity.size(0), -1) # resize identity tensor
            identity = identity[:, :x.shape[1]] # match the number of output features
            x = x + identity
        return x


def calculate_size(num_layers, num_filters, filter_size, activation_func, pooling_func, input_size, dropout_value, batch_norm):
    # Define a sequential model with the specified number of layers
    model = nn.Sequential()
    in_channels = input_size[0]
    # print(in_channels,num_filters, filter_size)
    for i in range(num_layers):
        model.add_module(f'conv{i+1}', nn.Conv2d(in_channels, num_filters, filter_size))
        model.add_module(f'batchnorm{i+1}', nn.BatchNorm2d(num_filters))
        model.add_module(f'activation{i+1}', activation_func())
        model.add_module(f'dropout{i+1}',nn.Dropout(p=dropout_value))
        model.add_module(f'batch_norm{i+1}',nn.BatchNorm2d(num_filters))
        # model.add_module(f'pooling{i+1}', pooling_func(filter_size))
        in_channels = num_filters

    # Calculate the output size of the model
    x = torch.randn(input_size).unsqueeze(0)
    output_size = model(x).size()[1:]
    return output_size


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define the hyperparameters to search over
hyperparameter_ranges = {
    'num_layers': range(2, 5),
    'num_filters': [8, 16, 32, 64, 128, 256],
    'filter_size': [3, 5, 7],
    'activation_func': [nn.ReLU, nn.Sigmoid],
    'pooling_func': [nn.MaxPool2d, nn.AvgPool2d],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1],
    'optimizer': [optim.Adam, optim.SGD, optim.RMSprop, optim.Adagrad, optim.Adadelta],
    'initialization': [nn.init.normal_, nn.init.uniform_, nn.init.trunc_normal_, 
                       nn.init.normal_, nn.init.xavier_normal_, nn.init.xavier_uniform_, 
                       nn.init.kaiming_normal_, nn.init.kaiming_uniform_],
    'dropout': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
    'short_connection': [0, 1],
    'batch_norm': [0, 1]
}

# Define the function to generate a random population
def generate_random_population(population_size, hyperparameter_ranges):
    population = []
    for i in range(population_size):
        member = []
        member.append(random.choice(hyperparameter_ranges['num_layers']))
        member.append(random.choice(hyperparameter_ranges['num_filters']))
        member.append(random.choice(hyperparameter_ranges['filter_size']))
        member.append(random.choice(hyperparameter_ranges['activation_func']))   
        member.append(random.choice(hyperparameter_ranges['pooling_func']))
        member.append(random.choice(hyperparameter_ranges['learning_rate']))
        member.append(random.choice(hyperparameter_ranges['optimizer']))
        member.append(random.choice(hyperparameter_ranges['initialization']))
        member.append(random.choice(hyperparameter_ranges['dropout']))
        member.append(random.choice(hyperparameter_ranges['short_connection']))
        member.append(random.choice(hyperparameter_ranges['batch_norm']))
        population.append(member)
    return population


def evaluate_population(members, train_loader, test_loader, num_epochs=5):
    best_member = None
    best_loss = float('inf')
    running_accuracy = 0.0
    for i, member in enumerate(members):
        input_size = (3,32,32)
        y = calculate_size(member[0], member[1], member[2], member[3], member[4], input_size, member[8], member[10])
        model = EvoDCNN(member[0], member[1], member[2], member[3], member[4], y, member[7], member[8], member[9], member[10])
        #model = model.to(device)
        if torch.cuda.is_available():
            model.cuda()
        model.train()
        criterion = nn.CrossEntropyLoss()
        optimizer = member[6](model.parameters(), lr=member[5])
        for epoch in range(num_epochs):
            running_loss = 0.0     
            for j, data in enumerate(train_loader, 0):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print(f"Epoch {epoch+1}: loss {running_loss/len(train_loader)}")
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        # Add their accuracy
        member.append(accuracy)
        print(f"Accuracy for member {i+1}: {accuracy}%")
        # print(running_accuracy,' < ',accuracy)
        if running_accuracy < accuracy:
            running_accuracy = accuracy
            best_member = member
            # print('current bm:',best_member)
    print('Best member is:', best_member)
    return best_member, members


In [5]:
# Define the mutation operator
def mutation(member, hyperparameter_ranges):
   mutated_member = member.copy()
   param = ['num_layers', 'num_filters', 'filter_size', 'activation_func', 'pooling_func','learning_rate', 'optimizer', 'initialization', 'dropout', 'short_connection', 'batch_norm']
   i = random.randint(0, 10)
   new_param = random.choice(hyperparameter_ranges[param[i]])
   while new_param == mutated_member[i]:
      new_param = random.choice(hyperparameter_ranges[param[i]])
   
   mutated_member[i] = new_param

   return mutated_member

def crossover(parent1, parent2):
    crossover_point = np.random.randint(1, len(parent1))
    child_chromosomes = parent1[:crossover_point] + parent2[crossover_point:]
    return child_chromosomes

def select_parents(population, num_parents):
    fitness_scores = [p[-1] for p in population]
    total_fitness = sum(fitness_scores)
    probabilities = [score/total_fitness for score in fitness_scores]
    parents = []
    for i in range(num_parents):
        parent_idx = np.random.choice(len(population), p=probabilities)
        parents.append(population[parent_idx])
    return parents


In [6]:
# import torch.utils.data as data
import torch.utils.data as data_utils

def select_best_model(initial_train_data): 
    # Convert initial_train_data into a dataset object
    train_dataset = initial_train_data.dataset

    # Set the size of the test set
    test_size = int(len(train_dataset) * 0.2) # 20% of the data will be used for testing

    # Calculate the size of the training set
    train_size = len(train_dataset) - test_size

    # Use random_split to create a train and test dataset
    train_dataset, test_dataset = data_utils.random_split(train_dataset, [train_size, test_size])

    # Create a DataLoader for the train and test dataset
    train_loader = data_utils.DataLoader(train_dataset, batch_size=128, shuffle=True)
    test_loader = data_utils.DataLoader(test_dataset, batch_size=128, shuffle=True)
    
    # Generate the initial population
    population_size = 5
    initial_population = generate_random_population(population_size, hyperparameter_ranges)
    
    # Evaluate the initial population
    best_member, members = evaluate_population(initial_population, train_loader, test_loader)
    
    # Run the genetic algorithm
    num_generations = 1
    combined_population = members
    for generation in range(num_generations):
        print('Generation: ', generation)
        # Select the parents for crossover
        parents = select_parents(members, 2)
        
        # Perform crossover and mutation to create the new population
        new_population = []
        for i in range(population_size):
            parent1 = parents[random.randint(0, len(parents)-1)]
            parent2 = parents[random.randint(0, len(parents)-1)]
            child = crossover(parent1, parent2)
            child = mutation(child, hyperparameter_ranges)
            new_population.append(child)
        
        # Evaluate the new population
        new_best_member, new_members = evaluate_population(new_population, train_loader, test_loader)
        combined_population = combined_population + new_members
        members = new_members
    sorted_list = sorted(combined_population, key=lambda x: x[-1])
    final_best_member = sorted_list[-1]
    print("Best member:", final_best_member)
    return final_best_member


In [None]:
from numpy.random import RandomState
import numpy as np
import torch.optim as optim
from torch.utils.data import Subset

  
from torchvision import datasets, transforms
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))

transform_val = transforms.Compose([transforms.ToTensor(), normalize]) #careful to keep this one same
transform_train = transforms.Compose([transforms.ToTensor(), normalize]) 

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

##### Cifar Data
cifar_data = datasets.CIFAR10(root='.',train=True, transform=transform_train, download=True)
    
#We need two copies of this due to weird dataset api 
cifar_data_val = datasets.CIFAR10(root='.',train=True, transform=transform_val, download=True)
    

accs = []

for seed in range(1, 5):
  prng = RandomState(seed)
  random_permute = prng.permutation(np.arange(0, 1000))
  classes =  prng.permutation(np.arange(0,10))
  indx_train = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
  indx_val = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])


  train_data = Subset(cifar_data, indx_train)
  val_data = Subset(cifar_data_val, indx_val)

  print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))
  
  train_loader = torch.utils.data.DataLoader(train_data,
                                             batch_size=128, 
                                             shuffle=True)

  val_loader = torch.utils.data.DataLoader(val_data,
                                           batch_size=128, 
                                           shuffle=False)
  


  best_model = select_best_model(train_data)
  input_size = (3,32,32)
  y = calculate_size(best_model[0], best_model[1], best_model[2], best_model[3], best_model[4], input_size, best_model[8], best_model[10])
  model = EvoDCNN(best_model[0], best_model[1], best_model[2], best_model[3], best_model[4], y, best_model[7], best_model[8], best_model[9], best_model[10])
  model.to(device)
  optimizer = best_model[6](model.parameters(), lr=best_model[5])

  for epoch in range(100):
    train(model, device, train_loader, optimizer, epoch, display=epoch%5==0)
    
  accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over 5 instances: %.2f +- %.2f'%(accs.mean(),accs.std()))


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 45091732.39it/s]


Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400
Epoch 1: loss 2.467020674635427
Epoch 2: loss 2.3552102311350667
Epoch 3: loss 2.355137522609089
Epoch 4: loss 2.35498519141834
Epoch 5: loss 2.222622519103102
Accuracy for member 1: 24.62%
Epoch 1: loss 8.218504267378737
Epoch 2: loss 2.358458701033181
Epoch 3: loss 2.36049069535618
Epoch 4: loss 2.3616379282345026
Epoch 5: loss 2.360328037517901
Accuracy for member 2: 10.25%
Epoch 1: loss 1.9286543926872766
Epoch 2: loss 1.6300531770474613
Epoch 3: loss 1.4958377410047732
Epoch 4: loss 1.4017783643338626
Epoch 5: loss 1.31934947441942
Accuracy for member 3: 53.4%
Epoch 1: loss 2.9033129283795343
Epoch 2: loss 2.3079451699607287
Epoch 3: loss 2.3088459587706542
Epoch 4: loss 2.309035978378198
Epoch 5: loss 2.309220004005554
Accuracy for member 4: 10.06%
Epoch 1: loss 107740892.43127304
Epoch 2: loss 2.692625802926743
Epoch 3: loss 2.307408584954259
Epoch 4