***Challenge 1***

Here the goal is to train on 25 samples. In this preliminary testbed the evaluation will be done on a 2000 sample validation set. Note in the end the final evaluation will be done on the full CIFAR-10 test set as well as potentially a separate dataset. The validation samples here should not be used for training in any way, the final evaluation will provide only random samples of 25 from a datasource that is not the CIFAR-10 training data.

Feel free to modify this testbed to your liking, including the normalization transformations etc. Note however the final evaluation testbed will have a rigid set of components where you will need to place your answer. The only constraint is the data. Refer to the full project instructions for more information.


Setup training functions. Again you are free to fully modify this testbed in your prototyping within the constraints of the data used. You can use tools outside of pytorch for training models if desired as well although the torchvision dataloaders will still be useful for interacting with the cifar-10 dataset.

In [1]:
import  torch.nn.functional as F
def train(model, device, train_loader, optimizer,  epoch,criterion=F.cross_entropy, display=True):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    if display:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))
    return loss.item()

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, size_average=False).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

***Challenge 2***

You may use the same testbed but without the constraints on external datasets or models trained on exeternal datasets. You may not however use any of the CIFAR-10 training set.

In [4]:
import torchvision.models as models

from numpy.random import RandomState
import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import Subset
import timm
import torch.nn as nn


from torchvision import datasets, transforms
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

# We resize images to allow using imagenet pre-trained models, is there a better way?
resize = transforms.Resize(224)

transform_val = transforms.Compose([resize, transforms.ToTensor(), normalize]) #careful to keep this one same
transform_train = transforms.Compose([resize, transforms.ToTensor(), normalize])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device) # you will really need gpu's for this part

##### Cifar Data
cifar_data = datasets.CIFAR10(root='.',train=True, transform=transform_train, download=True)

#We need two copies of this due to weird dataset api
cifar_data_val = datasets.CIFAR10(root='.',train=True, transform=transform_val, download=True)

accs = []

# r_model = torchvision.models.resnet18(pretrained=True)    

for seed in range( 5):
    prng = RandomState(seed)
    random_permute = prng.permutation(np.arange(0, 5000))
    classes =  prng.permutation(np.arange(0,10))
    indx_train = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
    indx_val = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

    train_data = Subset(cifar_data, indx_train)
    val_data = Subset(cifar_data_val, indx_val)

    print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

    train_loader = torch.utils.data.DataLoader(train_data,
                                                batch_size=128,
                                                shuffle=True)

    val_loader = torch.utils.data.DataLoader(val_data,
                                            batch_size=128,
                                            shuffle=False)


    model = models.mobilenet_v3_large(pretrained=True)
    # model = r_model
    # model.classifier = nn.Linear(256 * 6 * 6, 10)
    model.classifier = nn.Linear(960, 10)
    
    optimizer = torch.optim.SGD(model.classifier.parameters(),
                                lr=0.01, momentum=0.9,
                                weight_decay=0.0005)
    model.to(device)
    for epoch in range(10):
        train(model, device, train_loader, optimizer, epoch, display=True)

    accs.append(test(model, device, val_loader))

accs = np.array(accs)
print('Acc over 5 instances: %.2f +- %.2f'%(accs.mean(),accs.std()))

cuda
Files already downloaded and verified
Files already downloaded and verified
Num Samples For Training 50 Num Samples For Val 400





Test set: Average loss: 0.4800, Accuracy: 328/400 (82.00%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4026, Accuracy: 322/400 (80.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.3778, Accuracy: 386/400 (96.50%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4608, Accuracy: 345/400 (86.25%)

Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4924, Accuracy: 343/400 (85.75%)

Acc over 5 instances: 86.20 +- 5.59


In [2]:
import torchvision.models as models

from numpy.random import RandomState
import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import Subset
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.cuda.benchmark = True

from torchvision import datasets, transforms
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

# We resize images to allow using imagenet pre-trained models, is there a better way?
resize = transforms.Resize(224)

transform_val = transforms.Compose([resize, transforms.ToTensor(), normalize]) #careful to keep this one same
transform_train = transforms.Compose([resize, transforms.ToTensor(), normalize])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device) # you will really need gpu's for this part

##### Cifar Data
cifar_data = datasets.CIFAR10(root='.',train=True, transform=transform_train, download=True)

#We need two copies of this due to weird dataset api
cifar_data_val = datasets.CIFAR10(root='.',train=True, transform=transform_val, download=True)

accs = []

resnext = timm.create_model('resnext26ts', pretrained=True, num_classes=10)
efficient_net = timm.create_model('tf_efficientnet_b0', pretrained=True, num_classes=10)
resnet18 = timm.create_model('resnet18', pretrained=True, num_classes=10)
mobilenet_v3_large = timm.create_model('mobilenetv3_large_100', pretrained=True, num_classes=10)
cait = timm.create_model('cait_xxs24_224', pretrained=True, num_classes=10)
edgenext = timm.create_model('edgenext_xx_small', pretrained=True, num_classes=10)
inception_v4 = timm.create_model('inception_v4', pretrained=True, num_classes=10)
classfiers = [ inception_v4]

from timm.scheduler.cosine_lr import CosineLRScheduler
from timm.optim import Lion, AdamW, SGDP

def run(model):
    print(model.__class__.__name__)
    accs = np.array([])
    times = np.array([])
    losses = np.array([])   
    model.to(device)
    optimizer = SGDP(model.parameters(), lr=0.01, weight_decay=0.0005)
    
    scheduler = CosineLRScheduler(optimizer, t_initial=5)
    for seed in range(5):
        print("seed", seed)
        prng = RandomState(seed)
        random_permute = prng.permutation(np.arange(0, 5000))
        classes =  prng.permutation(np.arange(0,10))
        indx_train = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
        indx_val = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

        train_data = Subset(cifar_data, indx_train)
        val_data = Subset(cifar_data_val, indx_val)

        print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

        train_loader = torch.utils.data.DataLoader(train_data,
                                                    batch_size=128,
                                                    shuffle=True)

        val_loader = torch.utils.data.DataLoader(val_data,
                                                batch_size=128,
                                                shuffle=False)
        
        start = torch.cuda.Event(enable_timing=True)
        start.record()
        for epoch in range(50):
            loss = train(model, device, train_loader, optimizer, epoch, display=epoch%10==0)
            losses = np.append(losses, loss)
            scheduler.step(loss)
        end = torch.cuda.Event(enable_timing=True)
        end.record()
        torch.cuda.synchronize()

        time = start.elapsed_time(end)
        times = np.append(times, time)        
        acc = test(model, device, val_loader)
        accs = np.append(accs, acc)
    print('Acc over 5 instances: %.2f +- %.2f , time: %.2f'%(accs.mean(),accs.std(), times.mean()/1000))    
    return accs, times, losses

metrics_map = {}
for model in classfiers:
    accs, times, losses = run(model)
    print('Acc over 5 instances: %.2f +- %.2f , time:  %.2f +- %.2f'%(accs.mean(),accs.std(), times.mean()/1000, times.std()/1000))
    metrics_map[model.__class__.__name__] = (accs, times, losses)
    




cuda
Files already downloaded and verified
Files already downloaded and verified
InceptionV4
seed 0
Num Samples For Training 50 Num Samples For Val 400





Test set: Average loss: 0.5619, Accuracy: 309/400 (77.25%)

seed 1
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4060, Accuracy: 360/400 (90.00%)

seed 2
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.2653, Accuracy: 389/400 (97.25%)

seed 3
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5515, Accuracy: 315/400 (78.75%)

seed 4
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5213, Accuracy: 329/400 (82.25%)

Acc over 5 instances: 85.10 +- 7.51 , time: 53.89
Acc over 5 instances: 85.10 +- 7.51 , time:  53.89 +- 4.02


In [15]:
import pickle

with open('metrics_map-sgd-noscheduler.pkl', 'wb') as f:
    pickle.dump(metrics_map, f)