In [1]:
import  torch.nn.functional as F
def train(model, device, train_loader, optimizer,  epoch,criterion=F.cross_entropy, display=True):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    if display:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))
    return loss.item()

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, size_average=False).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

In [4]:

from numpy.random import RandomState
import numpy as np
import torch
from torch.utils.data import Subset
import timm
import torch

torch.cuda.benchmark = True

from torchvision import datasets, transforms
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

# We resize images to allow using imagenet pre-trained models, is there a better way?
resize = transforms.Resize(224)

transform_val = transforms.Compose([resize, transforms.ToTensor(), normalize]) #careful to keep this one same
transform_train = transforms.Compose([resize, transforms.ToTensor(), normalize])

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device) # you will really need gpu's for this part

##### Cifar Data
cifar_data = datasets.CIFAR10(root='.',train=True, transform=transform_train, download=True)

#We need two copies of this due to weird dataset api
cifar_data_val = datasets.CIFAR10(root='.',train=True, transform=transform_val, download=True)

accs = []


edgenext_xx_small = timm.create_model('edgenext_xx_small', pretrained=True, num_classes=10)

classfiers = [ edgenext_xx_small]

from timm.scheduler.cosine_lr import CosineLRScheduler
from timm.optim import SGDP

def run(model):
    print(model.__class__.__name__)
    accs = np.array([])
    times = np.array([])
    losses = np.array([])   
    model.to(device)
    optimizer = SGDP(model.parameters(), lr=0.01, weight_decay=0.0005)
    
    scheduler = CosineLRScheduler(optimizer, t_initial=5)
    for seed in range(50):
        print("seed", seed)
        prng = RandomState(seed)
        random_permute = prng.permutation(np.arange(0, 5000))
        classes =  prng.permutation(np.arange(0,10))
        indx_train = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[0:25]] for classe in classes[0:2]])
        indx_val = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[25:225]] for classe in classes[0:2]])

        train_data = Subset(cifar_data, indx_train)
        val_data = Subset(cifar_data_val, indx_val)

        print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

        train_loader = torch.utils.data.DataLoader(train_data,
                                                    batch_size=128,
                                                    shuffle=True)

        val_loader = torch.utils.data.DataLoader(val_data,
                                                batch_size=128,
                                                shuffle=False)
        
        start = torch.cuda.Event(enable_timing=True)
        start.record()
        for epoch in range(50):
            loss = train(model, device, train_loader, optimizer, epoch, display=epoch%10==0)
            losses = np.append(losses, loss)
            scheduler.step(loss)
        end = torch.cuda.Event(enable_timing=True)
        end.record()
        torch.cuda.synchronize()

        time = start.elapsed_time(end)
        times = np.append(times, time)        
        acc = test(model, device, val_loader)
        accs = np.append(accs, acc)
    print('Acc over 5 instances: %.2f +- %.2f , time: %.2f'%(accs.mean(),accs.std(), times.mean()/1000))    
    return accs, times, losses

metrics_map = {}
for model in classfiers:
    accs, times, losses = run(model)
    print('Acc over 5 instances: %.2f +- %.2f , time:  %.2f +- %.2f'%(accs.mean(),accs.std(), times.mean()/1000, times.std()/1000))
    metrics_map[model.__class__.__name__] = (accs, times, losses)
    

cuda
Files already downloaded and verified
Files already downloaded and verified
EdgeNeXt
seed 0
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6493, Accuracy: 260/400 (65.00%)

seed 1
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.2982, Accuracy: 358/400 (89.50%)

seed 2
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.2428, Accuracy: 362/400 (90.50%)

seed 3
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5652, Accuracy: 301/400 (75.25%)

seed 4
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.6204, Accuracy: 296/400 (74.00%)

seed 5
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.5130, Accuracy: 328/400 (82.00%)

seed 6
Num Samples For Training 50 Num Samples For Val 400

Test set: Average loss: 0.4309, Accuracy: 330/400 (82.50%)

seed 7
Num Samples For Training 50 Num Samples For Val 400

Test set: 