In [1]:
import numpy as np
import pandas as pd
import sys, copy, os, shutil, time
import torch
import torch.nn as nn
import utils
from resnet import resnet20, resnet32, resnet44
from IPython.display import clear_output
from tqdm.notebook import tqdm

# for loading datasets
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10, MNIST, FashionMNIST

# Make sure the output directory for the CSV logs exists.
# os.makedirs(..., exist_ok=True) is a single idempotent call: it avoids the
# check-then-create race of listing the working directory first, and it raises
# immediately (FileExistsError) if "logs" exists but is not a directory instead
# of failing much later when the logs are written.
os.makedirs("logs", exist_ok=True)
    
# Preprocessing is deliberately minimal: resize every image to 32 x 32 and
# convert it to a tensor -- nothing else is applied.
_resize_to_32 = transforms.Resize((32, 32))
_as_tensor = transforms.ToTensor()
train_transforms = transforms.Compose([_resize_to_32, _as_tensor])
    
# number of samples per batch used by the data loaders
batch_size = 512

# Prefer CUDA when PyTorch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [20]:
# for CIFAR10, let's see which train and test points each model got correct

# Load the CIFAR10 train and test splits (downloading into ./data if needed),
# both with the same resize-only preprocessing.
data_train, data_test = (
    CIFAR10(root="./data", train=is_train, download=True, transform=train_transforms)
    for is_train in (True, False)
)

# Wrap each split in a loader. shuffle=False keeps the iteration order fixed,
# which the per-sample bookkeeping further down relies on.
trainloader, testloader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (data_train, data_test)
)

# For every trained CIFAR10 model, record which individual train / test points
# it classifies correctly at its final checkpoint, then save one row per
# (model, epoch) to CSV. Each row is: arch, variant, seed, epoch, followed by
# one 1.0 / 0.0 entry per sample (in loader order).

# REVISION: ONLY LOOKING AT THE LAST EPOCH (checkpoint file "099.pth").
TOTAL_EPOCHS = 100
EPOCHS_TO_SCORE = range(TOTAL_EPOCHS - 1, TOTAL_EPOCHS)

# architecture name -> {variant -> constructor}
MODEL_BUILDERS = {
    "cnn": {25: utils.CIFAR_CNN25K, 47: utils.CIFAR_CNN47K, 100: utils.CIFAR_CNN100K},
    "resnet": {20: resnet20, 32: resnet32, 44: resnet44},
}


def _parse_model_name(model_name):
    """Split a model directory name into (arch, variant, seed).

    CNN names carry the parameter count as "params=<N>k"; ResNet names carry
    the depth as "variant=<N>_". Both end with "seed=<S>".

    Raises:
        ValueError: if the name contains neither "cnn" nor "resnet". Without
            this check an unknown directory would either crash with a
            NameError or silently reuse the previous iteration's model.
    """
    if "cnn" in model_name:
        arch = "cnn"
        variant = int(model_name.split("params=")[1].split("k")[0])
    elif "resnet" in model_name:
        arch = "resnet"
        variant = int(model_name.split("variant=")[1].split("_")[0])
    else:
        raise ValueError(f"Cannot determine the architecture of model '{model_name}'.")
    seed = int(model_name.split("seed=")[1])
    return arch, variant, seed


def _build_model(arch, variant):
    """Instantiate a fresh, untrained network for the given arch / variant.

    Raises:
        ValueError: if the (arch, variant) pair has no registered constructor.
    """
    builders = MODEL_BUILDERS.get(arch, {})
    if variant not in builders:
        raise ValueError(f"Unsupported {arch} variant: {variant}.")
    return builders[variant]()


def _per_sample_correct(model, loader):
    """Return a float64 array with 1.0 where the model's prediction matches
    the label and 0.0 otherwise, one entry per sample in loader order.

    Floats (rather than booleans) are kept so that the saved CSVs contain
    1.0 / 0.0 values.
    """
    batches = []
    # do not use grad - make our predictions + record our accuracies
    with torch.no_grad():
        for inputs, labels in tqdm(loader):
            inputs, labels = inputs.to(device), labels.to(device)
            predictions = model(inputs).argmax(dim=1)
            batches.append((predictions == labels).cpu().numpy())
    if not batches:
        return np.array([])
    # concatenate once at the end instead of re-copying the array every batch
    return np.concatenate(batches).astype(np.float64)


# what models do we have available for this dataset?
model_names = [f for f in sorted(os.listdir("models/CIFAR10")) if "seed" in f]

# Rows are collected in plain lists and turned into DataFrames once at the end;
# growing a frame with tens of thousands of columns row-by-row is very slow.
train_rows, test_rows = [], []

# go thru each of our models
for model_num, model_name in enumerate(model_names):

    # start time
    start = time.time()

    # figure out which architecture this directory holds and build it
    model_arch, variant, seed = _parse_model_name(model_name)
    model = _build_model(model_arch, variant)

    # create our row header for this model
    header = [model_arch, variant, seed]

    for epoch in EPOCHS_TO_SCORE:

        # load in the weights for this epoch; map_location lets checkpoints
        # saved on a GPU be loaded when we are running on the CPU
        checkpoint_path = f"models/CIFAR10/{model_name}/{str(epoch).zfill(3)}.pth"
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model.to(device)
        model.eval()

        ###### TRAINING SET METRICS
        train_accs = _per_sample_correct(model, trainloader)
        train_rows.append(header + [epoch] + train_accs.tolist())

        ###### TESTING SET METRICS
        test_accs = _per_sample_correct(model, testloader)
        test_rows.append(header + [epoch] + test_accs.tolist())

    # compute end time
    end = time.time()

    # status update (every 5th model); the time shown is for this model only
    if (model_num + 1) % 5 == 0:
        clear_output(wait=True)
        print(f"Finished processing Epoch {str(epoch + 1).zfill(3)} of {TOTAL_EPOCHS} on Model {str(model_num + 1).zfill(3)} of {len(model_names)} in {np.round(end - start, 3)} seconds.")

# build the score tables: 4 metadata columns + one column per sample, with the
# sample count taken from the datasets instead of being hard-coded
score_header = ["arch", "variant", "seed", "epoch"]
cifar_train_scores = pd.DataFrame(
    train_rows, columns=score_header + list(np.arange(len(trainloader.dataset)))
)
cifar_test_scores = pd.DataFrame(
    test_rows, columns=score_header + list(np.arange(len(testloader.dataset)))
)

# save our logs at the very end
cifar_train_scores.to_csv("logs/cifar10_train_scores.csv", index=False)
cifar_test_scores.to_csv("logs/cifar10_test_scores.csv", index=False)

Finished processing Epoch 100 of 100 on Model 300 of 300 in 12.115 seconds.
