In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
%cd / content/gdrive/MyDrive/cs394n_project/CS394N
! pip3 install -r requirements.txt

In [None]:
import sys

# Make the project's `src` directory importable. The Drive is mounted at
# /content/gdrive (not /content/drive), so the path must use that prefix.
src_dir = '/content/gdrive/MyDrive/cs394n_project/CS394N/src'
if src_dir not in sys.path:  # re-running this cell must not add duplicates
    sys.path.append(src_dir)

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np

from utils.nets import *
from utils.model_tools import *
from utils.feature_extractor import *
from utils.dataset_tools import *
from utils.cosine_similarity import *

In [4]:
# Constants

# Initial learning rate handed to the Adam optimizers in this notebook.
LEARNING_RATE = 0.001 # Different for CIFAR100
# Used with LEARNING_RATE to derive the per-epoch scheduler factor:
# gamma = (EXP_DECAY / LEARNING_RATE) ** (1 / num_epochs), i.e. the learning
# rate reaches EXP_DECAY after num_epochs scheduler steps.
EXP_DECAY = 0.0001

# Mini-batch size for the general and held-out-class DataLoaders.
batch_size = 64

# Files
# Destination for the state_dict of the base model trained without labels 8 and 9.
FNIST_model_no_boot_bag_file = "./logs/fnist_no_boot_bag.pt"

In [5]:
# Tensor conversion followed by a three-channel normalization (mean 0.5, std 0.5 per channel).
# NOTE(review): `transform` is reassigned in the Fashion-MNIST loading cell below
# before any dataset in this section is built, so this definition is not used there.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [19]:
# Loading general Fashion MNIST trainsets/testsets: https://github.com/zalandoresearch/fashion-mnist

transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5), (0.5))])


FMNIST_train_gen = torchvision.datasets.FashionMNIST(root='./data', train=True,
                                        download=True, transform=transform)
FMNIST_trainloader_gen = torch.utils.data.DataLoader(FMNIST_train_gen, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

FMNIST_test_gen = torchvision.datasets.FashionMNIST(root='./data', train=False,
                                       download=True, transform=transform)
FMNIST_testloader_gen = torch.utils.data.DataLoader(FMNIST_test_gen, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

FMNIST_classes = {'T-shirt/top', 'Trouser','Pullover','Dress','Coat','Sandal','Shirt','Sneaker','Bag','Ankle boot'}


# Paper leaves out 'Ankle boot' and 'Bag' class, which are indices 8 and 9 respectively
# TODO: clean this up to use the nice subset code

no_boot_bag_train_idx = np.where((np.array(FMNIST_train_gen.targets) != 8) & 
                        (np.array(FMNIST_train_gen.targets) != 9))[0]
no_boot_bag_train_subset = torch.utils.data.Subset(FMNIST_train_gen, no_boot_bag_train_idx)
no_boot_bag_train_dl = torch.utils.data.DataLoader(no_boot_bag_train_subset, batch_size=batch_size, shuffle=True, num_workers=2)

no_boot_bag_test_idx = np.where((np.array(FMNIST_test_gen.targets) != 8) & 
                        (np.array(FMNIST_test_gen.targets) != 9))[0]
no_boot_bag_test_subset = torch.utils.data.Subset(FMNIST_test_gen, no_boot_bag_test_idx)
no_boot_bag_test_dl = torch.utils.data.DataLoader(no_boot_bag_test_subset, batch_size=batch_size, shuffle=True, num_workers=2)

boot_train_idx = np.where((np.array(FMNIST_train_gen.targets) == 9))[0]
boot_train_subset = torch.utils.data.Subset(FMNIST_train_gen, boot_train_idx)
boot_train_dl = torch.utils.data.DataLoader(boot_train_subset, batch_size=batch_size, shuffle=True, num_workers=2)

no_bag_test_idx = np.where((np.array(FMNIST_test_gen.targets) != 8))[0]
no_bag_test_subset = torch.utils.data.Subset(FMNIST_test_gen, no_bag_test_idx)
no_bag_test_dl = torch.utils.data.DataLoader(no_bag_test_subset, batch_size=1, shuffle=True, num_workers=2)


test = torch.utils.data.ConcatDataset([boot_train_subset, no_bag_test_subset])

In [20]:
print(len(boot_train_subset))
print(len(no_bag_test_subset))
print(len(test))

6000
9000
15000


In [None]:
# Schedule: the per-epoch factor is chosen so that the learning rate has gone
# from LEARNING_RATE to EXP_DECAY once num_epochs scheduler steps have run.
num_epochs = 15
decay_rate = (EXP_DECAY/LEARNING_RATE)**(1/num_epochs)

# Eight-output linear model with its loss, optimizer and LR scheduler.
linear_model = LinearFashionMNIST(8)
criterion = nn.CrossEntropyLoss()
FMNIST_optim = optim.Adam(params=linear_model.parameters(), lr=LEARNING_RATE)
lr_scheduler = optim.lr_scheduler.ExponentialLR(FMNIST_optim, gamma=decay_rate)
# TODO: we need to use the scheduler for cnn too if we use that

In [None]:
# Train the base model on the eight classes that remain once labels 8 and 9 are held out

train_losses, test_losses = [], []

for epoch in range(num_epochs):
    # One optimisation pass, then one evaluation pass, both on the CPU.
    train_loss = train(no_boot_bag_train_dl, linear_model, criterion, FMNIST_optim, 'cpu')
    test_loss = test(no_boot_bag_test_dl, linear_model, criterion, 'cpu')
    train_losses.append(train_loss)
    test_losses.append(test_loss)

    print("Epoch", epoch, "train loss:", train_loss, "test loss:", test_loss)

    # Decay the learning rate once per epoch.
    lr_scheduler.step()

# Persist the trained weights.
print("Finished training, saving to", FNIST_model_no_boot_bag_file)
torch.save(linear_model.state_dict(), FNIST_model_no_boot_bag_file)

In [None]:
# Build a feature extractor on top of the saved 8-class linear model

fmnist_file = './weights/linear_fashionmnist_holdout_[8, 9].pt'
last_layer = 'input_layer'

# Recreate the architecture, restore its weights, and switch to eval mode.
linear_model = LinearFashionMNIST_alt(28*28, 8)
linear_model.load_state_dict(torch.load(fmnist_file))
linear_model.eval()

print("Layer to be extracted:", last_layer)

# The extractor is given the model and the list of layer names to extract.
fnist_feature_ext = FeatureExtractor(linear_model, [last_layer])

In [None]:
# Labels used for the similarity analysis: the eight base classes 0-7 plus
# label 9; label 8 is left out.
fmnist_classes = list(range(8)) + [9]

# NOTE(review): generate_dls is a project helper that is not shown here. From how
# the results are used later, class_idxs[i] appears to hold the dataset indices
# for fmnist_classes[i] — confirm against the helper.
class_subsets, class_idxs, subset_size = generate_dls(FMNIST_train_gen, fmnist_classes)

In [None]:
# Run the feature extractor over the per-class subsets, then reduce the result
# to `avgs`, which the next cell passes to get_similarity_vec.
# NOTE(review): extract_features and get_lda_avgs are project helpers that are not
# shown here; `avgs` is presumably one averaged feature vector per class — confirm.
X, y, subset_size = extract_features(fnist_feature_ext, fmnist_classes, class_subsets, subset_size)
avgs = get_lda_avgs(X, y, subset_size)

In [None]:
sim_scores = get_similarity_vec(avgs)
print(sim_scores)

# Expected pattern: the third-to-last and the last value (sandals and sneakers
# respectively) should come out as the most similar to ankle boot
# (the closer a score is to 0.5, the more similar the class).

# Store the scores as text, one per line, so a later cell can reload them.
with open(r'./data/fmnist_sim_scores_boot.txt', 'w') as fp:
    fp.writelines("%s\n" % s for s in sim_scores)

In [None]:
# Reload the similarity scores (one float per line); blank lines are ignored.
with open(r'./data/fmnist_sim_scores_boot.txt', 'r') as fp:
    sim_scores = [float(line) for line in fp if line.strip()]

# Add new class to the linear model
linear_model_new_class = add_output_nodes(fmnist_file)

print(linear_model_new_class.state_dict())

# Normalise the scores so that they sum to 1.
sim_sum = sum(sim_scores)

sim_norms = [x/sim_sum for x in sim_scores]
print(sim_norms)

# they do this weird thing where the sample size for the boots class is set at 75 and everything not above a certain threshold is set to the same number of samples.
# you can see in the paper (not appendix) this figure, we've mostly approximated it. They also specify N=350 total.
boots_sample_size = 75          # samples drawn for the new (boots) class
dissimilar_sample_size = 27     # flat sample count for classes below the threshold
similarity_threshold = 0.2      # normalised-score cut-off between "dissimilar" and "similar"
similar_scale = 3.52            # scale factor applied to similar classes (approximates the paper's figure)

# One entry per old class, followed by the entry for the new class.
sim_sample_sizes = [
    dissimilar_sample_size if x < similarity_threshold
    else int(x * boots_sample_size * similar_scale)
    for x in sim_norms
] + [boots_sample_size]
print(sim_sample_sizes)
print(sum(sim_sample_sizes))

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar([str(x) for x in fmnist_classes], sim_sample_sizes, color='maroon', width=0.9)
ax.set_xlabel('Class label')
ax.set_ylabel('Number of samples')
ax.set_title('SWIL samples per class')
plt.show()

In [None]:
# Build the SWIL training loader: for every class, draw as many random dataset
# indices as the sample-size distribution computed above prescribes
from random import sample

sampled_idxs = []

for class_pos in range(len(fmnist_classes)):
    candidates = class_idxs[class_pos].tolist()
    sampled_idxs.extend(sample(candidates, sim_sample_sizes[class_pos]))

# Wrap the drawn indices as a dataset view and serve it one sample per batch.
swil_train_subset = torch.utils.data.Subset(FMNIST_train_gen, sampled_idxs)

swil_train_dl = torch.utils.data.DataLoader(swil_train_subset, batch_size=1, shuffle=True, num_workers=2)

# what to do for testing??

In [None]:
def train(dataloader, model, loss_fn, optimizer, device, swap=False, swap_labels=()) -> float:
    '''
        Model training loop. Performs a single epoch of model updates.
        
        * USAGE *
        Within a training loop of range(num_epochs).

        * PARAMETERS *
        dataloader: A torch.utils.data.DataLoader object
        model: A torch model which subclasses torch.nn.Module
        loss_fn: A torch loss function, such as torch.nn.CrossEntropyLoss
        optimizer: A torch.optim optimizer
        device: 'cuda' or 'cpu'
        swap: If True, relabel targets before computing the loss (see swap_labels)
        swap_labels: Sequence (source_label, target_label); when swap is True, every
            target equal to source_label is replaced by target_label. Only the
            first two entries are read.

        * RETURNS *
        float: The model's average epoch loss (sum of batch losses / number of batches)

        * RAISES *
        ValueError: If swap is True but swap_labels has fewer than two entries
    '''

    if swap and len(swap_labels) < 2:
        raise ValueError("swap=True requires swap_labels=(source_label, target_label)")

    size = len(dataloader.dataset)
    train_loss = 0

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if swap:
            # Vectorised relabel; masked_fill returns a new tensor, so the
            # tensor yielded by the dataloader is never modified in place.
            y = y.masked_fill(y == swap_labels[0], swap_labels[1])

        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Accumulate the epoch loss
        batch_loss = loss.item()
        train_loss += batch_loss

        # Progress line every 1000 batches
        if batch % 1000 == 0:
            current = batch * len(X)
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

    return train_loss/len(dataloader)


def test(dataloader, model, loss_fn, device, swap=False, swap_labels=[], classes = 9) -> float:
    '''
        Model test loop. Performs a single epoch of model updates.

        * USAGE *
        Within a training loop of range(num_epochs) to perform epoch validation, or after training to perform testing.

        * PARAMETERS *
        dataloader: A torch.utils.data.DataLoader object
        model: A torch model which subclasses torch.nn.Module
        loss_fn: A torch loss function, such as torch.nn.CrossEntropyLoss
        optimizer: A torch.optim optimizer
        device: 'cuda' or 'cpu'

        * RETURNS *
        float: The average test loss
    '''

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0
    correct = [0] * classes
    c = 0
    #test_loss, correct = 0, 0
    sizes = [0] * classes


    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            if swap:
                for i in range(len(y)):
                    if y[i] == swap_labels[0]:
                        y[i] = swap_labels[1]
                        
            X, y = X.to(device), y.to(device)
            pred = model(X)
            
            #if y.item() == 8:
            #    print("pred:", pred)
            
            # this is being appended incorrectly
            # val_pred = np.append(val_pred, pred.detach().cpu().numpy()) 
            # targets = np.append(targets, y.detach().cpu().numpy())
            
            test_loss += loss_fn(pred, y).item()
            correct[y.item()] += (pred.argmax(1) == y).type(torch.float).sum().item()
            c += (pred.argmax(1) == y).type(torch.float).sum().item()
            sizes[y.item()] += 1
            
    test_loss /= num_batches
    correct = [x / s for x, s in zip(correct, sizes)]
    c /= size
    
    
    #print(torch.FloatTensor(val_pred).shape)
    #print(torch.IntTensor(targets).shape)
    
    #recall = Recall(average='macro', num_classes=classes)
    #recall_val = recall(torch.FloatTensor(val_pred), torch.IntTensor(targets))

    print(
        f"Test Error: \n Total accuracy:{(100*c):>0.1f}%, Accuracy 0: {(100*correct[0]):>0.1f}%, Accuracy 1: {(100*correct[1]):>0.1f}%, Accuracy 2: {(100*correct[2]):>0.1f}%, Accuracy 3: {(100*correct[3]):>0.1f}%, Accuracy 4: {(100*correct[4]):>0.1f}%, Accuracy 5: {(100*correct[5]):>0.1f}%, Accuracy 6: {(100*correct[6]):>0.1f}%, Accuracy 7: {(100*correct[7]):>0.1f}%, Accuracy 9: {(100*correct[8]):>0.1f}% \n Avg loss: {test_loss:>8f} \n") #, Recall: {recall_val:>8f} \n")

    return test_loss, correct, c

In [None]:
# freeze first layer
# The loop stops after one iteration, so only the first tensor yielded by
# parameters() has its gradient disabled.
# NOTE(review): if the first layer also has a bias, that bias presumably stays
# trainable — confirm this is intended.
# NOTE(review): requires_grad belongs to the Parameter object and is not stored
# in state_dict(); a model rebuilt from this model's state_dict has to re-apply
# the freeze explicitly.
for param in linear_model_new_class.parameters():
    param.requires_grad = False
    break

In [None]:
# Weights of the expanded model; loaded into the model that is trained below.
state_dict = linear_model_new_class.state_dict()

In [None]:
# Rebuild the 9-output model from the expanded weights. The class is referenced
# directly, as in the feature-extraction cell; the visible imports never bind a
# module named `nets`.
model = LinearFashionMNIST_alt(28*28, 9)
model.load_state_dict(state_dict)

# load_state_dict copies tensor values only; requires_grad flags are not part of
# a state dict. Carry over whatever freeze was applied to linear_model_new_class
# so that those parameters really stay fixed while `model` is trained.
frozen_names = {name for name, p in linear_model_new_class.named_parameters()
                if not p.requires_grad}
for name, p in model.named_parameters():
    if name in frozen_names:
        p.requires_grad = False

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

num_epochs = 6

# Per-epoch factor that takes the learning rate from LEARNING_RATE to EXP_DECAY
# over num_epochs scheduler steps.
decay_rate = (EXP_DECAY/LEARNING_RATE)**(1/num_epochs)

lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decay_rate)

model_file = './weights/linear_fashionmnist_holdout_[8]_frozen.pt'

device = 'cpu'

train_losses = []
test_losses = []
t = range(num_epochs)

# One accuracy history per output slot. Slot 8 holds the new class (original
# label 9, remapped to 8 by the swap), hence the name accuracies_over_time9.
per_class_history = [[] for _ in range(9)]
(accuracies_over_time0, accuracies_over_time1, accuracies_over_time2,
 accuracies_over_time3, accuracies_over_time4, accuracies_over_time5,
 accuracies_over_time6, accuracies_over_time7, accuracies_over_time9) = per_class_history
total_acc_over_time = []

for epoch in t:
    print(f"Epoch {epoch+1}\n-------------------------------")
    # Label 9 is remapped to output slot 8 for both training and evaluation.
    train_loss = train(swil_train_dl, model, loss_fn, optimizer, device, swap=True, swap_labels=[9,8])
    test_loss, accuracies, acc = test(no_bag_test_dl, model, loss_fn, device, swap=True, swap_labels=[9,8])
    for history, class_acc in zip(per_class_history, accuracies):
        history.append(class_acc)
    print(accuracies_over_time9)
    total_acc_over_time.append(acc)
    train_losses.append(train_loss)
    test_losses.append(test_loss)

    lr_scheduler.step()

torch.save(model.state_dict(), model_file)
print("Done!")

# need recall, accuracy (are we calculating that now?), cross-entropy loss (same?)

In [None]:
# Overall test accuracy after each SWIL epoch.
fig, ax = plt.subplots()
ax.plot(total_acc_over_time, label='SWIL')
ax.set_title('Total accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([.6, 1])
ax.legend()
plt.show()

In [None]:
# similar old classes
# Per-epoch mean accuracy over labels 5 and 7 (sandals and sneakers), the two
# old classes expected to be most similar to ankle boot.
old_sim_acc = [(a1 + a2)/2 for a1, a2 in zip(accuracies_over_time5, accuracies_over_time7)]

fig, ax = plt.subplots()
ax.plot(old_sim_acc, label='SWIL')
ax.set_title('Similar old classes accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([.6, 1])
ax.legend()
plt.show()

In [None]:
# Different old classes
# Per-epoch mean accuracy over the six remaining old classes (labels 0, 1, 2, 3, 4 and 6).
old_diff_acc = [
    (a0 + a1 + a2 + a3 + a4 + a6)/6
    for a0, a1, a2, a3, a4, a6 in zip(accuracies_over_time0, accuracies_over_time1,
                                      accuracies_over_time2, accuracies_over_time3,
                                      accuracies_over_time4, accuracies_over_time6)
]
# This cell used to store the result under the name old_sim_acc; keep that
# binding so any later cell reading it still sees the same values.
old_sim_acc = old_diff_acc

fig, ax = plt.subplots()
ax.plot(old_diff_acc, label='SWIL')
ax.set_title('Different old classes accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([.6, 1])
ax.legend()
plt.show()

# CIFAR-10 CNN Adding New Class and Training