In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import transformers
from transformers import ViTModel, ViTConfig, ViTForImageClassification
from tqdm.auto import tqdm
import os
import matplotlib.pyplot as plt
from IPython.display import clear_output
from copy import deepcopy
import random
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import CosineAnnealingLR, LambdaLR
from vit_train import validate, to_rgb, pretrained_model, create_dataset
from vit import MemoryCapableViT
from torchvision.transforms import ToTensor, Lambda, Compose
from torchvision.transforms.functional import to_pil_image, to_grayscale


device = "cuda:0"
seed = 42
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ["PYTHONHASHSEED"] = str(seed)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Directories for cache and datasets
home_dir = "/hdd/ege"
cache_dir = os.path.join(home_dir, "ceng502")
datasets_dir = os.path.join(home_dir, "datasets")


2023-06-11 14:15:08.775589: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-11 14:15:09.647860: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvrtc.so.11.1: cannot open shared object file: No such file or directory
2023-06-11 14:15:09.648676: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvrtc.so.11.1: cannot open shared object file: No such file or directory


Random seed set as 42
Random seed set as 42


In [2]:
model = pretrained_model(cache_dir = cache_dir)

Some weights of the model checkpoint at google/vit-base-patch32-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
base_model = model
    
model = MemoryCapableViT(deepcopy(base_model))

In [4]:
new_parameters = model.add_head(memory_tokens=1, num_classes=100)


In [5]:
model.load_state_dict(torch.load("models/CIFAR100_model.pt"))

<All keys matched successfully>

In [10]:
model_2 = pretrained_model(cache_dir = cache_dir)

Some weights of the model checkpoint at google/vit-base-patch32-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch32-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
model_2 = MemoryCapableViT(deepcopy(model_2))

In [12]:
new_parameters_2 = model_2.add_head(memory_tokens=1, num_classes=365)

In [13]:
model_2.load_state_dict(torch.load("models/Places_model.pt"))

<All keys matched successfully>

In [14]:
model.concatenate(model_2)

In [18]:
# Load and preprocess the dataset
transform = transforms.Compose([
    transforms.RandomResizedCrop((224)),
    transforms.ToTensor(),
    #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# CIFAR100
train_dataset = datasets.CIFAR100(root=datasets_dir, train=True, transform=transform, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
validation_dataset = datasets.CIFAR100(root=datasets_dir, train=False, transform=transform, download=True)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [19]:
transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

try:
    train_dataset_2 = datasets.Places365(root=datasets_dir,small=True, split="train-standard", transform=transform, download=True)

except:
    train_dataset_2 = datasets.Places365(root=datasets_dir,small=True, split="train-standard", transform=transform)
train_loader_2 = DataLoader(dataset=train_dataset_2, batch_size=64, shuffle=True)

try:
    validation_dataset_2 = datasets.Places365(root=datasets_dir,small=True, split="val", transform=transform, download=True)

except:
    validation_dataset_2 = datasets.Places365(root=datasets_dir,small=True, split="val", transform=transform)

validation_loader_2 = DataLoader(dataset=validation_dataset_2, batch_size=64, shuffle=False)

In [27]:
validate(model, validation_loader, output_head=1)

  0%|          | 0/157 [00:00<?, ?it/s]

0.6408

In [28]:
validate(model, validation_loader_2, output_head=2)

  0%|          | 0/571 [00:00<?, ?it/s]

0.5046575342465753

In [24]:
validate(model_2, validation_loader_2, output_head = 1)

  0%|          | 0/571 [00:00<?, ?it/s]

0.0031232876712328768

In [25]:
def validate(model, dataloader, output_head=None):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data, targets in tqdm(dataloader, leave=False):
            data = data.to(device)
            targets = targets.to(device)

            outputs = model(data)
            if output_head is not None:
                outputs = outputs[output_head]
            _, predicted = torch.max(outputs.logits, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    return correct / total

In [26]:
validate(model, validation_loader_2, output_head=1)

  0%|          | 0/571 [00:00<?, ?it/s]

0.0025205479452054796