In [1]:
import subprocess

# Run `nvcc --version`, split its output on ", " and take the last
# space-separated token of the first field that starts with "release".
nvcc_output = subprocess.check_output(["nvcc", "--version"]).decode("UTF-8")
release_fields = [field for field in nvcc_output.split(", ") if field.startswith("release")]
CUDA_version = release_fields[0].split(" ")[-1]
print("CUDA version:", CUDA_version)

# Suffix chosen for the detected CUDA version: 10.2 maps to an empty suffix and
# any version not listed falls back to "+cu110".
# (Presumably a PyTorch wheel suffix; it is not used in the visible cells.)
torch_version_suffix = {
    "10.0": "+cu100",
    "10.1": "+cu101",
    "10.2": "",
}.get(CUDA_version, "+cu110")

CUDA version: 11.0


In [2]:
import numpy as np
import torch

# Show which PyTorch build this kernel imported.
print("Torch version:", torch.__version__)

Torch version: 1.7.1


In [3]:
# Check that PyTorch can see a CUDA device; the model and image batches are
# moved to the GPU with .cuda() further down.
torch.cuda.is_available()

True

# Downloading the model

CLIP models are distributed as TorchScript modules.

In [4]:
# Download URLs of the available CLIP checkpoints, keyed by model name.
MODELS = {
    "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt",
    "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt",
    "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt",
    "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt",    
}

In [None]:
# Fetch the ViT-B/32 checkpoint listed in MODELS and save it as model.pt.
!wget {MODELS["ViT-B/32"]} -O model.pt

In [5]:
# Load the downloaded TorchScript checkpoint, move it to the GPU and put it in eval mode.
model = torch.jit.load("model.pt").cuda().eval()

In [6]:
# Scalar configuration values stored on the loaded model, converted to Python numbers.
input_resolution = model.input_resolution.item()
context_length = model.context_length.item()
vocab_size = model.vocab_size.item()

# Parameter count: number of elements in each parameter tensor, summed over all tensors.
elements_per_parameter = [int(np.prod(p.shape)) for p in model.parameters()]
parameter_total = np.sum(elements_per_parameter)

print("Model parameters:", f"{parameter_total:,}")
print("Input resolution:", input_resolution)
print("Context length:", context_length)
print("Vocab size:", vocab_size)

Model parameters: 151,277,313
Input resolution: 224
Context length: 77
Vocab size: 49408


# Image Preprocessing

We resize the input images and center-crop them to conform with the image resolution that the model expects. Afterwards, we normalize the pixel intensity using the dataset mean and standard deviation.



In [13]:
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from PIL import Image

# Geometric preprocessing only: bicubic resize to the model's input resolution,
# centre crop to a square of that size, then conversion to a tensor.
preprocess = Compose([
    Resize(input_resolution, interpolation=Image.BICUBIC),
    CenterCrop(input_resolution),
    ToTensor()
])

# Per-channel mean and standard deviation (three values each). They are not part
# of `preprocess`: the data-loading cells subtract / divide by them after the
# preprocessed images have been stacked into one tensor.
image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073])
image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711])

In [14]:
import os
import skimage
import IPython.display
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

from collections import OrderedDict
import torch

from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

import pickle

from sklearn.linear_model import LogisticRegression

%matplotlib inline
%config InlineBackend.figure_format = 'retina'


In [15]:
# Names of the training sets; each one selects a 'symm_<name>_training.pkl' file.
train_set_list = ["synthetic", "natural"]

# Names of the test sets; each one selects a 'symm_<name>_test.pkl' file.
# The order below is the order in which results are computed and printed.
test_set_list = [
    "synthetic", "natural", "natural_mirrored",
    "NS2", "NS6", "NSd4",
    "S2", "S6", "Sd4",
    "flank1S", "flank2S", "flank3S",
    "flank1NS", "flank2NS", "flank3NS",
    "stripe2S", "stripe4S", "stripe6S", "stripe8S", "stripe10S",
    "stripe2NS", "stripe4NS", "stripe6NS", "stripe8NS", "stripe10NS",
]

In [18]:
# For RSA activations
# This cell reassigns train_set_list / test_set_list, replacing the full lists
# defined in the previous cell with the flank-only subset.
train_set_list = ["synthetic"]

test_set_list = [
    "flank1S", "flank2S", "flank3S",
    "flank1NS", "flank2NS", "flank3NS",
]

In [9]:
def get_features(dataset, batch_size=100, shuffle=False):
    """Encode every image in ``dataset`` with the global ``model``'s image encoder.

    Args:
        dataset: a dataset yielding ``(images, labels)`` pairs that
            ``DataLoader`` can batch (the callers pass a ``TensorDataset``).
        batch_size: number of samples sent to the GPU per forward pass.
        shuffle: whether to shuffle the samples. Defaults to ``False`` so that
            row ``i`` of the returned arrays corresponds to ``dataset[i]``;
            feature extraction gains nothing from shuffling, and a random row
            order makes saved activations irreproducible and impossible to
            align with the stimuli.

    Returns:
        ``(features, labels)`` as two NumPy arrays with matching row order.
    """
    all_features = []
    all_labels = []

    with torch.no_grad():
        for images, labels in DataLoader(dataset, shuffle=shuffle, batch_size=batch_size):
            features = model.encode_image(images.cuda())

            # Move each batch off the GPU right away so device memory does not
            # grow with the size of the dataset.
            all_features.append(features.cpu())
            all_labels.append(labels)

    return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy()

In [36]:
# One-off conversion of the "S0" test pickle from a pair of parallel sequences
# (test_set[0][i], test_set[1][i]) into a list of 2-tuples, written back to the
# same file.
PATH = './symmetry/'  # defined here as well so the cell also runs on a fresh kernel
s0_test_path = PATH + 'symm_' + "S0" + '_test.pkl'
with open(s0_test_path, 'rb') as handle:
    test_set = pickle.load(handle)

# The file is overwritten in place, so a second run would otherwise zip
# already-paired entries together and corrupt the data. Skip the rewrite when
# the content is already a list of 2-tuples.
# NOTE(review): assumes the unconverted file is not itself a list of 2-tuples - confirm.
already_paired = isinstance(test_set, list) and all(
    isinstance(item, tuple) and len(item) == 2 for item in test_set
)
if not already_paired:
    test_set = [(test_set[0][i], test_set[1][i]) for i in range(len(test_set[0]))]
    with open(s0_test_path, 'wb') as handle:
        pickle.dump(test_set, handle)

In [10]:
# Load the "NS0" test set and turn its images into one normalised input tensor.
PATH='./symmetry/' # Make sure this path exists in your drive
test_name="NS0"
with open(PATH + 'symm_' + test_name + '_test.pkl', 'rb') as handle:
    test_set = pickle.load(handle)[:500]  # keep only the first 500 entries
# x[0] of each entry is cast to uint8, wrapped as an RGB PIL image and run through `preprocess`.
inputs = torch.tensor(np.stack([preprocess(Image.fromarray(x[0].astype('uint8')).convert("RGB")) for x in test_set]))
# [:, None, None] reshapes the 3-element per-channel vectors to (3, 1, 1) so they
# broadcast over the remaining image dimensions; normalisation is done in place.
inputs -= image_mean[:, None, None]
inputs /= image_std[:, None, None]


In [11]:
# Collect x[1] of every entry (used as the label) into a single integer tensor.
targets = torch.IntTensor(np.stack([x[1] for x in test_set]))

In [17]:
import gc
# Run a garbage-collection pass; the displayed value is gc.collect()'s return value.
gc.collect()

0

In [19]:
# Extract image features for every test set in test_set_list and keep them in memory.
import gc
PATH='./symmetry/' # Make sure this path exists in your drive
# Maps each test-set name to the feature array returned by get_features.
features = {}
for test_name in test_set_list:
    print(test_name)
    gc.collect()  # presumably to release the previous iteration's data before loading more
    with open(PATH + 'symm_' + test_name + '_test.pkl', 'rb') as handle:
        test_set = pickle.load(handle)[:500]  # keep only the first 500 entries

    # x[0] of each entry is used as the image, x[1] as the label.
    inputs = torch.tensor(np.stack([preprocess(Image.fromarray(x[0].astype('uint8')).convert("RGB")) for x in test_set]))
    # Per-channel normalisation, broadcast via the (3, 1, 1) view of mean / std.
    inputs -= image_mean[:, None, None]
    inputs /= image_std[:, None, None]
    targets = torch.IntTensor(np.stack([x[1] for x in test_set]))
    test = TensorDataset(inputs, targets)
    # NOTE(review): only the features are stored; the labels are discarded. The row
    # order of features[test_name] is whatever order get_features yields, so it
    # matches test_set order only if get_features does not shuffle.
    test_features, test_labels = get_features(test)
    features[test_name] = test_features

flank1S
flank2S
flank3S
flank1NS
flank2NS
flank3NS


In [20]:
# Sanity check: shape of the feature array stored for the "flank1S" test set.
features["flank1S"].shape

(500, 512)

In [21]:
# Write each test set's feature array to its own pickle file.
# NOTE(review): the output directory is an absolute, user-specific path.
activations_dir = "/om/user/shobhita/data/symmetry/transformer/rsa_activations/"
for test_name in test_set_list:
    activations_file = activations_dir + test_name + "_activations.pkl"
    with open(activations_file, "wb") as handle:
        pickle.dump(features[test_name], handle)

In [None]:
# Fit one logistic-regression probe per training set on the image features and
# score it on every test set. Test features are also kept in `features`.
PATH='./symmetry/' # Make sure this path exists in your drive
# features: test-set name -> feature array from the most recent pass over that set.
# acc: training-set name -> {test-set name -> accuracy in percent}.
features = {}
acc = {}
for train_name in train_set_list:
    acc[train_name] = {}

    with open(PATH + 'symm_' + train_name + '_training.pkl', 'rb') as handle:
        train_set = pickle.load(handle)

    # x[0] of each entry is used as the image, x[1] as the label.
    inputs = torch.tensor(np.stack([preprocess(Image.fromarray(x[0].astype('uint8')).convert("RGB"))  for x in train_set]))
    # Per-channel normalisation, broadcast via the (3, 1, 1) view of mean / std.
    inputs -= image_mean[:, None, None]
    inputs /= image_std[:, None, None]
    targets = torch.IntTensor(np.stack([x[1] for x in train_set]))
    train = TensorDataset(inputs, targets)
    train_features, train_labels = get_features(train)

    # The probe is trained on the extracted features only; the image model is not updated.
    classifier = LogisticRegression(random_state=0, C=0.316, max_iter=1000, verbose=1)
    classifier.fit(train_features, train_labels)
    # Drop the training data before the test sets are loaded.
    del inputs
    del train
    del train_set
    del train_features

    for test_name in test_set_list:
        with open(PATH + 'symm_' + test_name + '_test.pkl', 'rb') as handle:
            test_set = pickle.load(handle)

        inputs = torch.tensor(np.stack([preprocess(Image.fromarray(x[0].astype('uint8')).convert("RGB")) for x in test_set]))
        inputs -= image_mean[:, None, None]
        inputs /= image_std[:, None, None]
        targets = torch.IntTensor(np.stack([x[1] for x in test_set]))
        test = TensorDataset(inputs, targets)
        test_features, test_labels = get_features(test)

        features[test_name] = test_features

        predictions = classifier.predict(test_features)
        # Builtin float instead of np.float: the alias was deprecated in NumPy 1.20
        # and removed in 1.24, where it raises AttributeError.
        accuracy = np.mean((test_labels == predictions).astype(float)) * 100.
        print(train_name)
        print(test_name)
        print(f"Accuracy = {accuracy:.3f}")
        acc[train_name][test_name] = accuracy

# Persist the nested accuracy dict once all train/test combinations are done.
with open('./transformer_accuracy.pkl', 'wb') as handle:
    pickle.dump(acc, handle)

In [10]:
# Fit one logistic-regression probe per training set on the image features and
# score it on every test set.
PATH='./symmetry/' # Make sure this path exists in your drive

# acc: training-set name -> {test-set name -> accuracy in percent}.
acc = {}
for train_name in train_set_list:
    acc[train_name] = {}

    with open(PATH + 'symm_' + train_name + '_training.pkl', 'rb') as handle:
        train_set = pickle.load(handle)

    # x[0] of each entry is used as the image, x[1] as the label.
    inputs = torch.tensor(np.stack([preprocess(Image.fromarray(x[0].astype('uint8')).convert("RGB"))  for x in train_set]))
    # Per-channel normalisation, broadcast via the (3, 1, 1) view of mean / std.
    inputs -= image_mean[:, None, None]
    inputs /= image_std[:, None, None]
    targets = torch.IntTensor(np.stack([x[1] for x in train_set]))
    train = TensorDataset(inputs, targets)
    train_features, train_labels = get_features(train)

    # The probe is trained on the extracted features only; the image model is not updated.
    classifier = LogisticRegression(random_state=0, C=0.316, max_iter=1000, verbose=1)
    classifier.fit(train_features, train_labels)
    # Drop the training data before the test sets are loaded.
    del inputs
    del train
    del train_set
    del train_features

    for test_name in test_set_list:
        with open(PATH + 'symm_' + test_name + '_test.pkl', 'rb') as handle:
            test_set = pickle.load(handle)

        inputs = torch.tensor(np.stack([preprocess(Image.fromarray(x[0].astype('uint8')).convert("RGB")) for x in test_set]))
        inputs -= image_mean[:, None, None]
        inputs /= image_std[:, None, None]
        targets = torch.IntTensor(np.stack([x[1] for x in test_set]))
        test = TensorDataset(inputs, targets)
        test_features, test_labels = get_features(test)

        predictions = classifier.predict(test_features)
        # Builtin float instead of np.float: the alias was deprecated in NumPy 1.20
        # and removed in 1.24, where it raises AttributeError.
        accuracy = np.mean((test_labels == predictions).astype(float)) * 100.
        print(train_name)
        print(test_name)
        print(f"Accuracy = {accuracy:.3f}")
        acc[train_name][test_name] = accuracy

# Persist the nested accuracy dict once all train/test combinations are done.
with open('./transformer_accuracy.pkl', 'wb') as handle:
    pickle.dump(acc, handle)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s finished


synthetic
synthetic
Accuracy = 100.000
synthetic
natural
Accuracy = 65.083
synthetic
natural_mirrored
Accuracy = 87.167
synthetic
NS2
Accuracy = 100.000
synthetic
NS6
Accuracy = 98.200
synthetic
NSd4
Accuracy = 100.000
synthetic
S2
Accuracy = 100.000
synthetic
S6
Accuracy = 100.000
synthetic
Sd4
Accuracy = 100.000
synthetic
flank1S
Accuracy = 100.000
synthetic
flank2S
Accuracy = 100.000
synthetic
flank3S
Accuracy = 100.000
synthetic
flank1NS
Accuracy = 0.000
synthetic
flank2NS
Accuracy = 14.800
synthetic
flank3NS
Accuracy = 0.500
synthetic
stripe2S
Accuracy = 98.600
synthetic
stripe4S
Accuracy = 100.000
synthetic
stripe6S
Accuracy = 100.000
synthetic
stripe8S
Accuracy = 100.000
synthetic
stripe10S
Accuracy = 100.000
synthetic
stripe2NS
Accuracy = 74.300
synthetic
stripe4NS
Accuracy = 0.400
synthetic
stripe6NS
Accuracy = 17.400
synthetic
stripe8NS
Accuracy = 96.900
synthetic
stripe10NS
Accuracy = 24.200


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.1s finished


natural
synthetic
Accuracy = 98.800
natural
natural
Accuracy = 93.417
natural
natural_mirrored
Accuracy = 98.583
natural
NS2
Accuracy = 99.400
natural
NS6
Accuracy = 74.900
natural
NSd4
Accuracy = 20.300
natural
S2
Accuracy = 100.000
natural
S6
Accuracy = 100.000
natural
Sd4
Accuracy = 100.000
natural
flank1S
Accuracy = 100.000
natural
flank2S
Accuracy = 100.000
natural
flank3S
Accuracy = 100.000
natural
flank1NS
Accuracy = 0.000
natural
flank2NS
Accuracy = 0.000
natural
flank3NS
Accuracy = 0.000
natural
stripe2S
Accuracy = 100.000
natural
stripe4S
Accuracy = 100.000
natural
stripe6S
Accuracy = 100.000
natural
stripe8S
Accuracy = 100.000
natural
stripe10S
Accuracy = 100.000
natural
stripe2NS
Accuracy = 0.000
natural
stripe4NS
Accuracy = 0.000
natural
stripe6NS
Accuracy = 0.000
natural
stripe8NS
Accuracy = 0.000
natural
stripe10NS
Accuracy = 0.000


In [23]:
# The sizes are floats, so they are formatted into file names and keys as
# "100.0", "1000.0" and "10000.0".
train_sizes = [100.0, 1000.0, 10000.0]
train_set_list = ['synthetic', 'natural' ]
# NOTE(review): absolute, user-specific directory.
path = "/om/user/shobhita/src/symmetry/transformers/symmetry/symmetry/"

# Gather one accuracy dict per (size, training set) pair, keyed "<train set>_<size>".
full_accs = {}
for size in train_sizes:
    for train_set in train_set_list:
        accuracy_file = path + "transformer_accuracy_{}_training_{}.pkl".format(train_set, size)
        with open(accuracy_file, "rb") as handle:
            loaded_acc = pickle.load(handle)
        # Store a shallow copy of the loaded mapping as a plain dict.
        acc = dict(loaded_acc.items())
        full_accs["{}_{}".format(train_set, size)] = acc



In [24]:
import pandas as pd
# One column per "<train set>_<size>" key of full_accs; rows come from the keys of the inner dicts.
df = pd.DataFrame(full_accs)
# Save the combined table next to the per-run accuracy files.
with open(path + "full_transformer_accs.pkl", "wb") as handle:
    pickle.dump(df, handle)

In [25]:
# Read the combined accuracy table back from the file written in the previous cell.
with open(path + "full_transformer_accs.pkl", "rb") as handle:
    a= pickle.load(handle)

In [26]:
# Display the reloaded accuracy table.
a

Unnamed: 0,synthetic_100.0,natural_100.0,synthetic_1000.0,natural_1000.0,synthetic_10000.0,natural_10000.0
synthetic,99.8,49.3,100.0,88.1,100.0,97.9
natural,58.833333,88.333333,61.25,92.5,64.583333,93.333333
natural_mirrored,72.666667,95.916667,79.25,98.333333,86.333333,98.583333
NS2,100.0,0.0,100.0,89.5,100.0,98.8
NS6,99.2,0.0,98.3,37.1,97.8,63.8
NSd4,100.0,0.0,100.0,0.1,100.0,13.2
S2,100.0,100.0,100.0,100.0,100.0,100.0
S6,99.8,100.0,100.0,100.0,100.0,100.0
Sd4,99.8,100.0,99.9,100.0,100.0,100.0
flank1S,99.8,100.0,100.0,100.0,100.0,100.0
