In [1]:
import pandas as pd 
from torchvision.models import VisionTransformer, ViT_L_16_Weights
from torchvision.models.vision_transformer import vit_l_16
from pytorch_ood.utils import fix_random_seed

fix_random_seed(123)


device = "cuda:0"
all_results = []


In [2]:
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST, FashionMNIST
from torchvision.transforms import ToTensor
from pytorch_ood.loss import DeepSVDDLoss
from pytorch_ood.utils import OODMetrics, ToUnknown

In [3]:
import numpy as np 
import torch 

with np.load("../sumnist/x-test.npz") as data:
    x_test = torch.tensor(data['arr_0'])
    
with np.load("../sumnist/x-train.npz") as data:
    x_train = torch.tensor(data['arr_0'])
    
with np.load("../sumnist/y-test.npz") as data:
    y_test = torch.tensor(data['arr_0'])
    y_test = torch.where(y_test.sum(dim=1) == 20, 0, -1)
    # y_test = torch.where(y_test == 20, 0, -1)

In [4]:
from torch.utils.data import TensorDataset

train_dataset = TensorDataset(x_train.unsqueeze(1))
train_loader = DataLoader(train_dataset, num_workers=5, batch_size=128, shuffle=True)

test_dataset = TensorDataset(x_test.unsqueeze(1), y_test)

test_loader = DataLoader(test_dataset, shuffle=False, num_workers=5, batch_size=256)

# Deep OC-SVDD

In [5]:
class Model(nn.Module):
    """ """
    def __init__(self):
        super().__init__()
        self.c1 = nn.Conv2d(1, 16, 3, padding=1, bias=False)
        self.pool = nn.MaxPool2d(2)
        self.c2 = nn.Conv2d(16, 32, 3, padding=1, bias=False)
        self.c3 = nn.Conv2d(32, 64, 3, padding=1, bias=False)
        self.layer5 = nn.Linear(3136, 128, bias=False)
        self.layer6 = nn.Linear(128, 2, bias=False)

    def forward(self, x):
        batch_size = x.shape[0]
        x = self.c1(x).relu()
        x = self.pool(x)
        x = self.c2(x).relu()
        x = self.pool(x)
        x = self.c3(x).relu()
        x = self.pool(x)
        x = x.reshape(batch_size, -1)
        x = self.layer5(x).relu()
        x = self.layer6(x)
        return x


In [6]:
model = Model().to(device)

opti = Adam(model.parameters(), lr=0.001)

with torch.no_grad():
    d = [model(x[0].to(device)) for x in train_loader]
    center = torch.concat(d).mean(dim=0).cpu()

criterion = DeepSVDDLoss(n_dim=2, center=center).to(device)

In [7]:
def test():
    model.eval()
    metrics = OODMetrics()
    with torch.no_grad():
        for x, y in test_loader:
            z = model(x.to(device))
            # calculate (squared) distance of points to the center in output space
            distances = criterion.distance(z)
            # dists.append(distances)
            # labels.append(y)
            # print(y)
            metrics.update(distances, y)
    
    m = metrics.compute()
    # print(metrics.compute())
    model.train()
    return m 


In [8]:
for epoch in range(20):
    for x in train_loader:
        z = model(x[0].to(device))
        # since this is a one-class method, we do not have to provide any class labels
        loss = criterion(z)
        opti.zero_grad()
        loss.backward()
        opti.step()

    m = test()
    print(f"Epoch {epoch}, {m}")

m.update({"Method": "Deep SVDD", "Backbone": "-"})
all_results.append(m)

Epoch 0, {'AUROC': 0.5042188167572021, 'AUPR-IN': 0.18277691304683685, 'AUPR-OUT': 0.8196657299995422, 'FPR95TPR': 0.9463000297546387}
Epoch 1, {'AUROC': 0.49990278482437134, 'AUPR-IN': 0.1824645698070526, 'AUPR-OUT': 0.8180092573165894, 'FPR95TPR': 0.9448999762535095}
Epoch 2, {'AUROC': 0.4990214705467224, 'AUPR-IN': 0.18202337622642517, 'AUPR-OUT': 0.8179963827133179, 'FPR95TPR': 0.9483000040054321}
Epoch 3, {'AUROC': 0.49952933192253113, 'AUPR-IN': 0.18238399922847748, 'AUPR-OUT': 0.8178579807281494, 'FPR95TPR': 0.9498000144958496}
Epoch 4, {'AUROC': 0.5000526309013367, 'AUPR-IN': 0.1829579770565033, 'AUPR-OUT': 0.8182710409164429, 'FPR95TPR': 0.9519000053405762}
Epoch 5, {'AUROC': 0.4993959367275238, 'AUPR-IN': 0.18254747986793518, 'AUPR-OUT': 0.8179685473442078, 'FPR95TPR': 0.9483000040054321}
Epoch 6, {'AUROC': 0.49892011284828186, 'AUPR-IN': 0.18231448531150818, 'AUPR-OUT': 0.8185650706291199, 'FPR95TPR': 0.9484000205993652}
Epoch 7, {'AUROC': 0.49903491139411926, 'AUPR-IN': 0.1

# Using nearest neighbor 

In [9]:
from sklearn.neighbors import NearestNeighbors

knn = NearestNeighbors(n_neighbors=1, algorithm='ball_tree', n_jobs=-1).fit(x_train.view(-1, 56*56).numpy())

In [10]:
distances, indices = knn.kneighbors(x_test.view(-1, 56*56).numpy())

In [11]:
metrics = OODMetrics()
metrics.update(torch.tensor(distances), y_test)

m = metrics.compute()
m.update({"Method": "1-NN", "Backbone": "-"})
all_results.append(m)
print(m)

{'AUROC': 0.5, 'AUPR-IN': 0.5918367505073547, 'AUPR-OUT': 0.90816330909729, 'FPR95TPR': 1.0, 'Method': '1-NN', 'Backbone': '-'}


# Mahalanobis 

In [12]:
from sklearn.covariance import EmpiricalCovariance

x_features_train = x_train.view(-1, 56*56)
x_features_test = x_test.view(-1, 56*56)
x_mean = x_features_train.mean(dim=0)

cov = EmpiricalCovariance().fit((x_features_train - x_mean).numpy())
distances = cov.mahalanobis((x_features_test - x_mean).numpy())

metrics = OODMetrics()
metrics.update(torch.tensor(distances), y_test)
m = metrics.compute()
m.update({"Method": "Mahalanobis", "Backbone": "-"})
all_results.append(m)
print(m)

{'AUROC': 0.5, 'AUPR-IN': 0.5918367505073547, 'AUPR-OUT': 0.90816330909729, 'FPR95TPR': 1.0, 'Method': 'Mahalanobis', 'Backbone': '-'}


# Extract ViT Features 

In [13]:
model = vit_l_16(weights=ViT_L_16_Weights.DEFAULT)
preprocess = ViT_L_16_Weights.DEFAULT.transforms()
model = model.to(device)

In [14]:
x_test_rgb = x_test.unsqueeze(1).repeat(1, 3, 1, 1)
x_train_rgb = x_train.unsqueeze(1).repeat(1, 3, 1, 1)

In [15]:
print(preprocess)

ImageClassification(
    crop_size=[224]
    resize_size=[242]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [16]:
from tqdm.notebook import tqdm 
from torchvision.transforms.functional import to_pil_image

features_test = []

with torch.no_grad():
    for i in tqdm(range(x_test_rgb.shape[0])):
        prep = preprocess(to_pil_image(x_test_rgb[i]))
        image_features = model(prep.cuda().unsqueeze(0)).float()
        features_test.append(image_features)

  0%|          | 0/12250 [00:00<?, ?it/s]

In [17]:
features_train = []

with torch.no_grad():
    for i in tqdm(range(x_train_rgb.shape[0])):
        prep = preprocess(to_pil_image(x_train_rgb[i]))
        image_features = model(prep.cuda().unsqueeze(0)).float()
        features_train.append(image_features)

  0%|          | 0/60000 [00:00<?, ?it/s]

In [18]:
train_features = torch.cat(features_train).cpu()
test_features = torch.cat(features_test).cpu()

# kNN-CLIP

In [19]:
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree', n_jobs=-1).fit(train_features.numpy())

In [20]:
distances, indices = nbrs.kneighbors(test_features.numpy())

In [21]:
metrics = OODMetrics()
metrics.update(torch.tensor(distances), y_test)

m = metrics.compute()
m.update({"Method": "1-NN", "Backbone": "ViT-L-16"})
all_results.append(m)
print(m)

{'AUROC': 0.511908233165741, 'AUPR-IN': 0.1880882978439331, 'AUPR-OUT': 0.8220650553703308, 'FPR95TPR': 0.9434000253677368, 'Method': '1-NN', 'Backbone': 'ViT-L-16'}


# Mahalanobis

In [22]:
from sklearn.covariance import EmpiricalCovariance

x_mean = train_features.mean(dim=0)

cov = EmpiricalCovariance().fit((train_features - x_mean).numpy())
distances = cov.mahalanobis((test_features - x_mean).numpy())

metrics = OODMetrics()
metrics.update(torch.tensor(distances), y_test)
m = metrics.compute()
m.update({"Method": "Mahalanobis", "Backbone": "ViT-L-16"})
all_results.append(m)
print(m)

{'AUROC': 0.5, 'AUPR-IN': 0.5918367505073547, 'AUPR-OUT': 0.8230504989624023, 'FPR95TPR': 1.0, 'Method': 'Mahalanobis', 'Backbone': 'ViT-L-16'}


In [23]:
df = pd.DataFrame(all_results)
mean_scores = df.groupby(["Method", "Backbone"]).mean() * 100

print(mean_scores.sort_values("AUROC").to_latex(float_format="%.2f"))

\begin{tabular}{llrrrr}
\toprule
 &  & AUROC & AUPR-IN & AUPR-OUT & FPR95TPR \\
Method & Backbone &  &  &  &  \\
\midrule
Deep SVDD & - & 49.32 & 18.07 & 81.28 & 95.14 \\
\cline{1-6}
1-NN & - & 50.00 & 59.18 & 90.82 & 100.00 \\
\cline{1-6}
\multirow[t]{2}{*}{Mahalanobis} & - & 50.00 & 59.18 & 90.82 & 100.00 \\
 & ViT-L-16 & 50.00 & 59.18 & 82.31 & 100.00 \\
\cline{1-6}
1-NN & ViT-L-16 & 51.19 & 18.81 & 82.21 & 94.34 \\
\cline{1-6}
\bottomrule
\end{tabular}



In [24]:
mean_scores.sort_values("AUROC")

Unnamed: 0_level_0,Unnamed: 1_level_0,AUROC,AUPR-IN,AUPR-OUT,FPR95TPR
Method,Backbone,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Deep SVDD,-,49.321049,18.07175,81.275851,95.139998
1-NN,-,50.0,59.183675,90.816331,100.0
Mahalanobis,-,50.0,59.183675,90.816331,100.0
Mahalanobis,ViT-L-16,50.0,59.183675,82.30505,100.0
1-NN,ViT-L-16,51.190823,18.80883,82.206506,94.340003


In [25]:
torch.save(train_features, f"vit_l_16_train.pt")

In [26]:
torch.save(test_features, f"vit_l_16_test.pt")

In [27]:
print(mean_scores.sort_values("AUROC").to_csv(float_format="%.2f"))

Method,Backbone,AUROC,AUPR-IN,AUPR-OUT,FPR95TPR
Deep SVDD,-,49.32,18.07,81.28,95.14
1-NN,-,50.00,59.18,90.82,100.00
Mahalanobis,-,50.00,59.18,90.82,100.00
Mahalanobis,ViT-L-16,50.00,59.18,82.31,100.00
1-NN,ViT-L-16,51.19,18.81,82.21,94.34

