In [1]:
from torch.utils.data import Dataset, random_split
import os 
from os.path import join 
from PIL import Image
from tqdm import tqdm
import logging
import sys
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
logger.setLevel(logging.INFO)

device = "cuda:0"
root = "/home/ki/datasets/"

from detector import label_to_name, color_to_name

class FruitDataset(Dataset):
    """
    
    """

    class_color_map = {
        "Apple Braeburn": "red",
        "Apple Granny Smith": "green",
        "Apricot": "orange",
        "Avocado": "green",
        "Banana": "yellow",
        "Blueberry": "black",
        "Cactus fruit": "green",
        "Cantaloupe": "yellow",
        "Cherry": "red",
        "Clementine": "orange",
        "Corn": "yellow",
        "Cucumber Ripe": "brown",
        "Grape Blue": "black",
        "Kiwi": "brown",
        "Lemon": "yellow",
        "Limes": "green",
        "Mango": "green",
        "Onion White": "brown",
        "Orange": "orange",
        "Papaya": "green",
        "Passion Fruit": "black",
        "Peach": "orange",
        "Pear": "green", # ??
        "Pepper Green": "green",
        "Pepper Red": "red",
        "Pineapple": "brown",
        "Plum": "red",
        "Pomegranate": "red",
        "Potato Red": "brown",
        "Raspberry": "red",
        "Strawberry": "red",
        "Tomato": "red",
        "Watermelon": "red" 
    }
    
    def __init__(self, root="train", transform=None, target_transform=None):
        root = join(root, "fruits", "train", "train")

        self.classes = os.listdir(root)
        self.files = []
        self.labels = []
        self.colors = []
        
        self.transform = transform
        self.target_transform = target_transform 
        
        for c in self.classes:
            fs = [join(root, c, f) for f in os.listdir(join(root, c))]
            self.files += fs
            self.labels += [c.lower().replace(" ", "_")] * len(fs)
            self.colors += [self.class_color_map[c]] * len(fs)

        self.class_map = {c: n for n, c in enumerate(label_to_name)}
        self.color_map = {c: n for n, c in enumerate(color_to_name)}
        
    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        img = self.files[index]
        y = self.class_map[self.labels[index]]
        color = self.color_map[self.colors[index]]
        
        img = Image.open(img)

        if self.transform is not None:
            img = self.transform(img)

        y = torch.tensor([y, color]) 
        if self.target_transform is not None:
            y = self.target_transform(y)
        
        return img, y 


abc


In [2]:
ds = FruitDataset(root=root)

In [3]:
from pytorch_ood.utils import ToRGB
from torchvision.transforms import ToTensor, Resize, Compose
import torch 
from torch.utils.data import DataLoader
import numpy as np


trans = Compose([ToRGB(), ToTensor(), Resize((32, 32), antialias=True)])

data = FruitDataset(root=root, transform=trans)
train_data, val_data, test_data = random_split(data, [14000,1000, 1854], generator=torch.Generator().manual_seed(0))

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2)

In [4]:
from torch import nn
from pytorch_ood.model import WideResNet

# def override 
def Model(num_classes=None, *args, **kwargs):
    model = WideResNet(*args, num_classes=1000, pretrained="imagenet32", **kwargs)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

In [5]:
from torch.optim import SGD


def train_model(att_index, num_classes):
    """
    train a model for the given attribute index 
    """
    model = Model(num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)

    for epoch in range(5):
        running_loss = 0.0
        model.train()
        bar = tqdm(train_loader)
        for inputs, y in bar:
            labels = y[:, att_index]
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss = 0.8 * running_loss + 0.2 * loss.item()
            bar.set_postfix({"loss": running_loss})

        correct = 0
        total = 0

        with torch.no_grad():
            model.eval()

            for inputs, y in test_loader:
                labels = y[:, att_index]
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy of the network on the test images: {correct / total:.2%}')

    return model

In [6]:
from pytorch_ood.dataset.img import TinyImages300k
from pytorch_ood.utils import is_known

def train_fruit_model():
    tiny = TinyImages300k(root=root, download=True, transform=trans, target_transform=ToUnknown())
    data_train_out, data_test_out, _ = random_split(tiny, [50000, 10000, 240000], generator=torch.Generator().manual_seed(123))

    data_noatt = FruitDataset(root=root, transform=trans, target_transform=lambda y: int(y[0]))
    train_data_noatt, val_data_noatt, test_data_noatt = random_split(data_noatt, [14000,1000, 1854], generator=torch.Generator().manual_seed(0))

    new_loader = DataLoader(train_data_noatt + data_train_out, batch_size=32, shuffle=True, num_workers=10)
    new_test_loader = DataLoader(test_data_noatt + data_test_out, batch_size=32, shuffle=False, num_workers=10)

    model = Model(num_classes=2).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)

    accs = []

    for epoch in range(10):
        running_loss = 0.0
        model.train()

        bar = tqdm(new_loader)
        for inputs, y in bar:
            labels = is_known(y).long()
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss = 0.8 * running_loss + 0.2 * loss.item()
            bar.set_postfix({"loss": running_loss})

        correct = 0
        total = 0

        with torch.no_grad():
            model.eval()

            for inputs, y in new_test_loader:
                labels = is_known(y).long()
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy of the shape network on the test images: {correct / total:.2%}')
        accs.append(correct / total)

    return model

In [7]:
from pytorch_ood.dataset.img import (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize)
from pytorch_ood.detector import EnergyBased, MaxSoftmax, MaxLogit, Entropy, Mahalanobis, ViM, ReAct
from pytorch_ood.utils import OODMetrics, ToUnknown
from detector import EnsembleDetector, PrologOOD, Prologic, PrologOODT

def evaluate(label_net, color_net, fruit_net):
    _ = label_net.eval()
    _ = color_net.eval()
    
    results = []

    detectors = {
        "ViM": ViM(label_net.features, w=label_net.fc.weight, b=label_net.fc.bias, d=64),
        "Mahalanobis": Mahalanobis(label_net.features),
        "Entropy": Entropy(label_net),
        "LogicOOD+": PrologOOD("kb.pl", label_net, color_net, fruit_net),
        "Logic": Prologic("kb.pl", label_net, color_net),
        "Logic+": Prologic("kb.pl", label_net, color_net, fruit_net),
        "LogicOOD": PrologOOD("kb.pl", label_net, color_net),
        "LogicOODT": PrologOODT("kb.pl", label_net, color_net),
        "LogicOODT+": PrologOODT("kb.pl", label_net, color_net, fruit_net),
        # "LogicT+": PrologOODT("kb.pl", label_net, color_net, fruit_net), # this should be exactly the same
        "Ensemble": EnsembleDetector(label_net, color_net),
        "MSP": MaxSoftmax(label_net),
        "ReAct": ReAct(label_net.features, label_net.fc),
        "Energy": EnergyBased(label_net),
        "MaxLogit": MaxLogit(label_net),
    }


    data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
    _ , data_fit_label, _ = random_split(data_fit_label, [14000, 1000, 1854], generator=torch.Generator().manual_seed(0))
    data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
    _, data_fit_color, _ = random_split(data_fit_color, [14000, 1000, 1854], generator=torch.Generator().manual_seed(0))
    data_fit_color = DataLoader(data_fit_color, batch_size=32, shuffle=False, num_workers=2)
    data_fit_label = DataLoader(data_fit_label, batch_size=32, shuffle=False, num_workers=2)

    data = FruitDataset(root=root, transform=trans, target_transform=lambda y: int(y[0]))
    data_in_train, data_in_val, data_in = random_split(data, [14000, 1000, 1854], generator=torch.Generator().manual_seed(0))
    train_in_loader = DataLoader(data_in_train, batch_size=32, shuffle=False, num_workers=2)

    detectors["ViM"].fit(train_in_loader, device=device)
    detectors["LogicOODT"].fit(data_fit_label, data_fit_color, device=device)
    detectors["LogicOODT+"].fit(data_fit_label, data_fit_color, device=device)
    detectors["Mahalanobis"].fit(train_in_loader, device=device)

    datasets = {d.__name__: d for d in (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize)}
    
    for detector_name, detector in detectors.items():
        for data_name, dataset_c in datasets.items():
            print(data_name)
            data_out = dataset_c(root=root, transform=trans, target_transform=ToUnknown(), download=True)
            loader = DataLoader(data_in+data_out, batch_size=256, shuffle=False, num_workers=12)
            
            scores = []
            ys = []
            
            with torch.no_grad():
                for x, y in loader:
                    scores.append(detector(x.to(device)))
                    ys.append(y.to(device))
                    
                scores = torch.cat(scores, dim=0).cpu()
                ys = torch.cat(ys, dim=0).cpu()
            
            metrics = OODMetrics()
            metrics.update(scores, ys)
            r = metrics.compute()
            r.update({
                "Method": detector_name,
                "Dataset": data_name
            })
            print(r)
            results.append(r)
    
    return results 

In [None]:
results = []

for trial in range(10):
    print("label")
    label_net = train_model(att_index=0, num_classes=33)
    print("color")
    color_net = train_model(att_index=1, num_classes=6)
    print("fruit")

    fruit_net = train_fruit_model()

    res = evaluate(label_net, color_net, fruit_net)
    
    for r in res:
        r.update({"Seed": trial})
    
    results += res

label


100%|██████████| 438/438 [00:13<00:00, 32.43it/s, loss=0.0528]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.76%


100%|██████████| 438/438 [00:13<00:00, 31.76it/s, loss=0.0338]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.95%


100%|██████████| 438/438 [00:14<00:00, 30.54it/s, loss=0.00975]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.78%


100%|██████████| 438/438 [00:13<00:00, 31.85it/s, loss=0.02]   
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.95%


100%|██████████| 438/438 [00:10<00:00, 40.08it/s, loss=0.0072] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%
color


100%|██████████| 438/438 [00:10<00:00, 41.09it/s, loss=0.0596]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.30%


100%|██████████| 438/438 [00:10<00:00, 40.79it/s, loss=0.0285] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.84%


100%|██████████| 438/438 [00:10<00:00, 41.05it/s, loss=0.0554] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:10<00:00, 40.60it/s, loss=0.00361]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:11<00:00, 39.27it/s, loss=0.00353] 


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:50<00:00, 39.32it/s, loss=0.00221] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.87%


100%|██████████| 2000/2000 [00:51<00:00, 38.64it/s, loss=0.00278] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.92%


100%|██████████| 2000/2000 [00:51<00:00, 39.02it/s, loss=0.00285] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:51<00:00, 39.06it/s, loss=0.00197] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.91%


100%|██████████| 2000/2000 [00:50<00:00, 39.35it/s, loss=0.000111]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.95%


100%|██████████| 2000/2000 [00:50<00:00, 39.26it/s, loss=0.000317]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.96%


100%|██████████| 2000/2000 [00:50<00:00, 39.32it/s, loss=3.31e-5] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:52<00:00, 37.90it/s, loss=0.000146]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.92%


100%|██████████| 2000/2000 [00:50<00:00, 39.30it/s, loss=0.000856]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:47<00:00, 41.91it/s, loss=0.000125]


Accuracy of the shape network on the test images: 99.95%
Computing principal space ...
Computing alpha ...
self.alpha=6.5486
Fitting with temperature scaling


  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.005
Optimal temperature: 0.9005682468414307
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.005
Optimal temperature: 0.8239295482635498
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9006, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.8239, requires_grad=True)
Fitting with temperature scaling


  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  loss = nll_loss(log_softmax(logits / self.t), labels)
  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.005
Optimal temperature: 0.9005682468414307
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.005
Optimal temperature: 0.8239295482635498
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9006, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.8239, requires_grad=True)
LSUNCrop
{'AUROC': 0.9825130701065063, 'AUPR-IN': 0.9968494176864624, 'AUPR-OUT': 0.9059414267539978, 'FPR95TPR': 0.11758360266685486, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9987647533416748, 'AUPR-IN': 0.9997847080230713, 'AUPR-OUT': 0.9915980696678162, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
Found 5640 texture files.
{'AUROC': 0.9986789226531982, 'AUPR-IN': 0.9995987415313721, 'AUPR-OUT': 0.9952229857444763, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9901565909385681, 'AUPR-IN': 0.9982592463493347, 'AUPR-OUT': 0.9397937655448914, 'FPR95TPR': 0.023193096742033

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9848415851593018, 'AUPR-IN': 0.9971235394477844, 'AUPR-OUT': 0.935732364654541, 'FPR95TPR': 0.09870550036430359, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:11<00:00, 39.38it/s, loss=0.063] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.52%


100%|██████████| 438/438 [00:11<00:00, 39.47it/s, loss=0.0237]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.27%


100%|██████████| 438/438 [00:10<00:00, 40.16it/s, loss=0.0318] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.68%


100%|██████████| 438/438 [00:10<00:00, 40.91it/s, loss=0.00971]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.89%


100%|██████████| 438/438 [00:10<00:00, 40.86it/s, loss=0.00776]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.89%
color


100%|██████████| 438/438 [00:10<00:00, 40.00it/s, loss=0.102] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.62%


100%|██████████| 438/438 [00:10<00:00, 40.23it/s, loss=0.0658] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.24%


100%|██████████| 438/438 [00:11<00:00, 39.63it/s, loss=0.0092] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.78%


100%|██████████| 438/438 [00:10<00:00, 40.51it/s, loss=0.00382]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:10<00:00, 40.13it/s, loss=0.00739]


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:50<00:00, 39.55it/s, loss=0.00185] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.83%


100%|██████████| 2000/2000 [00:51<00:00, 39.12it/s, loss=0.00148] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.96%


100%|██████████| 2000/2000 [00:50<00:00, 39.55it/s, loss=0.0012]  
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.96%


100%|██████████| 2000/2000 [00:50<00:00, 39.51it/s, loss=0.0482]  
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.90%


100%|██████████| 2000/2000 [00:50<00:00, 39.79it/s, loss=0.000163]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.94%


100%|██████████| 2000/2000 [00:50<00:00, 39.58it/s, loss=0.000832]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:50<00:00, 39.78it/s, loss=0.000247]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.92%


100%|██████████| 2000/2000 [00:50<00:00, 39.51it/s, loss=0.000248]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:50<00:00, 39.62it/s, loss=0.000105]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.98%


100%|██████████| 2000/2000 [00:52<00:00, 38.32it/s, loss=0.00023] 


Accuracy of the shape network on the test images: 99.99%
Computing principal space ...
Computing alpha ...
self.alpha=6.7176
Fitting with temperature scaling


  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.006
Optimal temperature: 0.9064440131187439
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.004
Optimal temperature: 0.8041974306106567
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9064, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.8042, requires_grad=True)
Fitting with temperature scaling


  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  loss = nll_loss(log_softmax(logits / self.t), labels)
  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.006
Optimal temperature: 0.9064440131187439
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.004
Optimal temperature: 0.8041974306106567
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9064, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.8042, requires_grad=True)
LSUNCrop
{'AUROC': 0.9923960566520691, 'AUPR-IN': 0.9986230134963989, 'AUPR-OUT': 0.959827184677124, 'FPR95TPR': 0.01995684951543808, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9993399381637573, 'AUPR-IN': 0.9998793005943298, 'AUPR-OUT': 0.9976972341537476, 'FPR95TPR': 0.0005393743049353361, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
Found 5640 texture files.
{'AUROC': 0.9993034601211548, 'AUPR-IN': 0.9997746348381042, 'AUPR-OUT': 0.9986166954040527, 'FPR95TPR': 0.0005393743049353361, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9968504309654236, 'AUPR-IN': 0.9994328618049622, 'AUPR-OUT': 0.982918262481

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9631373286247253, 'AUPR-IN': 0.9919742941856384, 'AUPR-OUT': 0.8457326292991638, 'FPR95TPR': 0.16127292811870575, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:10<00:00, 40.27it/s, loss=0.0687]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 93.74%


100%|██████████| 438/438 [00:10<00:00, 40.33it/s, loss=0.0302]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.84%


100%|██████████| 438/438 [00:10<00:00, 41.07it/s, loss=0.0206] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.84%


100%|██████████| 438/438 [00:10<00:00, 40.00it/s, loss=0.0112] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.36%


100%|██████████| 438/438 [00:10<00:00, 40.15it/s, loss=0.0103] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%
color


100%|██████████| 438/438 [00:10<00:00, 39.98it/s, loss=0.0998]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 86.52%


100%|██████████| 438/438 [00:10<00:00, 39.97it/s, loss=0.00461]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.68%


100%|██████████| 438/438 [00:10<00:00, 40.26it/s, loss=0.0148] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.92%


100%|██████████| 438/438 [00:10<00:00, 40.20it/s, loss=0.0175] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.46%


100%|██████████| 438/438 [00:10<00:00, 40.43it/s, loss=0.0045]  


Accuracy of the network on the test images: 99.95%
fruit


100%|██████████| 2000/2000 [00:49<00:00, 40.17it/s, loss=0.00304] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.91%


100%|██████████| 2000/2000 [00:49<00:00, 40.02it/s, loss=0.000825]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.94%


100%|██████████| 2000/2000 [00:50<00:00, 40.00it/s, loss=0.000311]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.99%


100%|██████████| 2000/2000 [00:49<00:00, 40.01it/s, loss=0.000209]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.99%


100%|██████████| 2000/2000 [00:49<00:00, 40.22it/s, loss=0.00162] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.94%


100%|██████████| 2000/2000 [00:50<00:00, 39.98it/s, loss=0.000164]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.98%


100%|██████████| 2000/2000 [00:51<00:00, 39.02it/s, loss=5.53e-5] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.99%


100%|██████████| 2000/2000 [00:52<00:00, 37.74it/s, loss=1.74e-5] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.98%


100%|██████████| 2000/2000 [00:53<00:00, 37.63it/s, loss=0.000162]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.98%


100%|██████████| 2000/2000 [00:51<00:00, 38.67it/s, loss=0.000102]


Accuracy of the shape network on the test images: 99.98%
Computing principal space ...
Computing alpha ...
self.alpha=6.8942
Fitting with temperature scaling


  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.002
Optimal temperature: 0.9354942440986633
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.007
Optimal temperature: 0.7720860838890076
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9355, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.7721, requires_grad=True)
Fitting with temperature scaling


  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  loss = nll_loss(log_softmax(logits / self.t), labels)
  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.002
Optimal temperature: 0.9354942440986633
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.007
Optimal temperature: 0.7720860838890076
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9355, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.7721, requires_grad=True)
LSUNCrop
{'AUROC': 0.999791145324707, 'AUPR-IN': 0.9999614953994751, 'AUPR-OUT': 0.9988881349563599, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9999620914459229, 'AUPR-IN': 0.9999929070472717, 'AUPR-OUT': 0.9997984170913696, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
Found 5640 texture files.
{'AUROC': 0.9999872446060181, 'AUPR-IN': 0.999995768070221, 'AUPR-OUT': 0.9999610781669617, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9998794198036194, 'AUPR-IN': 0.9999778270721436, 'AUPR-OUT': 0.9993565082550049, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'T

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9801960587501526, 'AUPR-IN': 0.996537983417511, 'AUPR-OUT': 0.8674206733703613, 'FPR95TPR': 0.09223300963640213, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:11<00:00, 39.65it/s, loss=0.167] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 85.49%


100%|██████████| 438/438 [00:10<00:00, 40.17it/s, loss=0.0377]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 95.31%


100%|██████████| 438/438 [00:10<00:00, 39.90it/s, loss=0.0395] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.98%


100%|██████████| 438/438 [00:11<00:00, 39.47it/s, loss=0.00905]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.51%


100%|██████████| 438/438 [00:11<00:00, 38.62it/s, loss=0.0115] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%
color


100%|██████████| 438/438 [00:11<00:00, 38.21it/s, loss=0.0253]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.60%


100%|██████████| 438/438 [00:10<00:00, 40.22it/s, loss=0.0206] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 95.31%


100%|██████████| 438/438 [00:10<00:00, 40.03it/s, loss=0.00408]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.27%


100%|██████████| 438/438 [00:11<00:00, 39.52it/s, loss=0.00397]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.84%


100%|██████████| 438/438 [00:11<00:00, 39.15it/s, loss=0.0028] 


Accuracy of the network on the test images: 99.78%
fruit


100%|██████████| 2000/2000 [00:50<00:00, 39.48it/s, loss=0.00852] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.93%


100%|██████████| 2000/2000 [00:51<00:00, 39.09it/s, loss=0.00102] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.95%


100%|██████████| 2000/2000 [00:50<00:00, 39.76it/s, loss=0.00129] 
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.94%


100%|██████████| 2000/2000 [00:50<00:00, 39.91it/s, loss=0.000238]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:50<00:00, 39.56it/s, loss=0.000631]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:49<00:00, 40.12it/s, loss=0.000886]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.97%


100%|██████████| 2000/2000 [00:50<00:00, 39.80it/s, loss=7.2e-5]  
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.99%


100%|██████████| 2000/2000 [00:51<00:00, 38.49it/s, loss=0.000285]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 100.00%


100%|██████████| 2000/2000 [00:51<00:00, 39.09it/s, loss=0.000161]
  0%|          | 0/2000 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.99%


100%|██████████| 2000/2000 [00:51<00:00, 38.58it/s, loss=0.000294]


Accuracy of the shape network on the test images: 99.98%
Computing principal space ...
Computing alpha ...
self.alpha=6.8563
Fitting with temperature scaling


  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.004
Optimal temperature: 0.9130489826202393
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.006
Optimal temperature: 0.8459239602088928
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9130, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.8459, requires_grad=True)
Fitting with temperature scaling


  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  loss = nll_loss(log_softmax(logits / self.t), labels)
  loss = nll_loss(log_softmax(logits / self.t), labels).item()
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_label = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[0]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))
  data_fit_color = FruitDataset(root=root, transform=trans,  target_transform=lambda y: torch.tensor(y[1]))


Initial T/NLL: 1.000/0.004
Optimal temperature: 0.9130489826202393
NLL after scaling: 0.00'
Initial T/NLL: 1.000/0.006
Optimal temperature: 0.8459239602088928
NLL after scaling: 0.00'
self.scorer_label.t=Parameter containing:
tensor(0.9130, requires_grad=True)
self.scorer_color.t=Parameter containing:
tensor(0.8459, requires_grad=True)
LSUNCrop
{'AUROC': 0.9984092712402344, 'AUPR-IN': 0.9996959567070007, 'AUPR-OUT': 0.9953922629356384, 'FPR95TPR': 0.0026968715246766806, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9991886615753174, 'AUPR-IN': 0.9998494982719421, 'AUPR-OUT': 0.9999563694000244, 'FPR95TPR': 0.0016181230312213302, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
Found 5640 texture files.
{'AUROC': 0.998826801776886, 'AUPR-IN': 0.9996160268783569, 'AUPR-OUT': 0.9987301230430603, 'FPR95TPR': 0.0016181230312213302, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9990221261978149, 'AUPR-IN': 0.9998140335083008, 'AUPR-OUT': 0.9988125562

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9709396958351135, 'AUPR-IN': 0.9947396516799927, 'AUPR-OUT': 0.839903712272644, 'FPR95TPR': 0.19525350630283356, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:10<00:00, 40.01it/s, loss=0.0992]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 83.76%


100%|██████████| 438/438 [00:10<00:00, 40.83it/s, loss=0.044] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.52%


100%|██████████| 438/438 [00:10<00:00, 40.45it/s, loss=0.0135] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:10<00:00, 40.74it/s, loss=0.0154] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.24%


100%|██████████| 438/438 [00:11<00:00, 39.75it/s, loss=0.0274] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.95%
color


100%|██████████| 438/438 [00:11<00:00, 39.67it/s, loss=0.0954]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.54%


100%|██████████| 438/438 [00:11<00:00, 39.10it/s, loss=0.0391] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.03%


100%|██████████| 438/438 [00:11<00:00, 39.73it/s, loss=0.00303]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.78%


100%|██████████| 438/438 [00:11<00:00, 39.67it/s, loss=0.00196]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


 11%|█▏        | 50/438 [00:01<00:09, 40.16it/s, loss=0.00213]

In [None]:
import pandas as pd
result_df = pd.DataFrame(results)
# print((result_df.groupby(by="Method").agg(["mean", "sem"]) * 100)[["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]].to_latex(float_format="%.2f"))

In [None]:
# s = (result_df.groupby(by="Method").agg(["mean", "sem"]) * 100)[["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]].to_latex(float_format="%.2f")

In [None]:
order = ['MSP', 'Energy', 'MaxLogit', 'Entropy', 'ReAct', 'Mahalanobis', 'ViM', 'Ensemble', 'Logic', 'Logic+', 'LogicOOD', 'LogicOOD+', 'LogicOODT', 'LogicOODT+']


print((result_df.groupby(by=["Method", "Seed"]).mean() * 100).groupby("Method").agg(["mean", "sem"]).reindex(order).to_latex(float_format="%.2f").replace("& 0.", "& $\pm$ 0.").replace("& 2.", "& $\pm$ 2.").replace("& 3.", "& $\pm$ 3.").replace("& 1.", "& $\pm$ 1.").replace("& 4.", "& $\pm$ 4.").replace("& 5.", "& $\pm$ 5."))


# print(s.replace("& 0.", "& \pm 0.").replace("& 1.", "& \pm 1.").replace("& 2.", "& \pm 2.").replace("& 4.", "& \pm 4."))