In [1]:
from torch.utils.data import Dataset, random_split
import os 
from os.path import join 
from PIL import Image
from tqdm import tqdm 
import torch 

# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so that the notebook can still be executed on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Directory that contains (or will receive) all datasets. The default is the
# original machine-specific location; set the DATASET_ROOT environment variable
# to point somewhere else without editing the notebook.
root = os.environ.get("DATASET_ROOT", "/home/ki/datasets/")

from detector import label_to_name, color_to_name

class FruitDataset(Dataset):
    """
    Fruit images annotated with two attributes: fruit class and colour.

    Images are read from ``<root>/fruits/train/train/<Class Name>/``, one
    sub-directory per class. Each item is a pair ``(img, y)`` where ``y`` is
    ``torch.tensor([class_index, color_index])`` before ``target_transform``
    is applied. Class indices follow the order of ``label_to_name`` and colour
    indices follow the order of ``color_to_name``.
    """

    # Colour attribute assigned to each class directory name.
    class_color_map = {
        "Apple Braeburn": "red",
        "Apple Granny Smith": "green",
        "Apricot": "orange",
        "Avocado": "green",
        "Banana": "yellow",
        "Blueberry": "black",
        "Cactus fruit": "green",
        "Cantaloupe": "yellow",
        "Cherry": "red",
        "Clementine": "orange",
        "Corn": "yellow",
        "Cucumber Ripe": "brown",
        "Grape Blue": "black",
        "Kiwi": "brown",
        "Lemon": "yellow",
        "Limes": "green",
        "Mango": "green",
        "Onion White": "brown",
        "Orange": "orange",
        "Papaya": "green",
        "Passion Fruit": "black",
        "Peach": "orange",
        "Pear": "green",  # ?? (colour assignment marked as uncertain by the author)
        "Pepper Green": "green",
        "Pepper Red": "red",
        "Pineapple": "brown",
        "Plum": "red",
        "Pomegranate": "red",
        "Potato Red": "brown",
        "Raspberry": "red",
        "Strawberry": "red",
        "Tomato": "red",
        "Watermelon": "red",
    }

    def __init__(self, root="train", transform=None, target_transform=None):
        """
        Index every image file below ``<root>/fruits/train/train``.

        :param root: directory that contains the ``fruits`` folder
        :param transform: optional callable applied to the opened PIL image
        :param target_transform: optional callable applied to the target tensor
        :raises KeyError: if a class directory has no entry in ``class_color_map``
        """
        root = join(root, "fruits", "train", "train")

        # os.listdir returns entries in arbitrary order. Sorting keeps the
        # sample indices stable, which the seeded random_split calls rely on
        # to reproduce the same partition. Stray files next to the class
        # directories are ignored.
        self.classes = sorted(
            c for c in os.listdir(root) if os.path.isdir(join(root, c))
        )
        self.files = []
        self.labels = []
        self.colors = []

        self.transform = transform
        self.target_transform = target_transform

        for c in self.classes:
            class_dir = join(root, c)
            fs = [join(class_dir, f) for f in sorted(os.listdir(class_dir))]
            self.files += fs
            # "Apple Braeburn" -> "apple_braeburn", the key format used by class_map
            self.labels += [c.lower().replace(" ", "_")] * len(fs)
            self.colors += [self.class_color_map[c]] * len(fs)

        # name -> integer index lookups for both attributes
        self.class_map = {c: n for n, c in enumerate(label_to_name)}
        self.color_map = {c: n for n, c in enumerate(color_to_name)}

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.files)

    def __getitem__(self, index):
        """
        Load one sample.

        :param index: position of the sample in ``self.files``
        :return: ``(img, y)``; ``y`` is ``torch.tensor([class_index, color_index])``
            or whatever ``target_transform`` makes of it
        """
        path = self.files[index]
        y = self.class_map[self.labels[index]]
        color = self.color_map[self.colors[index]]

        # Read the pixel data while the file is open so that the handle is
        # released immediately instead of whenever the image is first used.
        with Image.open(path) as img:
            img.load()

        if self.transform is not None:
            img = self.transform(img)

        y = torch.tensor([y, color])
        if self.target_transform is not None:
            y = self.target_transform(y)

        return img, y


In [2]:
# Build the dataset once without transforms; this indexes the class directories
# under `root`. `ds` is not referenced again in the cells visible below.
ds = FruitDataset(root=root)

In [3]:
from pytorch_ood.utils import ToRGB
from torchvision.transforms import ToTensor, Resize, Compose
import torch 
from torch.utils.data import DataLoader
import numpy as np


# Preprocessing applied to every image in this notebook (fruit data and outlier
# data alike): ToRGB, ToTensor, then resize to 32x32.
trans = Compose([ToRGB(), ToTensor(), Resize((32, 32), antialias=True)])

# Full fruit dataset; each target is the tensor [class_index, color_index].
data = FruitDataset(root=root, transform=trans)
# Seeded 14000 / 2854 train/test partition. train_fruit_model() and evaluate()
# repeat this call with the same sizes and seed so that they work on the same
# partition. random_split expects the sizes to add up to len(data).
train_data, test_data = random_split(data, [14000,2854], generator=torch.Generator().manual_seed(0))

# Loaders used by train_model(); only the training loader shuffles.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2)

In [4]:
from torch import nn
from pytorch_ood.model import WideResNet

def Model(num_classes=None, *args, **kwargs):
    """
    Build a WideResNet with ``imagenet32`` pretrained weights and a fresh head.

    The backbone is created with its original 1000-way output layer so that the
    pretrained weights fit; the final ``fc`` layer is then replaced by a newly
    initialised linear layer with ``num_classes`` outputs.

    :param num_classes: number of outputs of the new classification head (required)
    :param args: extra positional arguments forwarded to ``WideResNet``
    :param kwargs: extra keyword arguments forwarded to ``WideResNet``
    :return: the model with the replaced ``fc`` layer
    :raises TypeError: if ``num_classes`` is not given
    """
    if num_classes is None:
        # Without an output size the new head cannot be created; fail here with
        # a clear message instead of after the backbone has been constructed.
        raise TypeError("Model() requires num_classes (size of the new output layer)")

    model = WideResNet(*args, num_classes=1000, pretrained="imagenet32", **kwargs)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

In [5]:
from torch.optim import SGD


def train_model(att_index, num_classes, epochs=5, lr=0.001):
    """
    Train a classifier for one attribute of the fruit dataset.

    Uses the notebook-level ``train_loader`` / ``test_loader`` and ``device``.
    After every epoch the accuracy on ``test_loader`` is printed.

    :param att_index: column of the target tensor to predict
        (0 = fruit class, 1 = colour, as produced by ``FruitDataset``)
    :param num_classes: number of distinct values of that attribute
    :param epochs: number of passes over the training data
    :param lr: learning rate of the SGD optimizer
    :return: the trained model, left in evaluation mode
    """
    model = Model(num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True)

    for epoch in range(epochs):
        # Exponential moving average of the batch loss, shown in the progress
        # bar only. It starts at zero, so the first few values are biased low.
        running_loss = 0.0
        model.train()
        bar = tqdm(train_loader)
        for inputs, y in bar:
            # y has one row per sample and one column per attribute
            labels = y[:, att_index]
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss = 0.8 * running_loss + 0.2 * loss.item()
            bar.set_postfix({"loss": running_loss})

        correct = 0
        total = 0

        model.eval()
        with torch.no_grad():
            for inputs, y in test_loader:
                labels = y[:, att_index]
                inputs, labels = inputs.to(device), labels.to(device)

                # index of the largest logit is the predicted class
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy of the network on the test images: {correct / total:.2%}')

    return model

In [6]:
from pytorch_ood.dataset.img import TinyImages300k
# ToUnknown is imported in this cell as well, so that train_fruit_model() does
# not depend on a later cell having been executed first.
from pytorch_ood.utils import ToUnknown, is_known

def train_fruit_model(epochs=1, lr=0.001):
    """
    Train a binary "is this a fruit image?" classifier.

    Fruit images (targets reduced to the class index) are mixed with images
    from TinyImages300k whose targets are passed through ``ToUnknown()``. The
    binary label of a sample is ``is_known(y)``, i.e. 1 for fruit images and 0
    for TinyImages300k samples (see the pytorch_ood documentation of
    ``ToUnknown`` / ``is_known``). The accuracy on the held-out mixture is
    printed after every epoch.

    :param epochs: number of passes over the mixed training data
    :param lr: learning rate of the SGD optimizer
    :return: the trained two-class model, left in evaluation mode
    """
    tiny = TinyImages300k(root=root, download=True, transform=trans, target_transform=ToUnknown())
    # fixed 50k / 10k subsets for training / testing; the remaining 240k are unused
    data_train_out, data_test_out, _ = random_split(tiny, [50000, 10000, 240000], generator=torch.Generator().manual_seed(123))

    # Same sizes and seed as the other fruit splits in this notebook, so the
    # train/test partition of the fruit images is the same one.
    data_noatt = FruitDataset(root=root, transform=trans, target_transform=lambda y: int(y[0]))
    train_data_noatt, test_data_noatt = random_split(data_noatt, [14000,2854], generator=torch.Generator().manual_seed(0))

    # `+` on datasets concatenates them
    new_loader = DataLoader(train_data_noatt + data_train_out, batch_size=32, shuffle=True, num_workers=10)
    new_test_loader = DataLoader(test_data_noatt + data_test_out, batch_size=32, shuffle=False, num_workers=10)

    model = Model(num_classes=2).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True)

    for epoch in range(epochs):
        # exponential moving average of the batch loss, for the progress bar only
        running_loss = 0.0
        model.train()

        bar = tqdm(new_loader)
        for inputs, y in bar:
            labels = is_known(y).long()
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss = 0.8 * running_loss + 0.2 * loss.item()
            bar.set_postfix({"loss": running_loss})

        correct = 0
        total = 0

        model.eval()
        with torch.no_grad():
            for inputs, y in new_test_loader:
                labels = is_known(y).long()
                inputs, labels = inputs.to(device), labels.to(device)

                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy of the shape network on the test images: {correct / total:.2%}')

    return model

In [7]:
from pytorch_ood.dataset.img import (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize)
from pytorch_ood.detector import EnergyBased, MaxSoftmax, MaxLogit, Entropy, Mahalanobis, ViM
from pytorch_ood.utils import OODMetrics, ToUnknown
from detector import EnsembleDetector, PrologOOD, Prologic

def evaluate(label_net, color_net, fruit_net):
    """
    Evaluate a set of OOD detectors on the fruit test split versus outlier datasets.

    For every detector and every outlier dataset, the detector scores all
    samples of the fruit test split plus the outlier dataset, and the metrics
    computed by ``OODMetrics`` are collected.

    :param label_net: network predicting the fruit class
    :param color_net: network predicting the colour
    :param fruit_net: binary fruit / non-fruit network used by the "+" detectors
    :return: list of metric dicts, each extended by the keys ``"Method"``
        (detector name) and ``"Dataset"`` (outlier dataset class name)
    """
    # Every network handed to a detector must be in evaluation mode; do not
    # rely on the training functions having left them that way.
    for net in (label_net, color_net, fruit_net):
        if net is not None:
            net.eval()

    results = []

    detectors = {
        "ViM": ViM(label_net.features, w=label_net.fc.weight, b=label_net.fc.bias, d=64),
        "Mahalanobis": Mahalanobis(label_net.features),
        "Entropy": Entropy(label_net),
        "LogicOOD+": PrologOOD("kb.pl", label_net, color_net, fruit_net),
        "Logic": Prologic("kb.pl", label_net, color_net),
        "Logic+": Prologic("kb.pl", label_net, color_net, fruit_net),
        "LogicOOD": PrologOOD("kb.pl", label_net, color_net),
        "Ensemble": EnsembleDetector(label_net, color_net),
        "MSP": MaxSoftmax(label_net),
        "Energy": EnergyBased(label_net),
        "MaxLogit": MaxLogit(label_net),
    }

    # Same sizes and seed as the split used for training, so `data_in` is the
    # held-out part and `data_in_train` the part the networks were trained on.
    data = FruitDataset(root=root, transform=trans, target_transform=lambda y: int(y[0]))
    data_in_train, data_in = random_split(data, [14000,2854], generator=torch.Generator().manual_seed(0))
    train_in_loader = DataLoader(data_in_train, batch_size=32, shuffle=False, num_workers=2)

    # These two detectors are fitted on the training features before use.
    detectors["ViM"].fit(train_in_loader, device=device)
    detectors["Mahalanobis"].fit(train_in_loader, device=device)

    # Build each outlier dataset (and its loader) once instead of once per
    # detector; construction with download=True does not depend on the detector.
    loaders = {}
    for dataset_c in (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize):
        data_out = dataset_c(root=root, transform=trans, target_transform=ToUnknown(), download=True)
        loaders[dataset_c.__name__] = DataLoader(data_in+data_out, batch_size=256, shuffle=False, num_workers=12)

    for detector_name, detector in detectors.items():
        for data_name, loader in loaders.items():
            print(data_name)

            scores = []
            ys = []

            with torch.no_grad():
                for x, y in loader:
                    # move scores off the device right away; labels never need to go there
                    scores.append(detector(x.to(device)).cpu())
                    ys.append(y)

            scores = torch.cat(scores, dim=0)
            ys = torch.cat(ys, dim=0)

            metrics = OODMetrics()
            metrics.update(scores, ys)
            r = metrics.compute()
            r.update({
                "Method": detector_name,
                "Dataset": data_name
            })
            print(r)
            results.append(r)

    return results

In [8]:
# Metric dicts of all trials; every entry carries the seed of the trial it came from.
results = []

for trial in range(10):
    # The results are tagged with "Seed": trial, so actually seed torch with it.
    # This fixes the initialisation of the new output layers and the shuffling
    # order of the training loaders for each trial.
    torch.manual_seed(trial)

    print("label")
    label_net = train_model(att_index=0, num_classes=33)
    print("color")
    color_net = train_model(att_index=1, num_classes=6)
    print("fruit")

    fruit_net = train_fruit_model()

    res = evaluate(label_net, color_net, fruit_net)

    for r in res:
        r["Seed"] = trial

    results.extend(res)

label


100%|██████████| 438/438 [00:13<00:00, 33.24it/s, loss=0.047] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.60%


100%|██████████| 438/438 [00:12<00:00, 34.39it/s, loss=0.0215]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.89%


100%|██████████| 438/438 [00:12<00:00, 35.11it/s, loss=0.0334] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%


100%|██████████| 438/438 [00:11<00:00, 37.80it/s, loss=0.0119] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.82%


100%|██████████| 438/438 [00:11<00:00, 36.86it/s, loss=0.0168] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%
color


100%|██████████| 438/438 [00:11<00:00, 37.29it/s, loss=0.13]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.79%


100%|██████████| 438/438 [00:11<00:00, 36.97it/s, loss=0.0234] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.89%


100%|██████████| 438/438 [00:11<00:00, 36.84it/s, loss=0.00893]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.58%


100%|██████████| 438/438 [00:11<00:00, 36.92it/s, loss=0.00353]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.93%


100%|██████████| 438/438 [00:11<00:00, 36.60it/s, loss=0.0255] 


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:54<00:00, 36.82it/s, loss=0.00241] 


Accuracy of the shape network on the test images: 99.86%
LSUNCrop
{'AUROC': 0.997471034526825, 'AUPR-IN': 0.9992936849594116, 'AUPR-OUT': 0.9914761781692505, 'FPR95TPR': 0.002803083276376128, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9997084736824036, 'AUPR-IN': 0.9999164938926697, 'AUPR-OUT': 0.9995827674865723, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.9997475147247314, 'AUPR-IN': 0.9998723268508911, 'AUPR-OUT': 0.9998408555984497, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9986088871955872, 'AUPR-IN': 0.9996112585067749, 'AUPR-OUT': 0.9955292344093323, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.9994780421257019, 'AUPR-IN': 0.9998520612716675, 'AUPR-OUT': 0.9987319111824036, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.995

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9887897372245789, 'AUPR-IN': 0.9970400929450989, 'AUPR-OUT': 0.9531997442245483, 'FPR95TPR': 0.021373510360717773, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 35.86it/s, loss=0.0475]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.50%


100%|██████████| 438/438 [00:11<00:00, 36.50it/s, loss=0.0251]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.53%


100%|██████████| 438/438 [00:11<00:00, 36.67it/s, loss=0.0196]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.99%


100%|██████████| 438/438 [00:12<00:00, 35.76it/s, loss=0.032]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.39%


100%|██████████| 438/438 [00:12<00:00, 35.40it/s, loss=0.00517]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.86%
color


100%|██████████| 438/438 [00:12<00:00, 36.20it/s, loss=0.0579]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 93.73%


100%|██████████| 438/438 [00:12<00:00, 36.31it/s, loss=0.00881]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.77%


100%|██████████| 438/438 [00:12<00:00, 36.37it/s, loss=0.00407]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%


100%|██████████| 438/438 [00:12<00:00, 36.40it/s, loss=0.00918] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.09it/s, loss=0.00224] 


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 35.94it/s, loss=0.00278] 


Accuracy of the shape network on the test images: 99.74%
LSUNCrop
{'AUROC': 0.9791744947433472, 'AUPR-IN': 0.9945045113563538, 'AUPR-OUT': 0.9159132242202759, 'FPR95TPR': 0.12508758902549744, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9998419284820557, 'AUPR-IN': 0.999958872795105, 'AUPR-OUT': 0.9992638826370239, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.994813084602356, 'AUPR-IN': 0.9977324604988098, 'AUPR-OUT': 0.9872773289680481, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9917891621589661, 'AUPR-IN': 0.9978598356246948, 'AUPR-OUT': 0.9641873836517334, 'FPR95TPR': 0.008759635500609875, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.9996896386146545, 'AUPR-IN': 0.999914288520813, 'AUPR-OUT': 0.9988460540771484, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.9691423177719116, 'AUPR-IN': 0.9918456077575684, 'AUPR-OU

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9962979555130005, 'AUPR-IN': 0.9989663362503052, 'AUPR-OUT': 0.9869139790534973, 'FPR95TPR': 0.009810792282223701, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 36.05it/s, loss=0.0762]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 95.66%


100%|██████████| 438/438 [00:12<00:00, 36.24it/s, loss=0.0331]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.39%


100%|██████████| 438/438 [00:12<00:00, 36.20it/s, loss=0.0159] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.98%


100%|██████████| 438/438 [00:12<00:00, 36.12it/s, loss=0.0256] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.40%


100%|██████████| 438/438 [00:12<00:00, 35.31it/s, loss=0.01]   
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%
color


100%|██████████| 438/438 [00:12<00:00, 36.15it/s, loss=0.0478]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.32%


100%|██████████| 438/438 [00:12<00:00, 35.49it/s, loss=0.0177] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.16%


100%|██████████| 438/438 [00:12<00:00, 35.58it/s, loss=0.0101] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%


100%|██████████| 438/438 [00:12<00:00, 36.24it/s, loss=0.104]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.89%


100%|██████████| 438/438 [00:12<00:00, 36.21it/s, loss=0.016]  


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 35.91it/s, loss=0.00187] 


Accuracy of the shape network on the test images: 99.92%
LSUNCrop
{'AUROC': 0.9999201893806458, 'AUPR-IN': 0.9999774098396301, 'AUPR-OUT': 0.9997282028198242, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9999998807907104, 'AUPR-IN': 0.9999999403953552, 'AUPR-OUT': 0.9999996423721313, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.9999940395355225, 'AUPR-IN': 0.999997079372406, 'AUPR-OUT': 0.9999886155128479, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9999749660491943, 'AUPR-IN': 0.9999929070472717, 'AUPR-OUT': 0.9999142289161682, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.9999989867210388, 'AUPR-IN': 0.9999995827674866, 'AUPR-OUT': 0.9999969005584717, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.9999001622200012, 'AUPR-IN': 0.9999716877937317, 'AUPR-OUT': 0.999648928642273, 'FPR95TP

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9948320984840393, 'AUPR-IN': 0.998527467250824, 'AUPR-OUT': 0.9824684262275696, 'FPR95TPR': 0.011913103982806206, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:11<00:00, 36.67it/s, loss=0.0618]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.67%


100%|██████████| 438/438 [00:12<00:00, 36.17it/s, loss=0.0211]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.12%


100%|██████████| 438/438 [00:12<00:00, 36.01it/s, loss=0.00945]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.40%


100%|██████████| 438/438 [00:12<00:00, 36.00it/s, loss=0.0119] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.79%


100%|██████████| 438/438 [00:12<00:00, 35.81it/s, loss=0.0108] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%
color


100%|██████████| 438/438 [00:12<00:00, 35.27it/s, loss=0.0175]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.74%


100%|██████████| 438/438 [00:12<00:00, 35.88it/s, loss=0.129]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.39%


100%|██████████| 438/438 [00:12<00:00, 35.85it/s, loss=0.0261] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.86%


100%|██████████| 438/438 [00:12<00:00, 36.09it/s, loss=0.0401] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.75%


100%|██████████| 438/438 [00:12<00:00, 35.70it/s, loss=0.0077] 


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 35.75it/s, loss=0.00386] 


Accuracy of the shape network on the test images: 99.90%
LSUNCrop
{'AUROC': 0.9999932050704956, 'AUPR-IN': 0.9999979138374329, 'AUPR-OUT': 0.9999763369560242, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9999998807907104, 'AUPR-IN': 0.9999999403953552, 'AUPR-OUT': 0.9999996423721313, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.9999988675117493, 'AUPR-IN': 0.9999993443489075, 'AUPR-OUT': 0.9999977350234985, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9999927282333374, 'AUPR-IN': 0.9999979138374329, 'AUPR-OUT': 0.9999746084213257, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.9999996423721313, 'AUPR-IN': 0.9999998807907104, 'AUPR-OUT': 0.9999985694885254, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.9999626874923706, 'AUPR-IN': 0.9999893307685852, 'AUPR-OUT': 0.9998713731765747, 'FPR95

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9756123423576355, 'AUPR-IN': 0.9932126998901367, 'AUPR-OUT': 0.9068678617477417, 'FPR95TPR': 0.10476524382829666, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 36.30it/s, loss=0.0448]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.48%


100%|██████████| 438/438 [00:12<00:00, 36.35it/s, loss=0.055] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.34%


100%|██████████| 438/438 [00:12<00:00, 36.23it/s, loss=0.0201] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.28it/s, loss=0.016]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%


100%|██████████| 438/438 [00:12<00:00, 35.59it/s, loss=0.00694]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%
color


100%|██████████| 438/438 [00:12<00:00, 35.62it/s, loss=0.0268]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%


100%|██████████| 438/438 [00:12<00:00, 36.03it/s, loss=0.022]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.22it/s, loss=0.00701]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 35.60it/s, loss=0.00276]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 35.84it/s, loss=0.00823] 


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 35.95it/s, loss=0.00226] 


Accuracy of the shape network on the test images: 99.84%
LSUNCrop
{'AUROC': 0.9996182918548584, 'AUPR-IN': 0.9998903870582581, 'AUPR-OUT': 0.999853789806366, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9996490478515625, 'AUPR-IN': 0.9998998045921326, 'AUPR-OUT': 0.9999904632568359, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.9996474981307983, 'AUPR-IN': 0.9998213648796082, 'AUPR-OUT': 0.9999922513961792, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9996339678764343, 'AUPR-IN': 0.9998946785926819, 'AUPR-OUT': 0.9999181628227234, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.9996487498283386, 'AUPR-IN': 0.9998996257781982, 'AUPR-OUT': 0.9999912977218628, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.999

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9916251301765442, 'AUPR-IN': 0.9977561235427856, 'AUPR-OUT': 0.9658426642417908, 'FPR95TPR': 0.021373510360717773, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 36.44it/s, loss=0.045] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 91.70%


100%|██████████| 438/438 [00:12<00:00, 36.46it/s, loss=0.0208]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.88%


100%|██████████| 438/438 [00:12<00:00, 35.96it/s, loss=0.0143] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.72%


100%|██████████| 438/438 [00:12<00:00, 36.04it/s, loss=0.0103] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.35%


100%|██████████| 438/438 [00:12<00:00, 36.07it/s, loss=0.0485] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 98.49%
color


100%|██████████| 438/438 [00:12<00:00, 35.89it/s, loss=0.0753] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.74%


100%|██████████| 438/438 [00:12<00:00, 36.08it/s, loss=0.0118] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.23%


100%|██████████| 438/438 [00:12<00:00, 35.93it/s, loss=0.0166] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 35.97it/s, loss=0.00244]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.93%


100%|██████████| 438/438 [00:12<00:00, 36.11it/s, loss=0.00232] 


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 35.96it/s, loss=0.00261] 


Accuracy of the shape network on the test images: 99.96%
LSUNCrop
{'AUROC': 0.989605724811554, 'AUPR-IN': 0.9971250295639038, 'AUPR-OUT': 0.9637423753738403, 'FPR95TPR': 0.04765241593122482, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9997286796569824, 'AUPR-IN': 0.9999234676361084, 'AUPR-OUT': 0.9996294379234314, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.9974644184112549, 'AUPR-IN': 0.9988363981246948, 'AUPR-OUT': 0.9944780468940735, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9961258172988892, 'AUPR-IN': 0.9989482164382935, 'AUPR-OUT': 0.9855588674545288, 'FPR95TPR': 0.002803083276376128, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.9994083642959595, 'AUPR-IN': 0.9998375177383423, 'AUPR-OUT': 0.9983282685279846, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.9747

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.981615424156189, 'AUPR-IN': 0.994752824306488, 'AUPR-OUT': 0.9400652647018433, 'FPR95TPR': 0.09950946271419525, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 36.25it/s, loss=0.0664]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.74%


100%|██████████| 438/438 [00:12<00:00, 36.12it/s, loss=0.0219]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.62%


100%|██████████| 438/438 [00:12<00:00, 36.13it/s, loss=0.105]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 95.62%


100%|██████████| 438/438 [00:12<00:00, 35.85it/s, loss=0.00919]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.82%


100%|██████████| 438/438 [00:12<00:00, 36.11it/s, loss=0.0117] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.89%
color


100%|██████████| 438/438 [00:12<00:00, 36.05it/s, loss=0.0364]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 93.69%


100%|██████████| 438/438 [00:12<00:00, 35.95it/s, loss=0.0322] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.57%


100%|██████████| 438/438 [00:12<00:00, 36.22it/s, loss=0.00355]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.54%


100%|██████████| 438/438 [00:12<00:00, 35.64it/s, loss=0.0103]  
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.23%


100%|██████████| 438/438 [00:12<00:00, 35.90it/s, loss=0.00543] 


Accuracy of the network on the test images: 99.37%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 36.09it/s, loss=0.00172] 


Accuracy of the shape network on the test images: 99.91%
LSUNCrop
{'AUROC': 0.9998009204864502, 'AUPR-IN': 0.9999424815177917, 'AUPR-OUT': 0.9998828172683716, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9998244047164917, 'AUPR-IN': 0.9999498128890991, 'AUPR-OUT': 0.999996542930603, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.99982088804245, 'AUPR-IN': 0.9999090433120728, 'AUPR-OUT': 0.9999889731407166, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.999807596206665, 'AUPR-IN': 0.9999443888664246, 'AUPR-OUT': 0.9999136924743652, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.999823272228241, 'AUPR-IN': 0.9999493360519409, 'AUPR-OUT': 0.9999878406524658, 'FPR95TPR': 0.000350385409547016, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.9998669

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9940197467803955, 'AUPR-IN': 0.9983851313591003, 'AUPR-OUT': 0.9734114408493042, 'FPR95TPR': 0.015066573396325111, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 36.18it/s, loss=0.0677]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.71%


100%|██████████| 438/438 [00:11<00:00, 36.62it/s, loss=0.018] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.83%


100%|██████████| 438/438 [00:12<00:00, 36.19it/s, loss=0.0118] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%


100%|██████████| 438/438 [00:12<00:00, 36.14it/s, loss=0.0142] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%


100%|██████████| 438/438 [00:11<00:00, 36.59it/s, loss=0.00883]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%
color


100%|██████████| 438/438 [00:12<00:00, 36.30it/s, loss=0.0264]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 94.85%


100%|██████████| 438/438 [00:12<00:00, 35.90it/s, loss=0.00868]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.02it/s, loss=0.00359]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 35.78it/s, loss=0.0025] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.36it/s, loss=0.0236] 


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 36.12it/s, loss=0.00119] 


Accuracy of the shape network on the test images: 99.84%
LSUNCrop
{'AUROC': 0.9958398342132568, 'AUPR-IN': 0.9988441467285156, 'AUPR-OUT': 0.9858628511428833, 'FPR95TPR': 0.009810792282223701, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9991551637649536, 'AUPR-IN': 0.9997578859329224, 'AUPR-OUT': 0.9982118010520935, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.999501645565033, 'AUPR-IN': 0.9997488260269165, 'AUPR-OUT': 0.999689519405365, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.996726930141449, 'AUPR-IN': 0.9990889430046082, 'AUPR-OUT': 0.9891442656517029, 'FPR95TPR': 0.003503854153677821, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.998699426651001, 'AUPR-IN': 0.9996343851089478, 'AUPR-OUT': 0.996394157409668, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.9935744

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9858701229095459, 'AUPR-IN': 0.9962455034255981, 'AUPR-OUT': 0.9419997334480286, 'FPR95TPR': 0.06482130289077759, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 36.44it/s, loss=0.0752]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.15%


100%|██████████| 438/438 [00:12<00:00, 36.27it/s, loss=0.0321]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.26%


100%|██████████| 438/438 [00:11<00:00, 36.67it/s, loss=0.00998]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.27it/s, loss=0.0117] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 35.77it/s, loss=0.0138] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.96%
color


100%|██████████| 438/438 [00:12<00:00, 35.44it/s, loss=0.0193]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.27%


100%|██████████| 438/438 [00:12<00:00, 36.16it/s, loss=0.0155] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.27it/s, loss=0.0214] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.76%


100%|██████████| 438/438 [00:12<00:00, 36.26it/s, loss=0.00368]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 35.74it/s, loss=0.00812]


Accuracy of the network on the test images: 99.72%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 36.05it/s, loss=0.00416] 


Accuracy of the shape network on the test images: 99.84%
LSUNCrop
{'AUROC': 0.998859703540802, 'AUPR-IN': 0.9996744394302368, 'AUPR-OUT': 0.997124195098877, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.99961256980896, 'AUPR-IN': 0.9998888373374939, 'AUPR-OUT': 0.9998611807823181, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.999627947807312, 'AUPR-IN': 0.999811053276062, 'AUPR-OUT': 0.9999532699584961, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9993158578872681, 'AUPR-IN': 0.999803900718689, 'AUPR-OUT': 0.9987545013427734, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.9995713233947754, 'AUPR-IN': 0.9998767375946045, 'AUPR-OUT': 0.9997104406356812, 'FPR95TPR': 0.000700770819094032, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.998623669

  0%|          | 0/438 [00:00<?, ?it/s]

{'AUROC': 0.9947925209999084, 'AUPR-IN': 0.99846351146698, 'AUPR-OUT': 0.9838247895240784, 'FPR95TPR': 0.024176593869924545, 'Method': 'MaxLogit', 'Dataset': 'TinyImageNetResize'}
label


100%|██████████| 438/438 [00:12<00:00, 36.01it/s, loss=0.0449]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.30%


100%|██████████| 438/438 [00:11<00:00, 36.50it/s, loss=0.0222]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 96.50%


100%|██████████| 438/438 [00:12<00:00, 36.25it/s, loss=0.0276] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.58%


100%|██████████| 438/438 [00:12<00:00, 36.40it/s, loss=0.00758]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 36.16it/s, loss=0.00457]
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.68%
color


100%|██████████| 438/438 [00:12<00:00, 35.79it/s, loss=0.0744] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 95.90%


100%|██████████| 438/438 [00:12<00:00, 36.00it/s, loss=0.0262] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.51%


100%|██████████| 438/438 [00:12<00:00, 35.85it/s, loss=0.0104] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.30%


100%|██████████| 438/438 [00:12<00:00, 35.89it/s, loss=0.0157] 
  0%|          | 0/438 [00:00<?, ?it/s]

Accuracy of the network on the test images: 100.00%


100%|██████████| 438/438 [00:12<00:00, 35.67it/s, loss=0.0115]  


Accuracy of the network on the test images: 100.00%
fruit


100%|██████████| 2000/2000 [00:55<00:00, 35.85it/s, loss=0.0275]  


Accuracy of the shape network on the test images: 99.72%
LSUNCrop
{'AUROC': 0.9968429803848267, 'AUPR-IN': 0.9991291761398315, 'AUPR-OUT': 0.9885169267654419, 'FPR95TPR': 0.005606166552752256, 'Method': 'ViM', 'Dataset': 'LSUNCrop'}
LSUNResize
{'AUROC': 0.9999991655349731, 'AUPR-IN': 0.9999997615814209, 'AUPR-OUT': 0.9999969005584717, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'LSUNResize'}
Textures
{'AUROC': 0.9998722076416016, 'AUPR-IN': 0.9999393224716187, 'AUPR-OUT': 0.9997283816337585, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'Textures'}
TinyImageNetCrop
{'AUROC': 0.9994798898696899, 'AUPR-IN': 0.9998562932014465, 'AUPR-OUT': 0.9980723857879639, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'TinyImageNetCrop'}
TinyImageNetResize
{'AUROC': 0.999991774559021, 'AUPR-IN': 0.9999976754188538, 'AUPR-OUT': 0.9999712109565735, 'FPR95TPR': 0.0, 'Method': 'ViM', 'Dataset': 'TinyImageNetResize'}
LSUNCrop
{'AUROC': 0.9941326975822449, 'AUPR-IN': 0.9983989000320435, 'AUPR-OUT': 0.97823733

In [9]:
import pandas as pd

# Metric columns that go into the summary table, in display order.
metric_columns = ["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]

# One row per evaluated (method, dataset) result dict collected in `results`.
result_df = pd.DataFrame(results)

# Select the metric columns *before* aggregating. The result rows also carry
# string columns (e.g. "Dataset"); aggregating the whole frame with
# mean/sem relies on pandas silently dropping such columns, which newer
# pandas versions no longer do (they raise a TypeError instead).
# Values are scaled by 100 so the table is reported in percent.
summary = result_df.groupby(by="Method")[metric_columns].agg(["mean", "sem"]) * 100

# print() (rather than rich display) is intentional: the raw LaTeX source is
# what gets copied out of the notebook.
print(summary.to_latex(float_format="%.2f"))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{AUROC} & \multicolumn{2}{l}{AUPR-IN} & \multicolumn{2}{l}{AUPR-OUT} & \multicolumn{2}{l}{FPR95TPR} \\
{} &  mean &  sem &    mean &  sem &     mean &  sem &     mean &  sem \\
Method      &       &      &         &      &          &      &          &      \\
\midrule
Energy      & 98.19 & 0.22 &   99.38 & 0.09 &    94.44 & 0.59 &     9.11 & 1.41 \\
Ensemble    & 98.95 & 0.08 &   99.60 & 0.05 &    97.39 & 0.17 &     4.39 & 0.38 \\
Entropy     & 98.24 & 0.22 &   99.39 & 0.09 &    94.64 & 0.65 &     8.37 & 1.32 \\
Logic       & 73.90 & 0.90 &   93.57 & 0.27 &    69.50 & 0.57 &   100.00 & 0.00 \\
Logic+      & 99.87 & 0.03 &   99.96 & 0.01 &    99.82 & 0.05 &     0.13 & 0.03 \\
LogicOOD    & 99.12 & 0.08 &   99.66 & 0.04 &    97.74 & 0.16 &     3.66 & 0.37 \\
LogicOOD+   & 99.94 & 0.01 &   99.98 & 0.01 &    99.95 & 0.01 &     0.13 & 0.03 \\
MSP         & 98.10 & 0.23 &   99.32 & 0.10 &    94.54 & 0.65 &     8.42 & 1.29 \\
Mahalano

In [10]:
# LaTeX source of the per-method summary (mean and standard error of the
# mean, scaled to percent), kept as a string so it can be post-processed.
# The metric columns are selected before aggregating so that non-numeric
# columns of result_df (e.g. "Dataset") never reach mean/sem; newer pandas
# versions raise a TypeError instead of silently dropping them.
s = (
    result_df.groupby(by="Method")[["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]]
    .agg(["mean", "sem"])
    * 100
).to_latex(float_format="%.2f")

In [14]:
# Prefix table cells that are rendered as "& 0.xx" or "& 1.xx" with \pm so
# the error columns read as "mean \pm sem" in the LaTeX output.
# Raw strings are used for the replacement text: "\p" is not a valid escape
# sequence, so a plain "\pm" literal triggers an invalid-escape warning even
# though it yields the same characters.
# NOTE(review): this is a purely textual patch. It only matches cells that
# have exactly one space after the ampersand and start with 0 or 1, so an
# error value >= 2 would stay unmarked, and a mean cell that happens to be
# rendered that way would be marked by mistake -- check the printed table.
print(s.replace("& 0.", r"& \pm 0.").replace("& 1.", r"& \pm 1."))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{AUROC} & \multicolumn{2}{l}{AUPR-IN} & \multicolumn{2}{l}{AUPR-OUT} & \multicolumn{2}{l}{FPR95TPR} \\
{} &  mean &  sem &    mean &  sem &     mean &  sem &     mean &  sem \\
Method      &       &      &         &      &          &      &          &      \\
\midrule
Energy      & 98.19 & \pm 0.22 &   99.38 & \pm 0.09 &    94.44 & \pm 0.59 &     9.11 & \pm 1.41 \\
Ensemble    & 98.95 & \pm 0.08 &   99.60 & \pm 0.05 &    97.39 & \pm 0.17 &     4.39 & \pm 0.38 \\
Entropy     & 98.24 & \pm 0.22 &   99.39 & \pm 0.09 &    94.64 & \pm 0.65 &     8.37 & \pm 1.32 \\
Logic       & 73.90 & \pm 0.90 &   93.57 & \pm 0.27 &    69.50 & \pm 0.57 &   100.00 & \pm 0.00 \\
Logic+      & 99.87 & \pm 0.03 &   99.96 & \pm 0.01 &    99.82 & \pm 0.05 &     0.13 & \pm 0.03 \\
LogicOOD    & 99.12 & \pm 0.08 &   99.66 & \pm 0.04 &    97.74 & \pm 0.16 &     3.66 & \pm 0.37 \\
LogicOOD+   & 99.94 & \pm 0.01 &   99.98 & \pm 0.01 &    99.95 & \pm 0.01 &   