# Street sign with Pre-Trained WideResNet

With additional shield net 

with prolog KB 


In [2]:
from torch.utils.data import Dataset, DataLoader
from os.path import join
import pandas as pd 
from PIL import Image
from torch.optim import SGD
import seaborn as sb 
from gtsrb import GTSRB
from detectors import EnsembleDetector, KBDetector

sb.set()

device="cuda:0"
root = "data/"

import torch
torch.manual_seed(0)

import numpy as np
np.random.seed(0)

import random
random.seed(0)

torch.use_deterministic_algorithms(False)

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)


<torch._C.Generator at 0x7f67e6266df0>

In [3]:
import torchvision
from torchvision.transforms import ToTensor, Resize, Compose
import torch 
from torch.utils.data import DataLoader

trans = Compose([ToTensor(), Resize((32, 32))])

train_data = GTSRB(root=root, train=True, transforms=trans)
test_data = GTSRB(root=root, train=False, transforms=trans)

In [4]:
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2, worker_init_fn=seed_worker)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2, worker_init_fn=seed_worker)

In [5]:
from torch import nn
from torchvision.models.resnet import resnet18
from pytorch_ood.model import WideResNet

# def override 
def Model(num_classes=None, *args, **kwargs):
    model = WideResNet(*args, num_classes=1000, **kwargs, pretrained="imagenet32")
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model


In [6]:
from tqdm.notebook import tqdm 
import numpy as np 

def train_model(att_index, num_classes):
    """
    train a model for the given attribute index 
    """
    trans = Compose([ToTensor(), Resize((32, 32))])
    train_data = GTSRB(root=root, train=True, transforms=trans)
    test_data = GTSRB(root=root, train=False, transforms=trans)
    
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2, worker_init_fn=seed_worker)
    test_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2, worker_init_fn=seed_worker)
    
    model = Model(num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)

    accs = []

    for epoch in range(1):
        running_loss = 0.0
        model.train()
        bar = tqdm(train_loader)
        for inputs, y in bar:
            labels = y[:, att_index]
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss = 0.8 * running_loss + 0.2 * loss.item()
            bar.set_postfix({"loss": running_loss})

        correct = 0
        total = 0

        with torch.no_grad():
            model.eval()

            for inputs, y in test_loader:
                labels = y[:, att_index]
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy of the network on the test images: {correct / total:.2%}')

    return model 

# Sign Network 

In [7]:
from torch.utils.data import DataLoader
from pytorch_ood.utils import is_known
from tqdm.notebook import tqdm 
from pytorch_ood.dataset.img import TinyImages300k
from pytorch_ood.utils import ToUnknown
from torch.utils.data import random_split

def train_sign_model():
    tiny = TinyImages300k(root=root, download=True, transform=trans, target_transform=ToUnknown())
    data_train_out, data_test_out, _ = random_split(tiny, [50000, 10000, 240000], generator=torch.Generator().manual_seed(123))

    train_data_noatt = GTSRB(root=root, train=True, transforms=trans, target_transform=lambda y: y[0])
    test_data_noatt = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])

    new_loader = DataLoader(train_data_noatt + data_train_out, batch_size=32, shuffle=True, num_workers=10, worker_init_fn=seed_worker)
    new_test_loader = DataLoader(test_data_noatt + data_test_out, batch_size=32, shuffle=False, num_workers=10, worker_init_fn=seed_worker)

    model = Model(num_classes=2).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)

    accs = []

    for epoch in range(1):
        running_loss = 0.0
        model.train()
        
        bar = tqdm(new_loader)
        for inputs, y in bar:
            labels = is_known(y).long()
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss = 0.8 * running_loss + 0.2 * loss.item()
            bar.set_postfix({"loss": running_loss})

        correct = 0
        total = 0

        with torch.no_grad():
            model.eval()

            for inputs, y in new_test_loader:
                labels = is_known(y).long()
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy of the shape network on the test images: {correct / total:.2%}')
        accs.append(correct / total)

    return model 

# OOD Evaluation 

In [8]:
from pytorch_ood.dataset.img import (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize)
from pytorch_ood.detector import EnergyBased, MaxSoftmax
from pytorch_ood.utils import ToRGB, OODMetrics

def evaluate(label_net, shape_net, color_net, shield_net):
    _ = label_net.eval()
    _ = shape_net.eval()
    _ = color_net.eval()
    _ = shield_net.eval()
    
    results = []

    trans = Compose([Resize(size=(32, 32)), ToRGB(), ToTensor()])
    data_in = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])

    detectors = {
        "MSP": MaxSoftmax(label_net),
        "Energy": EnergyBased(label_net),
        "Ensemble": EnsembleDetector(label_net, shape_net, color_net),
        "Semantic": KBDetector("knowledge_base.pl", label_net, shape_net, color_net),
        "Semantic-OE": KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net),
    }
    
    datasets = {d.__name__: d for d in (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize)}
    
    for detector_name, detector in detectors.items():
        for data_name, dataset_c in datasets.items():
            data_out = dataset_c(root=root, transform=trans, target_transform=ToUnknown(), download=True)
            loader = DataLoader(data_in+data_out, batch_size=1024, shuffle=False, worker_init_fn=seed_worker)
            
            scores = []
            ys = []
            
            with torch.no_grad():
                for x, y in loader:
                    scores.append(detector(x.to(device)))
                    ys.append(y.to(device))
                    
                scores = torch.cat(scores, dim=0).cpu()
                ys = torch.cat(ys, dim=0).cpu()
            
            metrics = OODMetrics()
            metrics.update(scores, ys)
            r = metrics.compute()
            r.update({
                "Method": detector_name,
                "Dataset": data_name
            })
            print(r)
            results.append(r)
    
    return results 

In [9]:
def evaluate_acc(net, att_idx=0, oe=False):
    _ = net.eval()
    
    if oe:
        target_trans = lambda y: torch.tensor(1)
    else:
         target_trans = lambda y: y[att_idx]

    trans = Compose([Resize(size=(32, 32)), ToRGB(), ToTensor()])
    data_in = GTSRB(root=root, train=False, transforms=trans, target_transform=target_trans)
    loader = DataLoader(data_in, batch_size=1024, shuffle=False, worker_init_fn=seed_worker)
            
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            predicted = outputs.max(dim=1).indices
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    return correct / total  

def evaluate_accs(label_net, shape_net, color_net, shield_net):
    r = {}
    names = ("Label", "Color", "Shape",)
    
    for n, net in enumerate((label_net, color_net, shape_net)): 
        acc = evaluate_acc(net, n)
        r[names[n]] = acc
    
    acc = evaluate_acc(shield_net, oe=True)
    r["Sign"] = acc
    
    return [r] 

In [11]:
# %%time 

# _ = label_net.eval()
# _ = shape_net.eval()
# _ = color_net.eval()
# _ = shield_net.eval()

# results = []

# trans = Compose([Resize(size=(32, 32)), ToRGB(), ToTensor()])
# data_in = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])

# detectors = {
#     "abc": KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net)
# }

# datasets = {d.__name__: d for d in (Textures,)}

# for detector_name, detector in detectors.items():
#     for data_name, dataset_c in datasets.items():
#         data_out = dataset_c(root=root, transform=trans, target_transform=ToUnknown(), download=True)
#         loader = DataLoader(data_in+data_out, batch_size=1024, shuffle=False, worker_init_fn=seed_worker)

#         scores = []
#         ys = []

#         with torch.no_grad():
#             for x, y in loader:
#                 scores.append(detector(x.to(device)))
#                 ys.append(y.to(device))

#             scores = torch.cat(scores, dim=0).cpu()
#             ys = torch.cat(ys, dim=0).cpu()

#         metrics = OODMetrics()
#         metrics.update(scores, ys)
#         r = metrics.compute()
#         r.update({
#             "Method": detector_name,
#             "Dataset": data_name
#         })
#         print(r)


In [15]:
results = []
results_acc = []

for trial in range(1):
    shield_net = train_sign_model()
    shape_net = train_model(att_index=2, num_classes=5)
    color_net = train_model(att_index=1, num_classes=4)
    label_net = train_model(att_index=0, num_classes=43)
    
    res = evaluate(label_net, shape_net, color_net, shield_net)
    res_acc = evaluate_accs(label_net, shape_net, color_net, shield_net)
    
    for r in res:
        r.update({"Seed": trial})
        
    for r in res_acc:
        r.update({"Seed": trial})
    
    results += res
    results_acc += res_acc

  0%|          | 0/2788 [00:00<?, ?it/s]

Accuracy of the shape network on the test images: 99.83%


  0%|          | 0/1226 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.89%


  0%|          | 0/1226 [00:00<?, ?it/s]

Accuracy of the network on the test images: 99.31%


  0%|          | 0/1226 [00:00<?, ?it/s]

Accuracy of the network on the test images: 97.29%
{'AUROC': 0.9830532670021057, 'AUPR-IN': 0.9725900888442993, 'AUPR-OUT': 0.9891340136528015, 'ACC95TPR': 0.9461334347724915, 'FPR95TPR': 0.05692794919013977, 'Method': 'MSP', 'Dataset': 'LSUNCrop'}
{'AUROC': 0.9915062189102173, 'AUPR-IN': 0.9864577651023865, 'AUPR-OUT': 0.9944612979888916, 'ACC95TPR': 0.9587273597717285, 'FPR95TPR': 0.03436262905597687, 'Method': 'MSP', 'Dataset': 'LSUNResize'}
{'AUROC': 0.9812435507774353, 'AUPR-IN': 0.9512006640434265, 'AUPR-OUT': 0.9920799136161804, 'ACC95TPR': 0.9400109648704529, 'FPR95TPR': 0.0644497200846672, 'Method': 'MSP', 'Dataset': 'Textures'}
{'AUROC': 0.9851004481315613, 'AUPR-IN': 0.9781304001808167, 'AUPR-OUT': 0.9893771409988403, 'ACC95TPR': 0.9462218284606934, 'FPR95TPR': 0.05676959455013275, 'Method': 'MSP', 'Dataset': 'TinyImageNetCrop'}
{'AUROC': 0.9893456697463989, 'AUPR-IN': 0.9840818047523499, 'AUPR-OUT': 0.9926323294639587, 'ACC95TPR': 0.9539549350738525, 'FPR95TPR': 0.042913697

In [16]:
import pandas as pd 
result_df = pd.DataFrame(results)
print((result_df.groupby(by="Method").agg(["mean", "sem"]) * 100)[["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]].to_latex(float_format="%.2f"))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{AUROC} & \multicolumn{2}{l}{AUPR-IN} & \multicolumn{2}{l}{AUPR-OUT} & \multicolumn{2}{l}{FPR95TPR} \\
{} &  mean &  sem &    mean &  sem &     mean &  sem &     mean &  sem \\
Method      &       &      &         &      &          &      &          &      \\
\midrule
Energy      & 99.29 & 0.16 &   98.63 & 0.47 &    99.56 & 0.07 &     2.00 & 0.35 \\
Ensemble    & 99.68 & 0.06 &   99.41 & 0.19 &    99.81 & 0.03 &     1.35 & 0.28 \\
MSP         & 98.60 & 0.19 &   97.45 & 0.63 &    99.15 & 0.10 &     5.11 & 0.54 \\
Semantic    & 99.35 & 0.02 &   98.82 & 0.19 &    99.68 & 0.02 &     1.23 & 0.14 \\
Semantic-OE & 99.34 & 0.02 &   98.82 & 0.19 &    99.68 & 0.02 &     1.24 & 0.14 \\
\bottomrule
\end{tabular}



  print((result_df.groupby(by="Method").agg(["mean", "sem"]) * 100)[["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]].to_latex(float_format="%.2f"))
  print((result_df.groupby(by="Method").agg(["mean", "sem"]) * 100)[["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]].to_latex(float_format="%.2f"))


In [17]:
from scipy.stats import ttest_ind

sem_auroc = result_df[result_df["Method"] == "Semantic"].groupby(by=["Method", "Seed"]).mean()["AUROC"]
sem_ensemble =  result_df[result_df["Method"] == "Ensemble"].groupby(by=["Method", "Seed"]).mean()["AUROC"]

print(ttest_ind(sem_auroc, sem_ensemble, equal_var=False))

Ttest_indResult(statistic=nan, pvalue=nan)


  sem_auroc = result_df[result_df["Method"] == "Semantic"].groupby(by=["Method", "Seed"]).mean()["AUROC"]
  sem_ensemble =  result_df[result_df["Method"] == "Ensemble"].groupby(by=["Method", "Seed"]).mean()["AUROC"]
  print(ttest_ind(sem_auroc, sem_ensemble, equal_var=False))
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero


In [18]:
print((pd.DataFrame(results_acc) * 100).agg(["mean", "sem"]).to_latex(float_format="%.2f"))

\begin{tabular}{lrrrrr}
\toprule
{} &  Label &  Color &  Shape &  Sign &  Seed \\
\midrule
mean &  97.18 &  99.35 &  99.90 & 99.87 &  0.00 \\
sem  &    NaN &    NaN &    NaN &   NaN &   NaN \\
\bottomrule
\end{tabular}



  print((pd.DataFrame(results_acc) * 100).agg(["mean", "sem"]).to_latex(float_format="%.2f"))


In [19]:

def evaluate2(label_net, shape_net, color_net, shield_net):
    _ = label_net.eval()
    _ = shape_net.eval()
    _ = color_net.eval()
    _ = shield_net.eval()
    
    results = []

    trans = Compose([Resize(size=(32, 32)), ToRGB(), ToTensor()])
    data_in = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])
    # dataset_out_test = Textures(root=root, transform=trans, target_transform=ToUnknown(), download=True)

    detectors = {
        "MSP": MaxSoftmax(label_net),
        "Energy": EnergyBased(label_net),
        "Ensemble": EnsembleDetector(label_net, shape_net, color_net),
        "Semantic": KBDetector("knowledge_base.pl", label_net, shape_net, color_net),
        "Semantic-OE": KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net),
    }
    
    datasets = {d.__name__: d for d in (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize)}
    
    for detector_name, detector in detectors.items():
        for data_name, dataset_c in datasets.items():
            data_out = dataset_c(root=root, transform=trans, target_transform=ToUnknown(), download=True)
            loader = DataLoader(data_in+data_out, batch_size=1024, shuffle=False, worker_init_fn=seed_worker)
            
            scores = []
            ys = []
            ys_hat = []
            
            with torch.no_grad():
                for x, y in loader:
                    y_hat = label_net(x.to(device)).max(dim=1).indices
                    ys_hat.append(y_hat)
                    scores.append(detector(x.to(device)))
                    ys.append(y.to(device))
                    
                scores = torch.cat(scores, dim=0).cpu()
                ys = torch.cat(ys, dim=0).cpu()
                ys_hat = torch.cat(ys_hat, dim=0).cpu()
                
                l = -(ys != ys_hat).long()
            metrics = OODMetrics()
            metrics.update(scores, l)
            r = metrics.compute()
            r.update({
                "Method": detector_name,
                "Dataset": data_name
            })
            print(r)
            results.append(r)
    
    return results 

In [20]:
res = evaluate2(label_net, shape_net, color_net, shield_net)

{'AUROC': 0.9887268543243408, 'AUPR-IN': 0.9847310185432434, 'AUPR-OUT': 0.9913710355758667, 'ACC95TPR': 0.9554131627082825, 'FPR95TPR': 0.040084730833768845, 'Method': 'MSP', 'Dataset': 'LSUNCrop'}
{'AUROC': 0.9940543174743652, 'AUPR-IN': 0.9925169348716736, 'AUPR-OUT': 0.99504154920578, 'ACC95TPR': 0.9653999209403992, 'FPR95TPR': 0.021671826019883156, 'Method': 'MSP', 'Dataset': 'LSUNResize'}
{'AUROC': 0.9857792854309082, 'AUPR-IN': 0.9711555242538452, 'AUPR-OUT': 0.9926772713661194, 'ACC95TPR': 0.9500821232795715, 'FPR95TPR': 0.049942970275878906, 'Method': 'MSP', 'Dataset': 'Textures'}
{'AUROC': 0.9893912672996521, 'AUPR-IN': 0.9870184659957886, 'AUPR-OUT': 0.9909202456474304, 'ACC95TPR': 0.9551480412483215, 'FPR95TPR': 0.040573570877313614, 'Method': 'MSP', 'Dataset': 'TinyImageNetCrop'}
{'AUROC': 0.9922948479652405, 'AUPR-IN': 0.9906942844390869, 'AUPR-OUT': 0.9934539198875427, 'ACC95TPR': 0.9617322087287903, 'FPR95TPR': 0.028434088453650475, 'Method': 'MSP', 'Dataset': 'TinyImag

In [21]:
(result_df.groupby(by=["Method", "Seed"]).mean() * 100).groupby("Method").agg(["mean", "sem"])

  (result_df.groupby(by=["Method", "Seed"]).mean() * 100).groupby("Method").agg(["mean", "sem"])


Unnamed: 0_level_0,AUROC,AUROC,AUPR-IN,AUPR-IN,AUPR-OUT,AUPR-OUT,ACC95TPR,ACC95TPR,FPR95TPR,FPR95TPR
Unnamed: 0_level_1,mean,sem,mean,sem,mean,sem,mean,sem,mean,sem
Method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Energy,99.288969,,98.633252,,99.55847,,96.72277,,1.998416,
Ensemble,99.682112,,99.409517,,99.812239,,97.111565,,1.346002,
MSP,98.604983,,97.449214,,99.153694,,94.90097,,5.108472,
Semantic,99.345369,,98.822942,,99.68389,,97.191517,,1.22882,
Semantic-OE,99.343197,,98.82008,,99.682142,,97.186254,,1.236738,


In [22]:
pd.DataFrame(results_acc).agg(["mean", "sem"])
# dataset gives np.array([label, color, shape])

# results_acc = evaluate_accs(label_net, shape_net, color_net, shield_net)
# print(results_acc)

Unnamed: 0,Label,Color,Shape,Sign,Seed
mean,0.971813,0.993508,0.998971,0.998654,0.0
sem,,,,,


# Eval

In [23]:
def ood_label(x):
    if x == True:
        return "Normal"
    else:
        return "Anomaly"

In [24]:
trans = Compose([Resize(size=(32, 32)), ToRGB(), ToTensor()])

In [25]:
from pytorch_ood.utils import OODMetrics, ToRGB, ToUnknown
from pytorch_ood.dataset.img import Textures

In [26]:
test_in_data = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])
dataset_out_test = Textures(root=root, transform=trans, target_transform=ToUnknown(), download=True)
loader = DataLoader(dataset_out_test +  test_in_data, batch_size=128, worker_init_fn=seed_worker)

sem_detector = SemanticDetector(
            label_net, 
            shape_net, 
            color_net, 
            GTSRB(root=root).class_to_shape, 
            GTSRB(root=root).class_to_color, 
            sign_net=shield_net
        )

with torch.no_grad():
    metrics = OODMetrics()
    for x, y in tqdm(loader):
        metrics.update(sem_detector(x.to(device)), y.to(device))

print(metrics.compute())

  0%|          | 0/143 [00:00<?, ?it/s]

{'AUROC': 0.1820300966501236, 'AUPR-IN': 0.5679941773414612, 'AUPR-OUT': 0.998434841632843, 'ACC95TPR': 0.9922276735305786, 'FPR95TPR': 0.010055423714220524}


In [27]:
with torch.no_grad():
    metrics = OODMetrics()
    for x, y in tqdm(loader):
        metrics.update(-shield_net(x.to(device)).softmax(dim=1)[:,1].cpu(), y.to(device))

sign_net_metric = metrics.compute()
print(sign_net_metric)

  0%|          | 0/143 [00:00<?, ?it/s]

{'AUROC': 0.9999501705169678, 'AUPR-IN': 0.9998885989189148, 'AUPR-OUT': 0.9999778866767883, 'ACC95TPR': 0.984455406665802, 'FPR95TPR': 0.0001583531266078353}


In [28]:
from tqdm.notebook import tqdm 

test_in_data = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])
dataset_out_test = Textures(root=root, transform=trans, target_transform=ToUnknown(), download=True)
loader = DataLoader(dataset_out_test +  test_in_data, batch_size=128, worker_init_fn=seed_worker)

detector = MaxSoftmax(label_net)
    
sem_detector = KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net)

scores = []
my_scores = []
ys = []
xs = []
ys_hat = []

with torch.no_grad():
    for x, y in tqdm(loader):
        x = x.to(device)
        scores.append(detector(x))
        ys_hat.append(label_net(x).softmax(dim=1).max(dim=1).indices) 
        my_scores.append(sem_detector(x))
        
        ys.append(y)
        xs.append(x.cpu())


scores = torch.cat(scores, dim=0).cpu()
ys = torch.cat(ys, dim=0).cpu()
ys_hat = torch.cat(ys_hat, dim=0).cpu()
my_scores = torch.cat(my_scores, dim=0).cpu()
xs = torch.cat(xs, dim=0).cpu()


metrics = OODMetrics()
metrics.update(my_scores, ys)
print(metrics.compute())

  0%|          | 0/143 [00:00<?, ?it/s]

{'AUROC': 0.994219183921814, 'AUPR-IN': 0.9748163819313049, 'AUPR-OUT': 0.997749388217926, 'ACC95TPR': 0.967870831489563, 'FPR95TPR': 0.0241488516330719}


In [29]:
from tqdm.notebook import tqdm 


test_in_data = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])


sem_detector = KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net)

my_scores = []
ys = []

metrics = OODMetrics()

datasets = (LSUNCrop, LSUNResize, Textures, TinyImageNetCrop, TinyImageNetResize)
datasets = [c(root=root, transform=trans, target_transform=ToUnknown(), download=True) for c in datasets]
loaders = [DataLoader(d, batch_size=128, worker_init_fn=seed_worker) for d in datasets]
loaders.append( DataLoader(test_in_data, batch_size=128, worker_init_fn=seed_worker))


with torch.no_grad():
     for loader in loaders:
        for x, y in tqdm(loader):
            x = x.to(device)
            scores = sem_detector(x)

            metrics.update(scores.cpu(), y.cpu())
            my_scores.append(scores)
            ys.append(y)

print(metrics.compute())

ys = torch.cat(ys, dim=0).cpu()
my_scores = torch.cat(my_scores, dim=0).cpu()

metrics = OODMetrics()
metrics.update(my_scores, ys)
print(metrics.compute())

  0%|          | 0/79 [00:00<?, ?it/s]

  0%|          | 0/79 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/79 [00:00<?, ?it/s]

  0%|          | 0/79 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

{'AUROC': 0.9969454407691956, 'AUPR-IN': 0.9989409446716309, 'AUPR-OUT': 0.9932375550270081, 'ACC95TPR': 0.958040177822113, 'FPR95TPR': 0.012905780225992203}
{'AUROC': 0.9969454407691956, 'AUPR-IN': 0.9989409446716309, 'AUPR-OUT': 0.9932375550270081, 'ACC95TPR': 0.958040177822113, 'FPR95TPR': 0.012905780225992203}


In [30]:
from tqdm.notebook import tqdm 

test_in_data = GTSRB(root=root, train=False, transforms=trans, target_transform=lambda y: y[0])
dataset_out_test = Textures(root=root, transform=trans, target_transform=ToUnknown(), download=True)
loader = DataLoader(dataset_out_test +  test_in_data, batch_size=128, worker_init_fn=seed_worker)


 for data_name, dataset_c in datasets.items():
            data_out = dataset_c(root=root, transform=trans, target_transform=ToUnknown(), download=True)
            loader = DataLoader(data_in+data_out, batch_size=1024, shuffle=False, worker_init_fn=seed_worker)
            
sem_detector = KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net)

my_scores = []
ys = []

metrics = OODMetrics()

with torch.no_grad():
    for x, y in tqdm(loader):
        x = x.to(device)
        scores = sem_detector(x)
        
        metrics.update(scores.cpu(), y.cpu())
        my_scores.append(scores)
        ys.append(y)

print(metrics.compute())

ys = torch.cat(ys, dim=0).cpu()
my_scores = torch.cat(my_scores, dim=0).cpu()

metrics = OODMetrics()
metrics.update(my_scores, ys)
print(metrics.compute())

IndentationError: unexpected indent (3662306820.py, line 8)

In [None]:
scores

In [None]:
y

In [None]:
print(xs.shape)
print(my_scores.shape)
print(ys_hat.shape)
print(ys.shape)
print(scores.shape)
print(len(test_in_data))
print(len(dataset_out_test))


In [None]:
from pandas import DataFrame

df_1 = DataFrame()
df_1["Scores"] = scores.cpu().numpy()
df_1["Labels"] = ys >= 0
df_1["Labels"] = df_1["Labels"].apply(ood_label)
df_1["Method"] = "Implicit"

sb.histplot(data=df_1, x="Scores", hue="Labels", common_norm=False, stat="probability", bins=30)

# Examples 

In [None]:
# custom_params = {"axes.spines.right": True, "axes.spines.top": True, "axes.spines.bottom": True, "axes.spines.left": True}
import matplotlib.pyplot as plt 
import numpy as np 

sb.set_theme(style="white")

sem_detector = KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net)

dataset = GTSRB(root=root)

top_values, top_indxs = (-scores[ys<0]).topk(50)
datasets = dataset_out_test +  test_in_data
imgs = [datasets[i][0] for i in top_indxs]

for n, img in enumerate(imgs):
    img_batch = img.unsqueeze(0).to(device)
    
    print(img_batch.shape)
    with torch.no_grad():
        l = label_net(img_batch)
        s = shape_net(img_batch)
        c = color_net(img_batch)
        o = shield_net(img_batch)
        
    sign_detected = o.max(dim=1).indices.cpu()
        
    my_score = sem_detector(img_batch)[0]
    
    lindex = l.argmax(dim=1).item()
    sindex = s.argmax(dim=1).item()
    cindex = c.argmax(dim=1).item()
    oindex = o.argmax(dim=1).item()
    
    lname = dataset.class_to_name[lindex]
    sname = dataset.shape_to_name[sindex]
    cname = dataset.color_to_name[cindex]
    oname = "Sign" if oindex else "NoSign"
    
    plt.xticks([])
    plt.yticks([])
    plt.imshow(np.moveaxis(img.numpy(), 0, -1))
    plt.suptitle(f"'{lname.title()}' ({l.softmax(dim=1).max().item():.2%}) \n {sname.title()} | {cname.title()} | {oname.title()}")
    plt.title(f"Consistent: {'Yes' if abs(my_score) > 0.0 else 'No'}") # 'Yes' if my_score > 0.0 else 'No'
    plt.tight_layout(pad=0.5)
    plt.savefig(f"img/prediction-example-{n}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
sem_detector = KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net)

top_values, top_indxs = (scores[ys>=0]).topk(10)

for n, i in enumerate([i for i in top_indxs]):
    img = xs[ys>=0][i]
    img_batch = img.unsqueeze(0).to(device)
    
    with torch.no_grad():
        l = label_net(img_batch)
        s = shape_net(img_batch)
        c = color_net(img_batch)
        o = shield_net(img_batch)
    
    # my_score = detector(img_batch)[0]
    my_score = my_scores[ys>=0][i]
    # score = scores[ys>=0][i]
    
    lindex = l.argmax(dim=1).item()
    sindex = s.argmax(dim=1).item()
    cindex = c.argmax(dim=1).item()
    oindex = o.argmax(dim=1).item()
        
    lname = dataset.class_to_name[lindex]
    sname = dataset.shape_to_name[sindex]
    cname = dataset.color_to_name[cindex]
    oname = "Sign" if oindex == 1 else "NoSign"
        
    plt.imshow(np.moveaxis(img.numpy(), 0, -1))
    plt.suptitle(f"'{lname.title()}' ({l.softmax(dim=1).max().item():.2%}) \n {sname.title()} | {cname.title()} | {oname}")
    plt.title(f"Consistent: {'Yes' if abs(my_score) > 0.0 else 'No'}") # 'Yes' if my_score > 0.0 else 'No'
    plt.xticks([])
    plt.yticks([])
    plt.tight_layout(pad=0.5)
    plt.savefig(f"img/prediction-example-in-{n}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
sem_detector = KBDetector("knowledge_base.pl", label_net, shape_net, color_net, sign_net=shield_net)

# in-distribution where prediction was not correct and was rejected 
index = (ys>=0) & (ys != ys_hat.cpu()) & (my_scores.abs() == 0.0)
if index.sum().item() > 0:
    top_values, top_indxs = (-scores[index]).topk(1)

    for n, i in enumerate([i for i in top_indxs]):
        img = xs[index][i]
        img_batch = img.unsqueeze(0).to(device)

        print(img_batch.shape)
        with torch.no_grad():
            l = label_net(img_batch)
            s = shape_net(img_batch)
            c = color_net(img_batch)
            o = shield_net(img_batch)

        my_score = sem_detector(img_batch)[0]

        lindex = l.argmax(dim=1).item()
        sindex = s.argmax(dim=1).item()
        cindex = c.argmax(dim=1).item()

        lname = dataset.class_to_name[lindex]
        sname = dataset.shape_to_name[sindex]
        cname = dataset.color_to_name[cindex]
        oname = "Sign" if oindex == 1 else "NoSign"

        plt.imshow(np.moveaxis(img.numpy(), 0, -1))
        plt.suptitle(f"'{lname.title()}' ({l.softmax(dim=1).max().item():.2%}) \n {sname.title()} | {cname.title()} | {oname.title()}")
        plt.title(f"Consistent: {'Yes' if abs(my_score) > 0.0 else 'No'}") # 'Yes' if my_score > 0.0 else 'No'
        plt.tight_layout(pad=0.5)
        plt.xticks([])
        plt.yticks([])
        plt.savefig(f"img/prediction-example-in-error-{n}.pdf", bbox_inches="tight")
        plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sb 

ys_hat_anom = ys_hat.clone()
ys_hat_anom[my_scores > -0.99] = 44 

fig, ax = plt.subplots(figsize=(20,20))
m = confusion_matrix(ys[ys >= 0], ys_hat_anom[ys >= 0])

for i in range(m.shape[0]):
    m[i,i] = 0
    
disp = ConfusionMatrixDisplay(m, display_labels=list(dataset.class_to_name.values()) + ["Anomaly"])
disp.plot(ax=ax, xticks_rotation="vertical", colorbar=False)

plt.tight_layout(pad=0)

plt.savefig("img/confusion.pdf")
plt.savefig("img/confusion.jpg", dpi=300, bbox_inches="tight")

In [None]:
min_score = my_scores[ys < 0][my_scores[ys < 0] != -1].min().item() - 0.00001

ys_hat_anom = ys_hat.clone()
ys_hat_anom[my_scores > -0.99] = -1 

(ys_hat_anom[ys >= 0] == 44).sum()

print(f"Total number of street signs {(ys >= 0).sum()}")
print(f"Total number of anomalies {(ys < 0).sum()}")
print(f"Signs mistakenly marked as anomalies (no matter prediction) {(ys_hat_anom[ys >= 0] < 0).sum()}")
print(f"Signs mistakenly marked as anomalies (correct sign predicted) {(ys_hat_anom[ys == ys_hat] < 0).sum()}")
print(f"Signs mistakenly marked as anomalies (false sign predicted) {(ys_hat_anom[(ys != ys_hat) & (ys >= 0)] < 0).sum()}")

print(f"Signs maked as non-anomalous (false sign predicted) {(  (ys_hat_anom >= 0) & (ys_hat_anom != ys) ).sum() }")
print(f"Anomaly mistakenly marked as sign (no matter prediction) {(ys_hat_anom[ys < 0] >= 0).sum()}")

t = (ys_hat_anom[ys == ys_hat] < 0).sum() + ((ys_hat_anom >= 0) & (ys_hat_anom != ys)).sum() + (ys_hat_anom[ys < 0] >= 0).sum()
total = ys.shape[0]
print(f"Instances where our decision was not optimal: {t}/{total}={t/total:.3%}")

In [None]:
red_classes = [k for k, v in dataset.class_to_color.items() if dataset.color_to_name[v] == "red"]
red_classes.sort()
# red_classes

In [None]:
detector = EnsembleDetector(label_net, shape_net, color_net)

with torch.no_grad():
    metrics = OODMetrics()
    for x, y in loader:
        metrics.update(detector(x.to(device)), y.to(device))

metrics_ensemble = metrics.compute()
print(metrics_ensemble)

In [None]:
sem_detector = SemanticDetector(
    label_net, 
    shape_net, 
    color_net, 
    GTSRB(root=root).class_to_shape, 
    GTSRB(root=root).class_to_color, 
    sign_net=shield_net
)

scores = []
ys = []

with torch.no_grad():
    for x, y in loader:
        scores.append(sem_detector(x.to(device)).cpu())
        ys.append(y)
        
scores = torch.cat(scores)
ys = torch.cat(ys)


metrics = OODMetrics()
metrics.update(scores, ys)
metrics_semantic = metrics.compute()

In [None]:
df_2 = DataFrame()
df_2["Scores"] = scores.cpu().numpy()
df_2["Labels"] = ys >= 0
df_2["Labels"] = df_2["Labels"].apply(ood_label)
df_2["Method"] = "Explicit"

sb.histplot(data=df_2, x="Scores", hue="Labels", common_norm=False, stat="probability", bins=30)

In [None]:
sb.displot(pd.concat([df_1, df_2]).reset_index(), x="Scores", hue="Labels", col="Method", stat="probability", bins=30, kind="hist",  common_norm=False)

In [None]:
metrics_semantic["Method"] = "Explicit"
metrics_semantic

In [None]:
metrics_softmax["Method"] = "Implicit"

In [None]:
metrics_ensemble["Method"] = "Ensemble"

In [None]:
result_df = pd.DataFrame([metrics_semantic, metrics_softmax, metrics_ensemble])

In [None]:
# plot distplot
fig, ax = plt.subplots()
sb.barplot(data=result_df, x="Method", y="AUROC")

# change the limits of X-axis
ax.set_ylim(0.95, 1)
plt.show()

In [None]:
print((result_df.groupby(by="Method").mean() * 100)[["AUROC", "AUPR-IN", "AUPR-OUT", "FPR95TPR"]].to_latex(float_format="%.2f"))