In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Add Bayesian-and-novelty directory to the PYTHONPATH
import sys
import os
sys.path.append(os.path.realpath('../../..'))

# Autoreload changes in utils, etc.
%load_ext autoreload
%autoreload 2

from novelty.utils.metrics import plot_roc, plot_prc
from novelty.utils.metrics import get_summary_statistics
from novelty.utils.metrics import html_summary_table

In [2]:
# Training settings
BATCH_SIZE = 128
EPOCHS = 200
LR = 0.1
MOMENTUM = 0.9
NO_CUDA = False
SEED = 1
CLASSES = 10
MODEL_PATH_ROOT = './weights/wrn-28-10-cifar10'
MODEL_PATH = MODEL_PATH_ROOT + '.pth'

# CIFAR10 per-channel (R, G, B) mean and standard deviation, rescaled from the
# 0-255 pixel range to [0, 1]; passed to transforms.Normalize below.
# NOTE(review): hard-coded values, presumably computed on the training split - confirm.
CHANNEL_MEANS = (125.3/255, 123.0/255, 113.9/255)
CHANNEL_STDS = (63.0/255, 62.1/255.0, 66.7/255.0)

# Plot ROC and PR curves
PLOT_CHARTS = True

# ODIN parameters: softmax temperature (logits are divided by it) and the
# step size of the sign-gradient input perturbation.
TEMP = 1000.
NOISE_MAGNITUDE = 0.0012

## Training and Testing functions

In [3]:
from novelty.utils import Progbar


def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    model: object
        The network to optimise; it is switched to training mode.

    device: object
        The torch device each batch is moved to before the forward pass.

    train_loader: object
        A pytorch dataloader yielding ``(inputs, labels)`` batches.

    optimizer: object
        The optimizer stepped once per batch.

    epoch: int
        Index of the current epoch (accepted for the caller's convenience,
        not used inside the function).
    """
    # The progress bar counts samples, not batches.
    bar = Progbar(target=len(train_loader.dataset))

    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        log_probs = F.log_softmax(model(inputs), dim=1)
        batch_loss = F.nll_loss(log_probs, labels)
        batch_loss.backward()
        optimizer.step()

        bar.add(len(inputs), [("loss", batch_loss.item())])
        
    

In [4]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = F.log_softmax(model(data), dim=1)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, size_average=False).item()
            # get the index of the max log-probability
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_acc = 100. * correct / len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), test_acc))
    
    return test_loss, test_acc

## Initialize model and load CIFAR10

In [5]:
from novelty.utils import DATA_DIR
from src.wide_resnet import Wide_ResNet

# Seed torch's global RNG before anything below draws from it.
torch.manual_seed(SEED)

# Use the GPU only when it is both permitted (NO_CUDA is False) and available.
use_cuda = not NO_CUDA and torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Per-image preprocessing: convert to a tensor, then standardise each channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEANS, CHANNEL_STDS),
])

# Worker / pinned-memory options are only passed to the loaders when on CUDA.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 2, 'pin_memory': True}

# CIFAR10 training split, reshuffled every epoch.
cifar10_train = datasets.CIFAR10(
    os.path.join(DATA_DIR, 'cifar10'), train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(
    cifar10_train, batch_size=BATCH_SIZE, shuffle=True, **kwargs)

# CIFAR10 test split, kept in a fixed order.
cifar10_test = datasets.CIFAR10(
    os.path.join(DATA_DIR, 'cifar10'), train=False, transform=transform, download=True)
test_loader = torch.utils.data.DataLoader(
    cifar10_test, batch_size=BATCH_SIZE, shuffle=False, **kwargs)

# Build the Wide-ResNet and place it on the chosen device.
model = Wide_ResNet(28, 10, 0.0, CLASSES).to(device)

# SGD with momentum; the learning rate is multiplied by 0.1 at 50% and 75% of
# the configured number of epochs.
optimizer = optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM)
lr_milestones = [int(EPOCHS*0.5), int(EPOCHS*0.75)]
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=lr_milestones, gamma=0.1)

Files already downloaded and verified
Files already downloaded and verified
| Wide-Resnet 28x10


## Optimization loop

In [6]:
if os.path.exists(MODEL_PATH):
    # Load previously trained weights. map_location lets a checkpoint that was
    # saved from a GPU be restored on whatever device this session uses.
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))

else:
    # torch.save does not create missing directories, so make sure the
    # checkpoint folder exists before the first save.
    os.makedirs(os.path.dirname(MODEL_PATH) or '.', exist_ok=True)

    best_loss = float("inf")

    # Training loop
    for epoch in range(EPOCHS):
        print("Epoch:", epoch)

        # Print the learning rate used for this epoch
        for param_group in optimizer.param_groups:
            print('Learning rate:', param_group['lr'])

        train(model, device, train_loader, optimizer, epoch)
        loss, acc = test(model, device, test_loader)

        # Checkpoint the model parameters whenever the test loss improves
        if loss < best_loss:
            torch.save(model.state_dict(), "{}_epoch{}.pth".format(MODEL_PATH_ROOT, epoch))
            best_loss = loss

        # Advance the LR schedule only after this epoch's optimizer steps.
        # Stepping the scheduler first (as PyTorch < 1.1 expected) makes newer
        # PyTorch hit every milestone one epoch early and emit a warning.
        scheduler.step()

    # Save the weights from the final epoch (not necessarily the best checkpoint)
    torch.save(model.state_dict(), MODEL_PATH)

## ODIN prediction functions

In [7]:
from torch.autograd import Variable


def predict(model, data, device):
    """Return the softmax class probabilities of ``model`` for a batch.

    model: object
        The network to query; it is switched to eval mode.

    data: tensor
        A batch of inputs; it is moved to ``device`` before the forward pass.

    device: object
        The torch device to run on.

    Returns a tensor of per-class probabilities (softmax over dim 1). The
    forward pass runs under ``torch.no_grad()``, so the result carries no
    autograd graph.
    """
    model.eval()
    data = data.to(device)
    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(data)
        outputs = outputs - outputs.max(1)[0].unsqueeze(1)  # For stability
        return F.softmax(outputs, dim=1)


def predict_temp(model, data, device, temp=1000.):
    """Return temperature-scaled softmax probabilities of ``model`` for a batch.

    model: object
        The network to query; it is switched to eval mode.

    data: tensor
        A batch of inputs; it is moved to ``device`` before the forward pass.

    device: object
        The torch device to run on.

    temp: float, optional (default=1000.)
        The logits are divided by this value before the softmax.

    Returns a tensor of per-class probabilities (softmax over dim 1). The
    forward pass runs under ``torch.no_grad()``, so the result carries no
    autograd graph.
    """
    model.eval()
    data = data.to(device)
    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        # Out-of-place division so the tensor returned by the model is never
        # modified behind its back.
        outputs = model(data) / temp
        outputs = outputs - outputs.max(1)[0].unsqueeze(1)  # For stability
        return F.softmax(outputs, dim=1)


def predict_novelty(model, data, device, temp=1000., noiseMagnitude=0.0012):
    """Return ODIN-style softmax probabilities for a batch.

    The batch is nudged by a small sign-gradient step that raises the
    temperature-scaled score of the currently predicted class, and the
    temperature-scaled softmax of the perturbed batch is returned.

    model: object
        The network to query; it is switched to eval mode.

    data: tensor
        A batch of normalised images, indexed as (sample, channel, ...) with
        one channel per entry of ``CHANNEL_STDS``. The caller's tensor is not
        modified.

    device: object
        The torch device to run on.

    temp: float, optional (default=1000.)
        The logits are divided by this value before the softmax.

    noiseMagnitude: float, optional (default=0.0012)
        Step size of the perturbation, expressed in un-normalised pixel units.

    Returns a tensor of per-class probabilities (softmax over dim 1) carrying
    no autograd graph.
    """
    model.eval()

    # Work on a detached copy so gradients can be taken w.r.t. the pixels
    # without flipping requires_grad on the tensor the caller handed in.
    inputs = data.to(device).detach().clone().requires_grad_(True)

    # Temperature-scaled log-probabilities of the clean batch
    log_probs = F.log_softmax(model(inputs) / temp, dim=1)

    # NLL of the model's own predicted label for each sample
    labels = torch.argmax(log_probs, dim=1).detach()
    loss = F.nll_loss(log_probs, labels)

    # Only the gradient w.r.t. the input is needed; autograd.grad avoids
    # accumulating unused .grad buffers on the model parameters.
    (input_grad,) = torch.autograd.grad(loss, inputs)

    # Sign of the gradient as values in {-1, +1} (zero maps to +1)
    gradient = (torch.ge(input_grad, 0).float() - 0.5) * 2

    # The images live in normalised space ((x - mean) / std), so a step of
    # size eps in pixel space is eps / std here. The mean cancels out of any
    # difference and must not be subtracted. Applied to every sample in the
    # batch, not just the first.
    channel_stds = torch.tensor(
        CHANNEL_STDS, dtype=gradient.dtype, device=gradient.device).view(1, -1, 1, 1)
    gradient = gradient / channel_stds

    with torch.no_grad():
        # Step against the NLL gradient, i.e. towards a higher score for the
        # predicted class. NOTE(review): this appears to match the ODIN paper's
        # x - eps * sign(-grad log S); confirm against the paper.
        perturbed = inputs.detach() - noiseMagnitude * gradient

        # Temperature-scaled softmax of the perturbed batch
        outputs = model(perturbed) / temp
        outputs = outputs - outputs.max(1)[0].unsqueeze(1)  # For stability
        return F.softmax(outputs, dim=1)

## Evaluate method on outlier datasets

In [8]:
def get_max_model_outputs(data_loader, device):
    """Collect the largest softmax probability of every sample in a loader.

    Uses the notebook-level ``model`` through ``predict``.

    data_loader: object
        A pytorch dataloader yielding ``(inputs, labels)`` batches; the labels
        are ignored.

    device: object
        The torch device handed to ``predict``.

    Returns a flat Python list with one score per sample, in loader order.
    """
    scores = []

    for batch, _ in data_loader:
        # Plain (un-tempered, un-perturbed) softmax of the model
        probs = predict(model, batch, device)
        top_prob = probs.max(dim=1)[0]
        # Bring the values back to the host and append them one by one
        scores.extend(top_prob.cpu().detach().numpy())

    return scores


def get_max_odin_outputs(data_loader, device, temp=1000., noiseMagnitude=0.0012):
    """Collect the largest ODIN softmax probability of every sample in a loader.

    Uses the notebook-level ``model`` through ``predict_novelty``.

    data_loader: object
        A pytorch dataloader yielding ``(inputs, labels)`` batches; the labels
        are ignored.

    device: object
        The torch device handed to ``predict_novelty``.

    temp: float, optional (default=1000.)
        Temperature forwarded to ``predict_novelty``.

    noiseMagnitude: float, optional (default=0.0012)
        Perturbation step size forwarded to ``predict_novelty``.

    Returns a flat Python list with one score per sample, in loader order.
    """
    scores = []

    for batch, _ in data_loader:
        # Temperature-scaled softmax of the perturbed batch
        probs = predict_novelty(model, batch, device, temp=temp, noiseMagnitude=noiseMagnitude)
        top_prob = probs.max(dim=1)[0]
        # Bring the values back to the host and append them one by one
        scores.extend(top_prob.cpu().detach().numpy())

    return scores

In [9]:
# Build results table
results = {
    "WRN-28-10": {
        "inlier_name": "CIFAR10",
        "outliers": {}
    },
    "WRN-28-10-ODIN": {
        "inlier_name": "CIFAR10",
        "outliers": {}
    }
}

### Process Inliers

In [10]:
# Number of in-distribution samples. len() works on any map-style dataset,
# whereas the `test_data` attribute is not available in newer torchvision.
num_inliers = len(test_loader.dataset)

# Get predictions on in-distribution images
cifar_model_maximums = get_max_model_outputs(test_loader, device)
cifar_odin_maximums = get_max_odin_outputs(test_loader, device, temp=TEMP, noiseMagnitude=NOISE_MAGNITUDE)

## Get CIFAR10 outputs

In [14]:
# CIFAR10 test, baseline: vanilla WRN softmax (T=1, no perturbation)
cifar10_t1_e0 = get_max_model_outputs(test_loader, device)

# CIFAR10 test, full ODIN: T=1000 and perturbation e=0.0012
cifar10_t1000_e0_0012 = get_max_odin_outputs(test_loader, device, temp=1000, noiseMagnitude=0.0012)

# CIFAR10 test, perturbation only: T=1 and e=0.0012
cifar10_t1_e0_0012 = get_max_odin_outputs(test_loader, device, temp=1, noiseMagnitude=0.0012)

# CIFAR10 test, temperature only: T=1000 and e=0.0
# (previously called with temp=1, noiseMagnitude=0.0012, which silently
# duplicated the perturbation-only run above)
cifar10_t1000_e0 = get_max_odin_outputs(test_loader, device, temp=1000, noiseMagnitude=0.0)

In [27]:
import csv

# Write each CIFAR10 score list as a single CSV row in its own file.
cifar10_score_files = {
    'cifar10_t1_e0.csv': cifar10_t1_e0,
    'cifar10_t1000_e0_0012.csv': cifar10_t1000_e0_0012,
    'cifar10_t1_e0_0012.csv': cifar10_t1_e0_0012,
    'cifar10_t1000_e0.csv': cifar10_t1000_e0,
}

for csv_path, csv_scores in cifar10_score_files.items():
    # newline='' is what the csv module requires of files it writes to;
    # without it some platforms emit an extra carriage return per row.
    with open(csv_path, 'w', newline='') as f:
        csv.writer(f).writerow(list(csv_scores))

## CIFAR100 outputs

In [28]:
# CIFAR100 test split as the out-of-distribution set, preprocessed with the
# same transform as the CIFAR10 inliers.
cifar100_loader = torch.utils.data.DataLoader(
    datasets.CIFAR100(os.path.join(DATA_DIR, 'cifar100'), train=False, transform=transform, download=True),
    batch_size=BATCH_SIZE, shuffle=False, **kwargs)

# CIFAR100 test, baseline: vanilla WRN softmax (T=1, no perturbation)
cifar100_t1_e0 = get_max_model_outputs(cifar100_loader, device)

# CIFAR100 test, full ODIN: T=1000 and perturbation e=0.0012
cifar100_t1000_e0_0012 = get_max_odin_outputs(cifar100_loader, device, temp=1000, noiseMagnitude=0.0012)

# CIFAR100 test, perturbation only: T=1 and e=0.0012
cifar100_t1_e0_0012 = get_max_odin_outputs(cifar100_loader, device, temp=1, noiseMagnitude=0.0012)

# CIFAR100 test, temperature only: T=1000 and e=0.0
# (previously called with temp=1, noiseMagnitude=0.0012, which silently
# duplicated the perturbation-only run above)
cifar100_t1000_e0 = get_max_odin_outputs(cifar100_loader, device, temp=1000, noiseMagnitude=0.0)

Files already downloaded and verified


In [29]:
import csv

# Write each CIFAR100 score list as a single CSV row in its own file.
cifar100_score_files = {
    'cifar100_t1_e0.csv': cifar100_t1_e0,
    'cifar100_t1000_e0_0012.csv': cifar100_t1000_e0_0012,
    'cifar100_t1_e0_0012.csv': cifar100_t1_e0_0012,
    'cifar100_t1000_e0.csv': cifar100_t1000_e0,
}

for csv_path, csv_scores in cifar100_score_files.items():
    # newline='' is what the csv module requires of files it writes to;
    # without it some platforms emit an extra carriage return per row.
    with open(csv_path, 'w', newline='') as f:
        csv.writer(f).writerow(list(csv_scores))