In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torchvision import transforms, datasets
import torch.backends.cudnn as cudnn

import time
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import math
from scipy.special import softmax

from networks import ResNet
from utils import *
from utils_plotting import *

In [None]:
# Select the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Flag read by the backbone train/test code to decide whether to call .cuda().
use_cuda = torch.cuda.is_available()

## Dataset

In [None]:
# Mini-batch size shared by the image loaders below and the feature loaders later on.
batch_size = 128
# Per-channel statistics handed to transforms.Normalize
# (presumably the CIFAR-10 training-set mean/std -- TODO confirm).
mean = (0.4914, 0.4822, 0.4465)
std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random horizontal flip and random 32x32 crop (4 px padding)
# before tensor conversion and normalisation.
transform_train = transforms.Compose([transforms.RandomHorizontalFlip(),
                                    transforms.RandomCrop(32, padding=4),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean, std)
                                    ])
# Evaluation pipeline: tensor conversion and normalisation only, no augmentation.
transform_test = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(mean, std)])

# train=True / train=False select the two CIFAR-10 splits stored under ./data.
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
valset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Only the training loader shuffles its samples.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=4)

# NOTE(review): `train_data` looks like an attribute of older torchvision releases;
# newer ones appear to expose `.data` instead -- confirm against the installed version.
print(trainset.train_data.shape)

In [None]:
# Output folders for checkpoints and result arrays. Both strings end with '/',
# and later cells append another '/' before the file name.
models_dir = "Models/" + 'Contrastive_Reasoning_models/'
results_dir = "Results/" + 'Contrastive_Reasoning_results/'

## Network

In [None]:
class Net_classifier(nn.Module):
    """MLP head: 640 inputs -> 300 -> 100 -> 10 outputs with sigmoid hidden units.

    ``bn`` and the two dropout modules are registered on the module but are not
    applied in ``forward``; only the three linear layers take part in the
    computation.
    """

    def __init__(self):
        super(Net_classifier, self).__init__()
        # Sub-modules are created in a fixed order so that parameter registration
        # (and therefore random initialisation) stays the same.
        self.bn = nn.BatchNorm1d(640)
        self.fc1 = nn.Linear(640, 300)
        self.fc1_drop = nn.Dropout(0.5)
        self.fc2 = nn.Linear(300, 100)
        self.fc2_drop = nn.Dropout(0.5)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        """Return the raw (un-normalised) 10-way scores for ``x``."""
        hidden = x
        # Two sigmoid-activated hidden layers followed by a linear read-out.
        for layer in (self.fc1, self.fc2):
            hidden = torch.sigmoid(layer(hidden))
        return self.fc3(hidden)

In [None]:
# Build the backbone from the project-local `networks` module
# (presumably depth 18 and 10 output classes -- TODO confirm against networks.ResNet).
net = ResNet(18, 10)

## Training

### Train Backbone

In [None]:
# On GPU machines: move the backbone to CUDA, wrap it in DataParallel over every
# visible device and switch on cuDNN benchmark mode.
if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

def learning_rate(init, epoch):
    """Step-decay schedule: multiply ``init`` by 0.2 for every milestone passed.

    Milestones (exclusive) are epochs 60, 120, 160 and 300.

    Args:
        init: base learning rate.
        epoch: current epoch index.

    Returns:
        ``init * 0.2 ** k`` where ``k`` is the number of milestones that
        ``epoch`` has exceeded (0 to 4).
    """
    # A single if/elif chain, checked from the latest milestone down, so the
    # ``epoch > 300`` case is no longer overwritten by the ``epoch > 160`` test.
    if epoch > 300:
        optim_factor = 4
    elif epoch > 160:
        optim_factor = 3
    elif epoch > 120:
        optim_factor = 2
    elif epoch > 60:
        optim_factor = 1
    else:
        optim_factor = 0

    return init*math.pow(0.2, optim_factor)


In [None]:
criterion = nn.CrossEntropyLoss()

def train(epoch):
    """Run one training epoch of the global ``net`` over ``trainloader``.

    Reads the notebook globals ``net``, ``trainloader``, ``trainset``,
    ``batch_size``, ``use_cuda``, ``num_epochs`` and ``criterion``; prints the
    learning rate at the start and one summary line (last-batch loss and
    running top-1 accuracy) at the end.

    Args:
        epoch: epoch index, used for the learning-rate schedule and the log lines.
    """
    net.train()
    correct = 0
    total = 0

    base_lr = 0.1
    current_lr = learning_rate(base_lr, epoch)

    # The optimizer is built inside this function, so its momentum buffers start
    # from scratch on every call (i.e. every epoch).
    optimizer = torch.optim.SGD(net.parameters(), lr=current_lr, momentum=0.9, weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, current_lr))
    num_batches = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda() # GPU settings
        optimizer.zero_grad()
        outputs = net(inputs)               # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step() # Optimizer update

        # Accuracy bookkeeping works on detached outputs; tensors no longer need
        # the deprecated Variable wrapper.
        _, predicted = torch.max(outputs.detach(), 1)
        total += targets.size(0)
        correct += predicted.eq(targets).cpu().sum()
        num_batches = batch_idx + 1

    if num_batches == 0:
        # Nothing was iterated, so there is no loss/accuracy to report.
        print('| Epoch [%3d/%3d] no training batches' %(epoch, num_epochs))
        return

    print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                %(epoch, num_epochs, num_batches,
                    (len(trainset)//batch_size)+1, loss.item(), 100.*correct/total))

def test(epoch):
    """Evaluate the global ``net`` on ``valloader`` and checkpoint the best model.

    Reads the notebook globals ``net``, ``valloader``, ``criterion``,
    ``use_cuda`` and ``models_dir``; updates the global ``best_acc`` and writes
    ``backbone.t7`` whenever the accuracy improves.

    Args:
        epoch: epoch index, stored in the checkpoint and printed in the log line.
    """
    import os  # local import: only needed to make sure the checkpoint folder exists

    global best_acc
    net.eval()
    correct = 0
    total = 0
    # The whole evaluation loop runs under no_grad so the forward pass itself
    # (not just the input handling) skips autograd bookkeeping.
    with torch.no_grad():
        for inputs, targets in valloader:
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).cpu().sum()

    # Save checkpoint when best model
    acc = 100.*correct/total
    # The printed loss is the loss of the last validation batch.
    print("\n| Validation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%%" %(epoch, loss.item(), acc))

    if acc > best_acc:
        print('| Saving Best model...\t\t\tTop1 = %.2f%%' %(acc))
        state = {
                # Store the bare module (unwrapped from DataParallel on GPU runs).
                'net':net.module if use_cuda else net,
                'acc':acc,
                'epoch':epoch,
        }

        # torch.save does not create missing directories.
        os.makedirs(models_dir, exist_ok=True)
        torch.save(state, models_dir + '/' + 'backbone.t7')
        best_acc = acc

def get_hms(seconds):
    """Split a duration given in seconds into ``(hours, minutes, seconds)``."""
    total_minutes = seconds // 60
    leftover_seconds = seconds % 60
    hours = total_minutes // 60
    minutes = total_minutes % 60

    return hours, minutes, leftover_seconds

In [None]:
# Number of backbone epochs; also read by train() for its progress line.
num_epochs = 200
# Best validation accuracy so far; test() updates it through `global best_acc`.
best_acc = 0

start = time.time()
elapsed_time = 0
for epoch in range(num_epochs):
    start_time = time.time()

    train(epoch)
    test(epoch)

    # Accumulate per-epoch wall-clock time and print it as h:mm:ss.
    epoch_time = time.time() - start_time
    elapsed_time += epoch_time
    print('| Elapsed time : %d:%02d:%02d'  %(get_hms(elapsed_time)))

print('\nTesting model')
print('* Test results : Acc@1 = %.2f%%' %(best_acc))
end = time.time()
# Total wall-clock seconds of the whole loop.
print(end - start)

### Train Projection Head

In [None]:
def get_grads(loader, model, data, feat):  # , target_class):
    """Collect, per image, the gradient of an MSE loss w.r.t. ``model.linear.weight``.

    Each element of ``data`` is passed through ``loader``, turned into a batch
    of one and fed to ``model`` (in eval mode).  The first element of the model
    output is compared, with a mean-squared-error loss, against a length-10
    target filled with ``feat``; the gradient of that loss with respect to
    ``model.linear.weight`` is flattened and stored.

    Args:
        loader: callable applied to every element of ``data`` before batching.
        model: module exposing a ``linear`` layer; it is moved to the GPU when
            CUDA is available and left on the CPU otherwise.
        data: indexable collection of inputs with a length.
        feat: value used to fill the target vector.

    Returns:
        ``(all_grads, all_grad_pred)``: ``all_grads`` holds one flat CPU tensor
        per input; ``all_grad_pred`` holds one empty tensor per input and is
        kept only so existing callers can keep unpacking two values.
    """
    # Same fallback rule as the notebook-level `device`: GPU if present, else CPU.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    on_gpu = run_device.type == "cuda"
    if on_gpu:
        torch.cuda.empty_cache()

    mse_loss = nn.MSELoss()
    model.to(run_device)
    model.eval()

    # The target is identical for every image, so build it once.
    target = torch.from_numpy(feat*np.ones(10)).float().to(run_device)

    all_grads = []
    all_grad_pred = []

    for num_im in range(len(data)):
        img = torch.unsqueeze(loader(data[num_im]), 0).to(run_device)

        output = model(img)[0]
        pred_loss = mse_loss(output, target.expand_as(output))

        # autograd.grad returns only the requested gradient and does not
        # accumulate into any parameter's .grad, so nothing has to be zeroed
        # between images and no stale gradient can leak into the result.
        (weight_grad,) = torch.autograd.grad(pred_loss, [model.linear.weight])

        all_grads.append(weight_grad.detach().cpu().reshape(-1))
        all_grad_pred.append(torch.zeros(0))

    if on_gpu:
        torch.cuda.empty_cache()

    return all_grads, all_grad_pred

def scale(X, x_min, x_max):
    """Min-max rescale ``X`` along axis 0 into the interval ``[x_min, x_max]``.

    Entries along axis 0 whose minimum equals their maximum use a divisor of 1,
    so they map to ``x_min`` instead of dividing by zero.
    """
    lowest = X.min(axis=0)
    spread = X.max(axis=0) - lowest
    spread[spread == 0] = 1
    return x_min + (X - lowest) * (x_max - x_min) / spread

def preprocess_test(dat1):
    """Stack the items of ``dat1`` into one squeezed array and rescale it to [-1, 1].

    The scaling bounds are taken from ``dat1`` itself (see ``scale``).
    """
    stacked = np.asarray([np.asarray(item) for item in dat1]).squeeze()
    return scale(stacked, -1, 1)

def preprocess_train(dat1):
    """Stack the items of ``dat1`` into one squeezed array and rescale it to [-1, 1].

    Performs the same steps as ``preprocess_test``; the scaling bounds are taken
    from ``dat1`` itself (see ``scale``).
    """
    stacked = np.asarray([np.asarray(item) for item in dat1]).squeeze()
    return scale(stacked, -1, 1)

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader`` with cross-entropy loss.

    Every batch is moved to ``device`` and viewed as rows of 640 features before
    the forward pass.  A progress line is printed on every 1000th batch
    (including the first).  Note that this definition rebinds the name
    ``train`` used earlier for the backbone.
    """
    model.train()
    for step, (features, labels) in enumerate(train_loader):
        # Resnet-50,101 = 2560
        features = features.to(device).view(-1, 640)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.cross_entropy(model(features), labels)
        loss.backward()
        optimizer.step()

        if step % 1000 != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(features), len(train_loader.dataset),
                   100. * step / len(train_loader), loss.item()))

def test(net, testloader):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    logits = []
    labels = []
    preds = []

    for batch_idx, (inputs, targets) in enumerate(testloader):
        # targets = torch.unsqueeze(targets,1)
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)

        logits.append(outputs.tolist())
        labels.append(targets.tolist())
        _, predicted = torch.max(outputs.data, 1)
        preds.append(predicted)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    acc = 100. * correct / total
    return acc, correct, total, logits, labels, preds

In [None]:
# Reload the checkpoint written by the backbone evaluation step; its 'net' entry
# holds the network object itself.
checkpoint = torch.load(models_dir + '/' + 'backbone.t7')
model = checkpoint['net']
model = model.cuda()

# Labels of both splits as int64 tensors, used to build the feature datasets below.
# NOTE(review): `test_labels` / `train_labels` look like attributes of older
# torchvision releases (newer ones appear to use `.targets`) -- confirm.
test_labels = valset.test_labels
test_labels = torch.Tensor(test_labels)
test_labels = test_labels.long()

train_labels = trainset.train_labels
train_labels = torch.Tensor(train_labels)
train_labels = train_labels.long()

In [None]:
# get gradients
# Both splits go through transform_test (no augmentation) and use the same
# target fill value for the loss.
grads_train, _ = get_grads(transform_test, model, trainset.train_data, feat= 15.5)
grads_test, _ = get_grads(transform_test, model, valset.test_data, feat = 15.5)

In [None]:
# Rescale the training gradients to [-1, 1] and pair them with their labels.
# No shuffle argument is passed to this loader.
train_grads = preprocess_train(grads_train)
train_tensor = torch.stack([torch.Tensor(i) for i in train_grads])
train_dataset = torch.utils.data.TensorDataset(train_tensor, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)

# Same steps for the test gradients; they are rescaled with bounds computed from
# the test gradients themselves.
test_grads = preprocess_test(grads_test)
test_tensor = torch.stack([torch.Tensor(i) for i in test_grads])
test_dataset = torch.utils.data.TensorDataset(test_tensor, test_labels)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [None]:
# Train the MLP head on the gradient features.
# `num_epochs` and `net` rebind the names used earlier for the backbone.
num_epochs = 200
lr = 0.1

# NOTE(review): .cuda() is unconditional here although `device` may be the CPU.
net = Net_classifier().cuda()

for epoch in range(num_epochs):
    # A new SGD optimizer is constructed every epoch with the scheduled learning rate.
    optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate(lr, epoch), momentum=0.9, weight_decay=5e-4)
    train(net, device, train_loader, optimizer, epoch)

In [None]:
# Evaluate the trained head on the test features and print the fraction correct.
acc, correct, total, logits, labels, preds = test(net, test_loader)
print(correct.numpy() / total)

In [None]:
# Persist the whole head module object (not only its state dict).
torch.save(net, models_dir + '/' + 'head.t7')

## Inference

In [None]:
# Reload the backbone from its checkpoint dict and the head that was saved as a
# whole module object.
checkpoint = torch.load(models_dir + '/' + 'backbone.t7')
model = checkpoint['net']

mlp = torch.load(models_dir + '/' + 'head.t7')
mlp = mlp.cuda()

In [None]:
# Validation labels as an int64 tensor.
# NOTE(review): `test_labels` looks like an attribute of older torchvision
# releases (newer ones appear to use `.targets`) -- confirm.
test_labels = valset.test_labels
test_labels = torch.Tensor(test_labels)
test_labels = test_labels.long()

In [None]:
# Quick look at the shape of a single raw validation image.
valset.test_data[1].shape

In [None]:
# get gradients
grads_test, _ = get_grads(transform_test, model, valset.test_data, feat = 15.5)

# Rescale to [-1, 1] and stack the rows into a single float tensor.
test_grads = preprocess_test(grads_test)
test_tensor = torch.stack([torch.Tensor(i) for i in test_grads])

In [None]:
# Pair the gradient features with their labels and batch them for evaluation.
test_dataset = torch.utils.data.TensorDataset(test_tensor, test_labels)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [None]:
def test(net, testloader):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    logits = []
    labels = []
    preds = []

    for batch_idx, (inputs, targets) in enumerate(testloader):
        # targets = torch.unsqueeze(targets,1)
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)

        logits.append(outputs.tolist())
        labels.append(targets.tolist())
        # print(np.shape(logits), np.shape(labels))
        _, predicted = torch.max(outputs.data, 1)
        preds.append(predicted)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    acc = 100. * correct / total
    return acc, correct, total, logits, labels, preds

# Evaluate the reloaded head on the test features and print the fraction correct.
acc, correct, total, logits, labels, preds = test(mlp, test_loader)
print(correct.numpy() / total)

In [None]:
def flatten(t):
    """Concatenate the items of every sub-iterable of ``t`` into one flat list."""
    flat = []
    for sublist in t:
        flat.extend(sublist)
    return flat
    
# Merge the per-batch logits into one array, apply a softmax along axis 1 and
# insert a singleton axis at position 1.
all_preds =  np.array(flatten(logits))
all_preds = softmax(all_preds,1)
all_preds = np.expand_dims(all_preds,1)

In [None]:
# NOTE(review): a later cell of this notebook saves a different array to the
# same 'all_preds.npy' path -- check whether both outputs are needed.
np.save(results_dir + '/' + 'all_preds.npy',all_preds)

In [None]:
# class GradCon_Net(nn.Module):
#     def __init__(self, model):
#         self.model = model

#     def sample_predict(self, x, Nsamples):  # Nsamples: No. of forward passes
#         predictions = x.data.new(Nsamples, x.shape[0], self.num_classes)
#         for i in range(Nsamples):
#             y,_= self.forward(x)
#             predictions[i] = y

#         return predictions

#     def sample_eval(self, x, y, Nsamples, logits=True, train=False):
#         x, y = to_variable(var=(x, y.long()), cuda=self.cuda)
#         out = self.model.sample_predict(x, Nsamples)

#         if logits:
#             mean_out = out.mean(dim=0, keepdim=False)
#             loss = F.cross_entropy(mean_out, y, reduction='sum')
#             probs = F.softmax(mean_out, dim=1).data.cpu()

#         else:
#             mean_out = F.softmax(out, dim=2).mean(dim=0, keepdim=False)
#             probs = mean_out.data.cpu()

#             log_mean_probs_out = torch.log(mean_out)
#             loss = F.nll_loss(log_mean_probs_out, y, reduction='sum')

#         pred = mean_out.data.max(dim=1, keepdim=False)[1]  # get the index of the max log-probability
#         err = pred.ne(y.data).sum()

#         return loss.data, err, probs

#     def all_sample_eval(self, x, y, Nsamples):
#         """Returns predictions for sample"""
#         x, y = to_variable(var=(x, y.long()), cuda=self.cuda)

#         out = self.model.sample_predict(x, Nsamples)

#         prob_out = F.softmax(out, dim=2)
#         prob_out = prob_out.data

#         return prob_out

### CIFAR-10-C

In [None]:
def get_grads(loader, model, data, feat):  # , target_class):
    """Collect, per image, the gradient of an MSE loss w.r.t. ``model.linear.weight``.

    Each element of ``data`` is passed through ``loader``, turned into a batch
    of one and fed to ``model`` (in eval mode).  The first element of the model
    output is compared, with a mean-squared-error loss, against a length-10
    target filled with ``feat``; the gradient of that loss with respect to
    ``model.linear.weight`` is flattened and stored.

    Args:
        loader: callable applied to every element of ``data`` before batching.
        model: module exposing a ``linear`` layer; it is moved to the GPU when
            CUDA is available and left on the CPU otherwise.
        data: indexable collection of inputs with a length.
        feat: value used to fill the target vector.

    Returns:
        ``(all_grads, all_grad_pred)``: ``all_grads`` holds one flat CPU tensor
        per input; ``all_grad_pred`` holds one empty tensor per input and is
        kept only so existing callers can keep unpacking two values.
    """
    # Use the GPU when one is present, otherwise stay on the CPU.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    on_gpu = run_device.type == "cuda"
    if on_gpu:
        torch.cuda.empty_cache()

    mse_loss = nn.MSELoss()
    model.to(run_device)
    model.eval()

    # Constant target (every entry equals `feat`), built once for all images.
    target = torch.from_numpy(feat*np.ones(10)).float().to(run_device)

    all_grads = []
    all_grad_pred = []

    for num_im in range(len(data)):
        img = torch.unsqueeze(loader(data[num_im]), 0).to(run_device)

        output = torch.unsqueeze(model(img)[0], 0)
        # The mean-squared error is unchanged by the extra leading axis.
        pred_loss = mse_loss(output, target.expand_as(output))

        # autograd.grad returns only the requested gradient and does not
        # accumulate into any parameter's .grad, so nothing has to be zeroed
        # between images and no stale gradient can leak into the result.
        (weight_grad,) = torch.autograd.grad(pred_loss, [model.linear.weight])

        all_grads.append(weight_grad.detach().cpu().reshape(-1))
        all_grad_pred.append(torch.zeros(0))

    if on_gpu:
        torch.cuda.empty_cache()

    return all_grads, all_grad_pred

def scale(X, x_min, x_max):
    """Min-max rescale ``X`` along axis 0 into the interval ``[x_min, x_max]``.

    Entries along axis 0 whose minimum equals their maximum use a divisor of 1,
    so they map to ``x_min`` instead of dividing by zero.
    """
    lowest = X.min(axis=0)
    spread = X.max(axis=0) - lowest
    spread[spread == 0] = 1
    return x_min + (X - lowest) * (x_max - x_min) / spread

def preprocess_test(dat1):
    """Stack the items of ``dat1`` into one squeezed array and rescale it to [-1, 1].

    The scaling bounds are taken from ``dat1`` itself (see ``scale``).
    """
    stacked = np.asarray([np.asarray(item) for item in dat1]).squeeze()
    return scale(stacked, -1, 1)

def test(net, testloader):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    preds = []
    labels = []
    
    criterion = nn.CrossEntropyLoss()
    for batch_idx, (inputs, targets) in enumerate(testloader):
        # targets = torch.unsqueeze(targets,1)
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)

        preds.append(outputs.tolist())
        labels.append(targets.tolist())
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    # Save checkpoint when best model
    acc = 100. * correct / total
    return acc, correct, total, preds, labels

In [None]:
# Reload the backbone and the head from disk.
checkpoint = torch.load(models_dir + '/' + 'backbone.t7')
model = checkpoint['net']

mlp = torch.load(models_dir + '/' + 'head.t7')
mlp = mlp.cuda()

# Sorted listing of every entry found in the challenge folder.
# NOTE(review): `os` is not among the visible imports; presumably it arrives
# through one of the star imports -- confirm.
chalPath = 'data/CIFAR-10-C/'
chals = os.listdir(chalPath)
chals.sort()

# Same normalisation constants and evaluation transform as in the Dataset section.
mean = (0.4914, 0.4822, 0.4465)
std = (0.2023, 0.1994, 0.2010)

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean,std),
])

# Labels of the clean test split as an int64 tensor.
# NOTE(review): `test_labels` looks like an attribute of older torchvision
# releases (newer ones appear to use `.targets`) -- confirm.
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
test_labels = testset.test_labels
test_labels = torch.Tensor(test_labels)
test_labels = test_labels.long()

In [None]:
preds_list = []
avg_list = []

# One iteration per entry of the challenge folder listing.
# NOTE(review): every listed file is loaded as image data; if the folder also
# contains a labels file it would be processed as well -- verify.
for challenge in range(len(chals)):
    
    path = chalPath + chals[challenge]
    chal_data = np.load(path)
    #acts_test = get_acts(model, chal_data, 0)
    grads_s, _ = get_grads(transform_test, model, chal_data, feat=15.5)
    avg = 0

    # The gradients are evaluated in five consecutive chunks of 10000 samples
    # (presumably one chunk per corruption severity -- TODO confirm); every
    # chunk is paired with the same test_labels and rescaled on its own.
    for j in range(5):

        chal_temp_data = grads_s[j * 10000:(j + 1) * 10000]
        chal_grads = preprocess_test(chal_temp_data)
        chal_tensor = torch.stack([torch.Tensor(i) for i in chal_grads])
        chal_dataset = torch.utils.data.TensorDataset(chal_tensor, test_labels)
        # A single batch holds the whole chunk, so preds[0] below is the full chunk.
        chal_loader = torch.utils.data.DataLoader(chal_dataset, batch_size=10000)

        _, correct, total, preds, labels = test(mlp, chal_loader)
        preds_list.append(preds[0])
        print(correct.numpy() / total)
        avg = avg + (correct.numpy() / total)

    # Mean accuracy over the five chunks of this challenge.
    avg = avg / 5
    avg_list.append(avg)
    print("Average:",avg," ", chals[challenge])

print("Mean: ", np.mean(avg_list))

In [None]:
# Stack all chunk outputs row-wise, turn them into probabilities with a row-wise
# softmax and store both the probabilities and the per-challenge averages.
preds_list = np.vstack(preds_list)
print(preds_list.shape)
preds_list1 = softmax(preds_list,1)
np.save(results_dir+'/preds_CIFAR-10-C.npy', preds_list1)
np.save(results_dir+'/avg_list_CIFAR-10-C.npy', avg_list)

### CIFAR-10 Rotations

In [None]:
# Gather every validation batch into NumPy arrays.
x_dev = []
y_dev = []
for x, y in valloader:
    x_dev.append(x.cpu().numpy())
    y_dev.append(y.cpu().numpy())

x_dev = np.concatenate(x_dev)
y_dev = np.concatenate(y_dev)

# Move axis 1 to the end (presumably channels-first -> channels-last -- TODO confirm).
x_dev = np.transpose(x_dev, (0,2,3,1))
print(x_dev.shape)
print(y_dev.shape)

In [None]:
# Rebind x_dev to the dataset's own image array, replacing the array built from
# the loader in the previous cell.
x_dev = valset.test_data
print(x_dev.shape)

In [None]:
# Load the rotated-images array from disk and display its shape.
data_rotated = np.load("data/CIFAR10_rotated.npy")
# data_rotated = np.transpose(data_rotated, (0,1,4,2,3))
data_rotated.shape

In [None]:
def test(net, testloader):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    logits = []
    labels = []
    preds = []

    for inputs, targets in testloader:
        # targets = torch.unsqueeze(targets,1)
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)

        logits.append(outputs.tolist())
        labels.append(targets.tolist())
        # print(np.shape(logits), np.shape(labels))
        _, predicted = torch.max(outputs.data, 1)
        preds.append(predicted)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    acc = (100. * correct / total).item()
    return acc, logits, labels, preds


# Index of the validation image to inspect and number of rotation steps.
im_ind = 23
# Nsamples = 1
steps = 16

# Show and save the selected image (rotation angle 0).
# NOTE(review): `ndim` is not among the visible imports; presumably it arrives
# through one of the star imports -- confirm.
plt.figure()
plt.imshow( ndim.interpolation.rotate(x_dev[im_ind,:,:,:], 0, reshape=False, mode='nearest'))
plt.title('original image')
plt.savefig(results_dir + '/sample_image.png', bbox_inches='tight')
s_rot = 0
end_rot = 179
# `steps` evenly spaced integer angles between s_rot and end_rot (x axis of the plot).
rotations = (np.linspace(s_rot, end_rot, steps)).astype(int)            

ims = []
predictions = []
# percentile_dist_confidence = []
x, y = x_dev[im_ind], y_dev[im_ind]

fig = plt.figure(figsize=(steps, 8), dpi=80)

# Select image `im_ind` along axis 1 of the rotated array, keeping axis 0
# (presumably the rotation steps -- TODO confirm).
ims = data_rotated[:,im_ind,:,:,:]
print(ims.shape)
# Repeat the image's label once per selected entry.
test_labels = np.ones(ims.shape[0])*y
y = test_labels
test_labels = torch.Tensor(test_labels)
test_labels = test_labels.long()

# Gradient features of the selected images, rescaled and evaluated with the head.
grads_test, _ = get_grads(transform_test, model, ims , feat = 15.5)
test_grads = preprocess_test(grads_test)
test_tensor = torch.stack([torch.Tensor(i) for i in test_grads])

test_dataset = torch.utils.data.TensorDataset(test_tensor, test_labels)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=steps)

acc, logits, labels, preds = test(mlp, test_loader)
print(acc)

# Row-wise softmax turns the stacked logits into per-class probabilities.
logits = np.vstack(logits)
predictions = softmax(logits,1)

textsize = 15
lw = 5

# One colour per class, in the same order as the legend entries below.
c = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']  

ax0 = plt.subplot2grid((3, steps-1), (0, 0), rowspan=2, colspan=steps-1)
#ax0 = fig.add_subplot(2, 1, 1)
plt.gca().set_prop_cycle(color = c)
ax0.plot(rotations, predictions, linewidth=lw)


##########################
# Dots at max

# Mark, for every rotation, the class whose probability is the row maximum.
for i in range(predictions.shape[1]):

    selections = (predictions[:,i] == predictions.max(axis=1))
    for n in range(len(selections)):
        if selections[n]:
            ax0.plot(rotations[n], predictions[n, i], 'o', c=c[i], markersize=15.0)
##########################  

lgd = ax0.legend(['airplane', 'automobile', 'bird',
            'cat', 'deer', 'dog',
            'frog', 'horse', 'ship',
            'truck'], loc='upper right', prop={'size': textsize, 'weight': 'normal'}, bbox_to_anchor=(1.35,1))
plt.xlabel('rotation angle')
# plt.ylabel('probability')
plt.title('True class: %d' % (y[0]))
# ax0.axis('tight')
plt.tight_layout()
plt.autoscale(enable=True, axis='x', tight=True)
plt.subplots_adjust(wspace=0, hspace=0)

# Apply one font size and weight to the title, axis labels and tick labels.
for item in ([ax0.title, ax0.xaxis.label, ax0.yaxis.label] +
            ax0.get_xticklabels() + ax0.get_yticklabels()):
    item.set_fontsize(textsize)
    item.set_weight('normal')

# plt.savefig(results_dir + '/percentile_label_probabilities.png', bbox_extra_artists=(lgd,), bbox_inches='tight')

In [None]:
# Load the rotated-images array again and display its shape.
data_rotated = np.load("data/CIFAR10_rotated.npy")
data_rotated.shape

In [None]:
def test(net, testloader):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    logits = []
    labels = []
    preds = []

    for batch_idx, (inputs, targets) in enumerate(testloader):
        # targets = torch.unsqueeze(targets,1)
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)

        logits.append(outputs.tolist())
        labels.append(targets.tolist())
        # print(np.shape(logits), np.shape(labels))
        _, predicted = torch.max(outputs.data, 1)
        preds.append(predicted)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    logits = np.vstack(logits)
    acc = (100. * correct / total).item()
    return acc, logits, labels, preds

# Number of samples and rotation steps; all_preds stores one 10-way probability
# vector per (sample, rotation step).
N = 10000
steps = 16
all_preds = np.zeros((N, steps, 10))
correct_preds = np.zeros((N, steps))
# NOTE(review): `test_labels` looks like an attribute of older torchvision
# releases (newer ones appear to use `.targets`) -- confirm.
test_labels = torch.Tensor(valset.test_labels)
test_labels = test_labels.long()

for r in range(steps):
    # get gradients
    grads_test, _ = get_grads(transform_test, model, data_rotated[r] , feat = 15.5)

    # Each rotation step is rescaled with its own min/max bounds.
    test_grads = preprocess_test(grads_test)
    test_tensor = torch.stack([torch.Tensor(i) for i in test_grads])

    test_dataset = torch.utils.data.TensorDataset(test_tensor, test_labels)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

    acc, logits, labels, preds = test(mlp, test_loader)
    print(r,acc)

    # Row-wise softmax of the stacked logits, stored as slice r of all_preds.
    probs = softmax(logits,1)
    all_preds[:,r,:] = probs

In [None]:
# Integer rotation angles, one per step.
rotations = (np.linspace(0, 179, steps)).astype(int)

y_dev = testset.test_labels
correct_preds = np.zeros((N, steps))
# For every sample keep, at each rotation step, the probability assigned to the
# class given by its label.
for i in range(N):
    correct_preds[i,:] = all_preds[i,:,y_dev[i]]   

# Plotting helper from the project-local utils_plotting module.
plot_predictive_entropy(correct_preds, all_preds, rotations, results_dir)

In [None]:
# NOTE(review): 'all_preds.npy' is also written by the earlier Inference cell
# with a different array; this save replaces that file -- confirm this is intended.
np.save(results_dir+'/correct_preds.npy', correct_preds)
np.save(results_dir+'/all_preds.npy', all_preds)

In [None]:
# Display the shape of the rotation prediction array.
all_preds.shape

In [None]:
# N samples, Nsamples copies per sample, R rotation steps, C classes.
N = 10000; Nsamples = 100; R=16; C=10
all_sample_preds = np.zeros((N, Nsamples, R, C))

# Copy every (sample, rotation, class) probability into all Nsamples slots.
# A single broadcast assignment replaces the N*R*C Python-level loop and leaves
# the module-level names n, r and c untouched.
all_sample_preds[:] = all_preds[:N, np.newaxis, :R, :C]


In [None]:
# Store the replicated prediction array.
np.save(results_dir+'/all_sample_preds.npy', all_sample_preds)