In [1]:
from __future__ import print_function

import os
import sys
import time
import argparse
import datetime
import math
import pickle


import torchvision
import torchvision.transforms as transforms
#from utils.autoaugment import CIFAR10Policy

import torch
import torch.utils.data as data
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torchvision import datasets
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
from utils.BBBlayers import GaussianVariationalInference
from utils.BayesianModels.Bayesian3Conv3FC import BBB3Conv3FC
from utils.BayesianModels.BayesianAlexNet import BBBAlexNet
from utils.BayesianModels.BayesianLeNet import BBBLeNet
#from utils.BayesianModels.BayesianSqueezeNet import BBBSqueezeNet

In [3]:
net_type = 'alexnet'
dataset = 'CIFAR10'
outputs = 9
inputs = 3
resume = False
n_epochs = 10
lr = 0.0001
weight_decay = 0.0005
num_samples = 10
beta_type = "Blundell"
resize=80
im_size=80

In [4]:
# Hyper Parameter settings
use_cuda = torch.cuda.is_available()
# torch.cuda.set_device(0)

In [5]:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 32
# percentage of training set to use as validation
valid_size = 0.2

In [6]:
def load_images(image_size=(im_size,im_size), batch_size=128, root="D:\justi\Documents\BML Proj"):

    transform = transforms.Compose([
                    transforms.Resize(image_size),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_set = datasets.ImageFolder(root=root, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
    return train_loader

In [164]:
batch_size = 24
train_ld = load_images(root='D:/justi/Documents/BMLProj/train/')
train_ld1 = load_images(root='D:/justi/Documents/BMLProj/train/')

print(len(train_ld.dataset))
validation_split = 0.3
shuffle_dataset = True
random_seed= 42

# Creating data indices for training and validation splits:
dataset_size = len(train_ld.dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]

def get_same_index(target, label):
    label_indices = []
    
    for i in range(len(target)):
        if target[i][1] == label:
            label_indices.append(i)

    return label_indices

ind = get_same_index(train_ld.dataset.samples,1)
# del ind[:27]
del ind[:7]
print(len(ind))

22424
2260


In [165]:
sampler_chk1 = torch.utils.data.sampler.SubsetRandomSampler(ind)

print(len(sampler_chk1))

train_idx1 = list(set(train_idx) - set(sampler_chk1))
print(len(train_idx1))

valid_idx1 = list(set(valid_idx) - set(sampler_chk1))
del valid_idx1[0]
len(valid_idx1)

train_data = train_ld.dataset
train_data1 = train_ld1.dataset

train_sampler = SubsetRandomSampler(train_idx1)
valid_sampler = SubsetRandomSampler(valid_idx1)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler)
print(len(train_loader.sampler))
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, 
    sampler=valid_sampler)
print(len(valid_loader.sampler))
valid_loader_ooc = torch.utils.data.DataLoader(train_data1, batch_size=batch_size, 
    sampler=sampler_chk1)
print(len(valid_loader_ooc.sampler))

train_data=train_loader.dataset
valid_data=valid_loader.dataset
valset=valid_loader.dataset

2260
14139
14139
6024
2260


In [12]:
# specify the image classes
classes = ['c0', 'c2', 'c3', 'c4',
           'c5', 'c6', 'c7', 'c8', 'c9']

In [13]:
import matplotlib.pyplot as plt
%matplotlib inline

# helper function to un-normalize and display an image
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    plt.imshow(np.transpose(img, (1, 2, 0)))  # convert from Tensor image

In [16]:
# Architecture
if (net_type == 'lenet'):
    net = BBBLeNet(outputs,inputs)
elif (net_type == 'alexnet'):
    net = BBBAlexNet(outputs,inputs)
elif (net_type == '3conv3fc'):
        net = BBB3Conv3FC(outputs,inputs)
else:
    print('Error : Network should be either [LeNet / AlexNet / 3Conv3FC')


In [17]:
use_cuda

True

In [18]:
if use_cuda:
    net.cuda()

In [19]:
vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())
optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

In [20]:
ckpt_name = f'model_{net_type}_{dataset}_bayesian_v6.pt'
ckpt_name

'model_alexnet_CIFAR10_bayesian_v6.pt'

In [21]:
%%time

valid_loss_min = np.Inf # track change in validation loss

for epoch in range(1, n_epochs+1):

    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    
    m = math.ceil(len(train_data) / batch_size)
    
    ###################
    # train the model #
    ###################
    net.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # move tensors to GPU if CUDA is available
        data = data.view(-1, inputs, resize, resize).repeat(num_samples, 1, 1, 1)
        target = target.repeat(num_samples)
        #print(data.shape, target.shape)
        if use_cuda:
            data, target = data.cuda(), target.cuda()
            
        if beta_type is "Blundell":
            beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
        elif beta_type is "Soenderby":
            beta = min(epoch / (n_epochs // 4), 1)
        elif beta_type is "Standard":
            beta = 1 / m
        else:
            beta = 0
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model

        output,kl = net.probforward(data)
#         print("CNN output :{}".format(output.shape))
        # calculate the batch loss
#         print(target.shape)
        loss = vi(output, target, kl, beta)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += (loss.item()*data.size(0)) / num_samples
        
    ######################    
    # validate the model #
    ######################
    net.eval()
    print('hi')
    for batch_idx, (data, target) in enumerate(valid_loader):
        data = data.view(-1, inputs, resize, resize).repeat(num_samples, 1, 1, 1)

        target = target.repeat(num_samples)
        # move tensors to GPU if CUDA is available
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output,kl = net.probforward(data)
        
        # calculate the batch loss
        loss = vi(output, target, kl, beta)
        # update average validation loss 
        valid_loss += (loss.item()*data.size(0)) / num_samples
        
    # calculate average losses
    train_loss = train_loss/(len(train_loader.dataset) * (1-valid_size))
    valid_loss = valid_loss/(len(valid_loader.dataset) * valid_size)
        
    # print training/validation statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(net.state_dict(), ckpt_name)
        valid_loss_min = valid_loss

hi
Epoch: 1 	Training Loss: 2563.090078 	Validation Loss: 2.815736
Validation loss decreased (inf --> 2.815736).  Saving model ...
hi
Epoch: 2 	Training Loss: 2442.370456 	Validation Loss: 2.795225
Validation loss decreased (2.815736 --> 2.795225).  Saving model ...
hi
Epoch: 3 	Training Loss: 2280.096706 	Validation Loss: 2.212632
Validation loss decreased (2.795225 --> 2.212632).  Saving model ...
hi
Epoch: 4 	Training Loss: 2110.341430 	Validation Loss: 1.134389
Validation loss decreased (2.212632 --> 1.134389).  Saving model ...
hi
Epoch: 5 	Training Loss: 1938.048094 	Validation Loss: 0.811361
Validation loss decreased (1.134389 --> 0.811361).  Saving model ...
hi
Epoch: 6 	Training Loss: 1772.620871 	Validation Loss: 0.714316
Validation loss decreased (0.811361 --> 0.714316).  Saving model ...
hi
Epoch: 7 	Training Loss: 1609.623717 	Validation Loss: 0.601214
Validation loss decreased (0.714316 --> 0.601214).  Saving model ...
hi
Epoch: 8 	Training Loss: 1451.534469 	Validation L

In [None]:
train_lossl = [2222.424191, 2204.462145, 2176.80634, 2151.847436, 2124.632558, 2097.601839, 2074.707306, 2045.781735, 2018.009762,1993.408975]

valid_lossl = [2.302149, 2.301881, 2.301242, 2.301733, 2.299262, 2.293659, 2.226565, 1.315879, 0.684092, 0.455054]

import pandas as pd

df = pd.DataFrame(train_lossl, columns=['train_loss'])
df['valid_loss'] = valid_lossl
df['train_loss'].plot(title='Training Loss')
# df['valid_loss'].plot(title='Valid Loss')

In [22]:
print(output.shape)
ckpt_name

torch.Size([640, 9])


'model_alexnet_CIFAR10_bayesian_v6.pt'

In [19]:
net.load_state_dict(torch.load(ckpt_name))

In [38]:
def calc_uncertainity_softmax(output):
    prediction = F.softmax(output, dim = 1)
    results = torch.max(prediction, 1 )
    p_hat = np.array(results[0].detach())
    epistemic = np.mean(p_hat ** 2, axis=0) - np.mean(p_hat, axis=0) ** 2
    epistemic += epistemic 
    #print (epistemic)
    aleatoric = np.mean(p_hat * (1-p_hat), axis = 0)
    aleatoric += aleatoric
    #print (aleatoric)
    return epistemic, aleatoric

In [39]:
def calc_uncertainty_normalized(output):
    prediction = F.relu(output)
    prediction = normalization_function(prediction)
    results = torch.max(prediction, 1 )
    p_hat = np.array(results[0].cpu().detach().numpy())
#     print(p_hat)
    epistemic = np.mean(p_hat ** 2, axis=0) - np.mean(p_hat, axis=0) ** 2 + 1e-10
    epistemic += epistemic 
#     print ("e: " + str(epistemic))
    aleatoric = np.mean(p_hat * (1-p_hat), axis = 0) + 1e-10
    
    aleatoric += aleatoric
    return epistemic, aleatoric
    

In [40]:
def normalization_function(x):
    return (x) / torch.sum(x, dim=0)

In [163]:
len(valid_loader.sampler)

6024

In [138]:
%%time

# # track test loss
# test_loss = 0.0
# class_correct = list(0. for i in range(num_samples))
# class_total = list(0. for i in range(10))

output_class = np.asarray([])

num_samples = 50
net.eval()
m = math.ceil(len(valid_loader_ooc.dataset) / batch_size)
target_list = []
# iterate over test data
for batch_idx, (data, target) in enumerate(valid_loader_ooc):
    target_list.append(target)
    data = data.view(-1, inputs, resize, resize).repeat(num_samples, 1, 1, 1)
    target = target.repeat(num_samples)
    # move tensors to GPU if CUDA is available
    if use_cuda:
        data, target = data.cuda(), target.cuda()
    
    if beta_type is "Blundell":
        beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
    elif cf.beta_type is "Soenderby":
        beta = min(epoch / (cf.num_epochs // 4), 1)
    elif cf.beta_type is "Standard":
        beta = 1 / m
    else:
        beta = 0
    # forward pass: compute predicted outputs by passing inputs to the model
    output, kl = net.probforward(data)
    output = F.softmax(output, dim = 1)
    
#     if not torch.eq(torch.isnan(output).sum(), 0):
#         print(output, kl)
#         continue
    
    # calculate the batch loss
    #loss = vi(output, target, kl, beta)
#     print
    # update test loss 
    #test_loss += loss.item()*data.size(0) / num_samples
    #test_loss += loss.item()
    # convert output probabilities to predicted class
#     _, pred = torch.max(output, 1) 
    out_list = np.zeros((batch_size,outputs))
    for i in range(num_samples):
        for j in range(i*batch_size, (i+1)*batch_size):
            out_list[j%batch_size] += output.detach().cpu().numpy()[j]
    out_list = out_list/num_samples
    
    max_val = np.amax(out_list, axis = 1) 
    max_class = np.argmax(out_list, axis = 1)
    max_class[max_val<=0.3] = 10
    output_class = np.append(output_class, max_class)

Wall time: 2min 24s


In [139]:
# np.unique(output_class, return_counts=True)
import pandas as pd
pd.Series(output_class).value_counts()*100/ (np.sum(pd.Series(output_class).value_counts()))
# output_class.value_counts()

0.0     33.303571
6.0     25.535714
2.0     17.901786
7.0      9.330357
10.0     7.857143
8.0      5.357143
3.0      0.625000
5.0      0.089286
dtype: float64

In [169]:
%%time

# # track test loss
# test_loss = 0.0
# class_correct = list(0. for i in range(num_samples))
# class_total = list(0. for i in range(10))

output_class = np.asarray([])
counter = 0
num_samples = 10
net.eval()
m = math.ceil(len(valid_loader.dataset) / batch_size)
target_list = []
# iterate over test data
for batch_idx, (data, target) in enumerate(valid_loader):
    target_list.append(target)
    data = data.view(-1, inputs, resize, resize).repeat(num_samples, 1, 1, 1)
    target1 = target
    target = target.repeat(num_samples)
    # move tensors to GPU if CUDA is available
    if use_cuda:
        data, target = data.cuda(), target.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output, kl = net.probforward(data)
    output = F.softmax(output, dim = 1)
    out_list = np.zeros((batch_size,outputs))
    for i in range(num_samples):
        for j in range(i*batch_size, (i+1)*batch_size):
            out_list[j%batch_size] += output.detach().cpu().numpy()[j]
    out_list = out_list/num_samples
    
    max_val = np.amax(out_list, axis = 1) 
    max_class = np.argmax(out_list, axis = 1)
    max_class[max_val<=0.7] = 10
    correct = (max_class == target1.detach().cpu().numpy())
    counter += np.sum(correct)
    output_class = np.append(output_class, max_class)

Wall time: 1min 36s


In [170]:
counter/len(valid_loader.sampler)

0.8164010624169987

In [171]:
pd.Series(output_class).value_counts()*100/ (np.sum(pd.Series(output_class).value_counts()))

0.0     17.812085
5.0     11.802789
3.0     11.420983
2.0     11.321381
6.0     10.856574
10.0    10.790173
7.0      9.495352
4.0      8.549137
8.0      7.951527
dtype: float64