In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.distributions import Normal as norm

from termcolor import colored

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Data Process

In [3]:
data = np.genfromtxt("data/bank-note/train.csv", dtype = float, delimiter = ',')
data.shape

(872, 5)

In [4]:
data = np.hstack((np.ones((data.shape[0],1)), data))
data.shape

(872, 6)

In [5]:
train_data = data[:,:-1]
train_label = data[:,-1].astype(int)
train_data.shape, train_label.shape

((872, 5), (872,))

In [6]:
train = torch.tensor(train_data).type(torch.FloatTensor)
label = torch.tensor(train_label).type(torch.LongTensor)

In [7]:
test_ = np.genfromtxt("data/bank-note/test.csv", dtype = float, delimiter = ',')
test_.shape

(500, 5)

In [8]:
test_data = np.hstack((np.ones((test_.shape[0],1)), test_))
test_data.shape

(500, 6)

In [9]:
test__ = test_data[:,:-1]
t__ = test_data[:,-1].astype(int)
test__.shape, t__.shape

((500, 5), (500,))

In [10]:
test = torch.tensor(test__).type(torch.FloatTensor)
t = torch.tensor(t__).type(torch.LongTensor)

## Main functions

In [11]:
class Gaussian(nn.Module):
    def __init__(self, mu, rho):
        super(Gaussian, self).__init__()
        self.norm   = norm(0,1)
        self.mu_    = mu
        self.rho_   = rho
        self.sigma_ = torch.ones(self.mu_.shape)
        
        
    def sample(self):
        self.eps    = self.norm.sample(self.mu_.shape).type(self.mu_.type()).to(device)
        self.sigma_ = torch.log(1 + torch.exp(self.rho_))
        self.W      = self.mu_ + self.sigma_ * self.eps
        return self.W
            
    def loss(self):
        return (0.5*self.mu_**2 + 0.5*self.sigma_ - torch.log(1e-20 + self.sigma_)).sum()

In [12]:
class Blinear(nn.Module):
    def __init__(self, n_input, n_output):
        super(Blinear, self).__init__()
        
        

        
        self.n_input  = n_input
        self.n_output = n_output
        scale = 1.0/np.sqrt(2*self.n_input)
        
        
        #self.mu       = nn.Parameter(norm(0,1).sample((n_output, n_input)))
        
        self.mu       = nn.Parameter(scale*torch.randn(n_output, n_input))
        
        
        
        #self.mu       = nn.Parameter(torch.zeros(n_output, n_input))
        self.rho      = nn.Parameter(scale * torch.ones(n_output, n_input))
        
        self.W        = Gaussian(self.mu, self.rho)

        self.b_mu     = nn.Parameter(torch.zeros(n_output))
        self.b_rho    = nn.Parameter(scale * torch.ones(n_output))
        self.b        = Gaussian(self.b_mu, self.b_rho)
        
    def forward(self, x):
        
        W = self.W.sample()
        b = self.b.sample()
        
        return F.linear(x, W, b)

In [13]:
class BNet(nn.Module):
    
    def __init__(self, layers, act):
        super(BNet, self).__init__()
        
        self.act = act
        self.fc  = nn.ModuleList()
        for i in range(len(layers) - 1):
            self.fc.append(Blinear(layers[i], layers[i+1]))  
            
            
    def forward(self,x):
        for i in range(len(self.fc) - 1):
            x = self.fc[i].forward(x) #forward based on Blinear 
            x = self.act(x)
            
        x = F.log_softmax(self.fc[-1](x), dim=1)
        #x = F.softmax(self.fc[-1].forward(x), dim=1)
        
        return x

In [14]:
class BNN():
    def __init__(self, x, y, layers, act, n_epochs = 10000):
        
        self.x = x
        self.y = y
        self.n_layers = len(layers)
        
        self.nepochs = n_epochs
        self.BNet    = BNet(layers, act)
        
    def get_neg_elbo(self):
        neg_elbo = 0
        for i in range(self.n_layers-1):
            neg_elbo += self.BNet.fc[i].W.loss()
            neg_elbo += self.BNet.fc[i].b.loss()
            
        #print('neg_elbo', neg_elbo)
            
        p_pred = self.BNet(self.x)
        
        return neg_elbo - p_pred[self.y==1, 1].sum() - p_pred[self.y==0, 0].sum()
        
#         return neg_elbo + F.nll_loss(p_pred, self.y, reduction='sum') #mean or sum?!!!
    
    def train(self, lr, decay, step_size = 1000):
    
        optimizer = torch.optim.Adam(self.BNet.parameters(), lr = lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=decay)
        
        for n in range(self.nepochs):
            loss = self.get_neg_elbo()
            if n %1000 == 0:
                with torch.no_grad():
                    print(loss.item())
                    acc = 0
                    pl = 0
                    for i in range(1000):
                        p_pred = self.BNet(self.x).numpy()
                        

#                         pl += ((np.log(1e-10+p_pred[self.y==0, 0])).sum() + 
#                                (np.log(1e-10+p_pred[self.y==1, 1])).sum())/p_pred.shape[0]

                        pl += (p_pred[self.y==0, 0].sum() + 
                               p_pred[self.y==1, 1].sum())/p_pred.shape[0]

                        
                        y_hat = np.argmax(p_pred, axis = 1)
                        
                        acc += (self.y.numpy() == y_hat).astype(int).mean()
                    
                    print(colored('learning rate:{}'.format(optimizer.param_groups[0]['lr']), 'blue'))
                        
                    print(colored('Train accuracy for iteration {} is {}'.format(n, acc/1000), 'red'))
                    #print('Train Accuracy for iteration {} is {}'.format(n, acc/1000))
                    print('Predictive log-likelihood for trainat iteration {} is {}'.format(n, pl/1000))
                    
                    
                    acc = 0
                    pl = 0
                    for i in range(1000):

                        t_pred = self.BNet(test).numpy()

                        pl += (t_pred[t==0, 0].sum() + 
                               t_pred[t==1, 1].sum())/t_pred.shape[0]
                        
#                         pl += ((np.log(1e-10+t_pred[t==0, 0])).sum() + 
#                                (np.log(1e-10+t_pred[t==1, 1])).sum())/t_pred.shape[0]
                        
                        t_hat = np.argmax(t_pred, axis = 1)
                        acc += (t.numpy() == t_hat).astype(int).mean()
                    
                    print('Test accuracy is {}'.format(acc/1000))
                    print('Predictive log-likelihood for test at iteration {} is {}'.format(n, pl/1000))
                    
                    
                    
                    
                    
#                     for p in self.BNet.parameters():
#                         print('parametr', p)
                    
                    
            optimizer.zero_grad()
            loss.backward(retain_graph=True)
            optimizer.step()
            scheduler.step()
            
            
        
#         acc = 0
#         for i in range(1000):
#             with torch.no_grad():
#                 y_pred = self.BNet(train).numpy()
#                 y_hat = np.argmax(y_pred, axis = 1)
#                 acc += (label.numpy() == y_hat).astype(int).mean()
                    
#         print(colored('Train accuracy for iteration {} is {}'.format(n, acc/1000), 'red'))            

        
#         acc = 0
#         for i in range(1000):
#             with torch.no_grad():
#                 t_pred = self.BNet(test).numpy()
#                 t_hat  = np.argmax(t_pred, axis = 1)
#                 acc   += (t.numpy() == t_hat).astype(int).mean()
                    
#         print('Test accuracy is {}'.format(acc/1000))
        

In [15]:
models = []
for lr in [9e-3]:#[1e-3, 0.5e-3, 1e-4, 1e-5]:
    for l in [10,20,50]:
        
        print('---------------------------------------------------------------------')
        print('---------------------------------------------------------------------')
        print(colored("\n\n Start training for lr = {} and hidden_layer size = {}\n\n".format(lr, l), 'green'))
        print('---------------------------------------------------------------------')
        print('---------------------------------------------------------------------')
        model = BNN(train, label, [5, 10, 10, 2], act = nn.ReLU(), n_epochs = 10000)
        model.train(lr = 9e-3, decay = .95)
        models.append(model)
        print('---------------------------------------------------------------------')
        print('---------------------------------------------------------------------')

---------------------------------------------------------------------
---------------------------------------------------------------------
[32m

 Start training for lr = 0.009 and hidden_layer size = 10

[0m
---------------------------------------------------------------------
---------------------------------------------------------------------
2115.61865234375
[34mlearning rate:0.009[0m
[31mTrain accuracy for iteration 0 is 0.5091215596330281[0m
Predictive log-likelihood for trainat iteration 0 is -10.279756968821959
Test accuracy is 0.5038199999999999
Predictive log-likelihood for test at iteration 0 is -11.730947324066163
1957.73291015625
[34mlearning rate:0.008122499999999998[0m
[31mTrain accuracy for iteration 1000 is 0.5162970183486263[0m
Predictive log-likelihood for trainat iteration 1000 is -0.8400718373639878
Test accuracy is 0.5202479999999993
Predictive log-likelihood for test at iteration 1000 is -0.8335117177734375
843.295166015625
[34mlearning rate:0.0073305

[34mlearning rate:0.0073305562499999975[0m
[31mTrain accuracy for iteration 2000 is 0.5431100917431236[0m
Predictive log-likelihood for trainat iteration 2000 is -0.7255249087132435
Test accuracy is 0.5451879999999975
Predictive log-likelihood for test at iteration 2000 is -0.7225917795410151
805.844482421875
[34mlearning rate:0.006615827015624996[0m
[31mTrain accuracy for iteration 3000 is 0.5477844036697302[0m
Predictive log-likelihood for trainat iteration 3000 is -0.7006804800471038
Test accuracy is 0.551483999999997
Predictive log-likelihood for test at iteration 3000 is -0.7123921994018557
800.5078125
[34mlearning rate:0.005970783881601559[0m
[31mTrain accuracy for iteration 4000 is 0.5529288990825749[0m
Predictive log-likelihood for trainat iteration 4000 is -0.7028155730711214
Test accuracy is 0.5572299999999965
Predictive log-likelihood for test at iteration 4000 is -0.7024112403564453
800.8673706054688
[34mlearning rate:0.005388632453145407[0m
[31mTrain accurac

In [None]:
models = []
for lr in [1e-3, 0.5e-3, 1e-4, 1e-5]:
    for l in [10,20,50]:
        
        print('---------------------------------------------------------------------')
        print('---------------------------------------------------------------------')
        print(colored("\n\n Start training for lr = {} and hidden_layer size = {}\n\n".format(lr, l), 'green'))
        print('---------------------------------------------------------------------')
        print('---------------------------------------------------------------------')
        model = BNN(train, label, [5, 10, 10, 2], act = nn.Tanh(), n_epochs = 10000)
        model.train(lr = 9e-3, decay = .95)
        models.append(model)
        print('---------------------------------------------------------------------')
        print('---------------------------------------------------------------------')

---------------------------------------------------------------------
---------------------------------------------------------------------
[32m

 Start training for lr = 0.001 and hidden_layer size = 10

[0m
---------------------------------------------------------------------
---------------------------------------------------------------------
1063.8746337890625
[34mlearning rate:0.009[0m
[31mTrain accuracy for iteration 0 is 0.4937362385321104[0m
Predictive log-likelihood for trainat iteration 0 is -1.5875758364266226
Test accuracy is 0.496964
Predictive log-likelihood for test at iteration 0 is -1.5402280909729005
296.7937927246094
[34mlearning rate:0.008122499999999998[0m
[31mTrain accuracy for iteration 1000 is 0.9588509174311933[0m
Predictive log-likelihood for trainat iteration 1000 is -0.13859569748388514
Test accuracy is 0.951402000000003
Predictive log-likelihood for test at iteration 1000 is -0.1568568328104022
331.81573486328125
[34mlearning rate:0.007330556249

249.62217712402344
[34mlearning rate:0.0073305562499999975[0m
[31mTrain accuracy for iteration 2000 is 0.9859839449541306[0m
Predictive log-likelihood for trainat iteration 2000 is -0.06134544872143947
Test accuracy is 0.9844560000000014
Predictive log-likelihood for test at iteration 2000 is -0.06764487458896637
211.05545043945312
[34mlearning rate:0.006615827015624996[0m
[31mTrain accuracy for iteration 3000 is 0.9887201834862427[0m
Predictive log-likelihood for trainat iteration 3000 is -0.053143383779657
Test accuracy is 0.985676000000001
Predictive log-likelihood for test at iteration 3000 is -0.06393336859035491
205.27781677246094
[34mlearning rate:0.005970783881601559[0m
[31mTrain accuracy for iteration 4000 is 0.9898543577981661[0m
Predictive log-likelihood for trainat iteration 4000 is -0.051426686646741354
Test accuracy is 0.9875899999999983
Predictive log-likelihood for test at iteration 4000 is -0.059422254777908454
186.91551208496094
[34mlearning rate:0.005388

174.96710205078125
[34mlearning rate:0.005388632453145407[0m
[31mTrain accuracy for iteration 5000 is 0.995197247706425[0m
Predictive log-likelihood for trainat iteration 5000 is -0.016107171795527463
Test accuracy is 0.9940160000000046
Predictive log-likelihood for test at iteration 5000 is -0.021098654496848595
170.07110595703125
[34mlearning rate:0.004863240788963729[0m
[31mTrain accuracy for iteration 6000 is 0.9959266055045887[0m
Predictive log-likelihood for trainat iteration 6000 is -0.015418146204348615
Test accuracy is 0.9949820000000038
Predictive log-likelihood for test at iteration 6000 is -0.019170356582105138
160.27735900878906
[34mlearning rate:0.004389074812039765[0m
[31mTrain accuracy for iteration 7000 is 0.9957912844036725[0m
Predictive log-likelihood for trainat iteration 7000 is -0.01679619736231674
Test accuracy is 0.9943040000000047
Predictive log-likelihood for test at iteration 7000 is -0.023028237353414287
158.38685607910156
[34mlearning rate:0.00