# BNN TRAINING

In [7]:

import numpy as np
from sklearn import datasets

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchbnn as bnn
import torch.nn.functional as F
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
import pickle
from sklearn.metrics import f1_score
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#from torchbnn import transform_model
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the training data that train_dataloader wraps further down.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
Train = torch.load('trainlist.pt')

In [3]:
# Load the validation data that val_dataloader wraps further down.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
Valid = torch.load('vallist.pt')

In [8]:
def _random_power_of_two(_):
    """Return 2**k with k drawn from np.random.randint(2, 9)."""
    return 2 ** np.random.randint(2, 9)


# Hyper-parameter settings. "l1"/"l2" are lazy Ray Tune samplers; "lr" and
# "batch_size" are drawn once, right here, when this cell runs.
config = dict(
    l1=tune.sample_from(_random_power_of_two),
    l2=tune.sample_from(_random_power_of_two),
    lr=np.random.uniform(1e-4, 1e-1),
    batch_size=np.random.choice([64]),
    # "momentum": np.random.uniform( 0.1,0.5, 0.9)
)


In [5]:
# Batch the training data; the batch size comes from config["batch_size"].
# Batches are reshuffled on every pass (shuffle=True) and loaded in the main process (num_workers=0).
train_dataloader = DataLoader(Train, int(config["batch_size"]),
                        shuffle=True, num_workers=0)

In [6]:
# Batch the validation data with the same batch size as the training loader.
# NOTE(review): shuffle=True is not required for evaluation; it only changes batch order.
val_dataloader = DataLoader(Valid, int(config["batch_size"]),
                        shuffle=True, num_workers=0)

In [9]:
#BCNN MODEL
import copy
import warnings

def transform_layer(input, from_inst, to_inst, args=None, attrs=None):
    """Build a `to_inst` replacement for `input` when it is a `from_inst`.

    String values that start with "." in `args` / `attrs` are placeholders:
    ".name" is replaced by `getattr(input, "name")`. Every other value is
    used as given.

    Parameters
    ----------
    input : object
        Layer (or any object) to convert.
    from_inst : type or tuple of types
        Only instances of this type are converted.
    to_inst : callable
        Called as `to_inst(**resolved_args)` to create the replacement.
    args : dict, optional
        Keyword arguments for `to_inst`. Not modified by this function.
    attrs : dict, optional
        Attributes set on the new object after construction. Not modified
        by this function.

    Returns
    -------
    object
        The new `to_inst` object, or `input` itself when it is not an
        instance of `from_inst`.
    """
    if not isinstance(input, from_inst):
        return input

    def _resolve(value):
        # ".attr" means "read this attribute from the layer being replaced".
        if isinstance(value, str) and value.startswith("."):
            return getattr(input, value[1:])
        return value

    # Resolve into fresh dicts: writing the resolved values back into the
    # caller's dicts (or into a shared default dict) would make a second call
    # with the same spec reuse the first layer's values.
    ctor_kwargs = {key: _resolve(value) for key, value in (args or {}).items()}
    output = to_inst(**ctor_kwargs)

    for key, value in (attrs or {}).items():
        setattr(output, key, _resolve(value))
    return output


def transform_model(input, from_inst, to_inst, args={}, attrs={}, inplace=True, _warn=True):
    """Replace every `from_inst` layer inside `input` with a `to_inst` layer.

    When `input` itself is a `from_inst`, the converted layer is returned.
    Otherwise each entry of `input.named_children()` is converted recursively
    and re-attached under the same name.

    Parameters
    ----------
    input : object
        Model or layer to convert.
    from_inst, to_inst, args, attrs
        Forwarded to `transform_layer`; `args` and `attrs` are deep-copied
        for every layer.
    inplace : bool
        When True the given model is modified and returned; when False a
        deep copy is converted instead.
    _warn : bool
        Internal switch so the in-place warning is raised only by the
        outermost call.

    Returns
    -------
    object
        The converted model (the same object as `input` when `inplace=True`
        and `input` is not itself a `from_inst`).
    """
    if not inplace:
        model = copy.deepcopy(input)
    else:
        model = input
        if _warn:
            warnings.warn("\n * Caution : The Input Model is CHANGED because inplace=True.", Warning)

    if isinstance(model, from_inst):
        return transform_layer(model, from_inst, to_inst, copy.deepcopy(args), copy.deepcopy(attrs))

    for child_name, child in model.named_children():
        converted = transform_model(child, from_inst, to_inst,
                                    copy.deepcopy(args), copy.deepcopy(attrs), _warn=False)
        setattr(model, child_name, converted)
    return model

In [10]:
class ConvNet(nn.Module):
    """Two-stage convolutional classifier.

    layer1: 7x7 stride-2 convolution (3 -> 64 channels), batch norm, ReLU,
    3x3 stride-2 max pooling.
    layer2: 3x3 convolution (64 -> 128 channels), batch norm, ReLU, adaptive
    average pooling down to 1x1.
    fc: linear map from the 128 pooled features to `num_classes` outputs.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        stage_one = [
            nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.layer1 = nn.Sequential(*stage_one)

        stage_two = [
            nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
        ]
        self.layer2 = nn.Sequential(*stage_two)

        self.fc = nn.Linear(128, num_classes)
        # self.fc =  bnn.BayesLinear(prior_mu=0, prior_sigma=0.01, in_features=128, out_features=3),

    def forward(self, x):
        """Return the `num_classes` outputs for a batch of 3-channel images."""
        features = self.layer2(self.layer1(x))
        # The adaptive pooling leaves (N, 128, 1, 1); collapse it to (N, 128).
        flat = features.reshape(features.size(0), -1)
        return self.fc(flat)


ConvNet()

ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc): Linear(in_features=128, out_features=3, bias=True)
)

In [11]:
# Shape walk-through for ConvNet2 (N is the batch size):
# conv_layer ends with AdaptiveAvgPool2d((1, 1)), so its output is (N, 64, 1, 1)
# whatever the height and width of the input image.
# Flattening turns that into (N, 64); the first Linear maps it to (N, 128)
# and the second Linear keeps it at (N, 128).



class ConvNet2(nn.Module):
    """Single convolution stage followed by a two-layer fully connected head.

    conv_layer: 7x7 stride-2 convolution (3 -> 64 channels), batch norm, ReLU,
    adaptive average pooling down to 1x1.
    fc_layer: Linear(64, 128), ReLU, Linear(128, 128).
    """

    def __init__(self):
        super().__init__()

        conv_stage = [
            nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
        ]
        self.conv_layer = nn.Sequential(*conv_stage)

        dense_stage = [
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
        ]
        self.fc_layer = nn.Sequential(*dense_stage)

    def forward(self, x):
        """Return a (batch, 128) tensor for a batch of 3-channel images."""
        batch = x.size(0)
        pooled = self.conv_layer(x)
        # (N, 64, 1, 1) -> (N, 64) so the linear layers can consume it.
        flat = pooled.view(batch, -1)
        return self.fc_layer(flat)


ConvNet2()

ConvNet2(
  (conv_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=64, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
  )
)

In [12]:
# Pick the architecture that the conversion cells below turn into a Bayesian network.
# Swap the two lines to use ConvNet2 instead.
#model = ConvNet2()
model = ConvNet()

In [13]:
# Convert Linear -> BayesLinear
# Strings that start with "." are placeholders: transform_layer replaces them
# with the attribute of that name read from the layer being converted.
linear_ctor_kwargs = {
    "prior_mu": 0,
    "prior_sigma": 0.1,
    "in_features": ".in_features",
    "out_features": ".out_features",
    "bias": ".bias",
}
# Set on the new layer after construction: weight_mu takes the old layer's weight.
linear_copied_attrs = {"weight_mu": ".weight"}
transform_model(model, nn.Linear, bnn.BayesLinear,
                args=linear_ctor_kwargs,
                attrs=linear_copied_attrs)

 * Caution : The Input Model is CHANGED because inplace=True.


ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc): BayesLinear(prior_mu=0, prior_sigma=0.1, in_features=128, out_features=3, bias=True)
)

In [14]:
# Convert Conv2d -> BayesConv2d
# Strings that start with "." are placeholders: transform_layer replaces them
# with the attribute of that name read from the layer being converted.
conv_ctor_kwargs = {
    "prior_mu": 0,
    "prior_sigma": 0.1,
    "in_channels": ".in_channels",
    "out_channels": ".out_channels",
    "kernel_size": ".kernel_size",
    "stride": ".stride",
    "padding": ".padding",
    "bias": ".bias",
}
# Set on the new layer after construction: weight_mu takes the old layer's weight.
conv_copied_attrs = {"weight_mu": ".weight"}
transform_model(model, nn.Conv2d, bnn.BayesConv2d,
                args=conv_ctor_kwargs,
                attrs=conv_copied_attrs)

 * Caution : The Input Model is CHANGED because inplace=True.


ConvNet(
  (layer1): Sequential(
    (0): BayesConv2d(0, 0.1, 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): BayesConv2d(0, 0.1, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc): BayesLinear(prior_mu=0, prior_sigma=0.1, in_features=128, out_features=3, bias=True)
)

In [15]:
# Start again from a fresh, unconverted ConvNet (this discards the conversions applied to the previous `model`).
model = ConvNet()
# NOTE(review): no class named CNN is defined in the visible cells — confirm where it comes from before enabling this line.
#model = CNN()

In [16]:
# Convert Conv2d -> BayesConv2d on the fresh model (its Linear layer is left as is).
# Strings that start with "." are placeholders: transform_layer replaces them
# with the attribute of that name read from the layer being converted.
conv_ctor_kwargs = {
    "prior_mu": 0,
    "prior_sigma": 0.1,
    "in_channels": ".in_channels",
    "out_channels": ".out_channels",
    "kernel_size": ".kernel_size",
    "stride": ".stride",
    "padding": ".padding",
    "bias": ".bias",
}
# Set on the new layer after construction: weight_mu takes the old layer's weight.
conv_copied_attrs = {"weight_mu": ".weight"}
transform_model(model, nn.Conv2d, bnn.BayesConv2d,
                args=conv_ctor_kwargs,
                attrs=conv_copied_attrs)

 * Caution : The Input Model is CHANGED because inplace=True.


ConvNet(
  (layer1): Sequential(
    (0): BayesConv2d(0, 0.1, 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): BayesConv2d(0, 0.1, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc): Linear(in_features=128, out_features=3, bias=True)
)

In [11]:
# `net` is the name the training and evaluation loops use; it is the same object as `model`, not a copy.
net = model
# Bare expression so the notebook prints the model structure.
net

CNN(
  (conv_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc_layer): Sequential(
    (0): BayesLinear(prior_mu=0, prior_sigma=0.1, in_features=64, out_features=128, bias=True)
    (1): ReLU()
    (2): BayesLinear(prior_mu=0, prior_sigma=0.1, in_features=128, out_features=128, bias=True)
  )
)

In [7]:
# Uncomment for BNN model
# Fully connected Bayesian network: BayesLinear -> ReLU -> BayesLinear.
# NOTE(review): 150528 equals 3 * 224 * 224, presumably a flattened 224x224 RGB image — confirm.
FLAT_INPUT_FEATURES = 150528
HIDDEN_FEATURES = 1505
BAYES_PRIOR = dict(prior_mu=0, prior_sigma=0.01)

net = nn.Sequential(
    bnn.BayesLinear(in_features=FLAT_INPUT_FEATURES, out_features=HIDDEN_FEATURES, **BAYES_PRIOR),
    nn.ReLU(),
    bnn.BayesLinear(in_features=HIDDEN_FEATURES, out_features=HIDDEN_FEATURES, **BAYES_PRIOR),
)
# The optimizer cell below reads `model`, so keep both names on the same network.
model = net

# Models Training

In [8]:
# Loss, optimizer and learning-rate schedule used by the training loop.
SGD_MOMENTUM = 0.8

criterion = nn.CrossEntropyLoss()
# NOTE(review): CyclicLR drives the learning rate from base_lr/max_lr, so the
# 0.001 given to SGD here is presumably superseded by 0.01 — confirm.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=SGD_MOMENTUM)
#bnn.BKLLoss(reduction='mean', last_layer_only=False)
# base_lr equals max_lr, so the schedule has no learning-rate range to cycle over.
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optimizer,
    base_lr=0.01,
    max_lr=0.01,
    base_momentum=SGD_MOMENTUM,
)

In [9]:
# Cross-entropy term; same loss type as `criterion`.
ce_loss = nn.CrossEntropyLoss()
# KL-divergence term from torchbnn, called as kl_loss(net) in the test cell.
# NOTE(review): last_layer_only=False presumably includes every Bayesian layer — confirm against the torchbnn docs.
# The lines that would add this term to the training loss are commented out in the training cell.
kl_loss = bnn.BKLLoss(reduction='mean', last_layer_only=False)

In [None]:
# Train `net` for n_epochs and validate after every epoch.
# Per-epoch accuracy/loss histories are pickled to bnn_*.txt, and the weights
# with the lowest summed validation loss so far are written to BNN.pt.
n_epochs = 10
print_every = 10  # NOTE(review): unused; progress is printed every 20 batches below
valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0; np.inf works everywhere
val_loss = []
val_acc = []
train_loss = []
train_acc = []
pred_total = []
target_total = []
f1_train_total = []
f1_val_total = []
total_step = len(train_dataloader)
batch_size = int(config["batch_size"])

# The batches are moved to `device`, so the network has to live there too.
net.to(device)
net.train()

for epoch in range(1, n_epochs+1):
    running_loss = 0.0
    correct = 0
    total = 0
    print(f'Epoch {epoch}\n')
    for batch_idx, (data_, target_) in enumerate(train_dataloader):

        data_ = data_.view(data_.size(0), -1) #-Convert to be readable by model(Used only in BNN, Linear)

        data_, target_ = data_.to(device), target_.to(device)
        # 1-D integer labels on the same device as the outputs.
        # .type(torch.LongTensor) would force them onto the CPU, and comparing
        # predictions against (N, 1) targets would broadcast to (N, N).
        labels_ = target_.flatten().long()

        # Compute prediction and loss
        outputs = net(data_.float())

        #ce_loss =criterion(outputs, labels_)
        #kl = kl_loss(net)
        #loss = ce_loss+kl
        loss = criterion(outputs, labels_)

        # Backpropagation and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Statistics
        running_loss += loss.item()
        _, pred = torch.max(outputs.float(), dim=1)
        correct += torch.sum(pred == labels_).item()
        total += labels_.size(0)

        # scikit-learn needs host memory; also keep the stored history off the GPU.
        pred_cpu = pred.cpu()
        f1 = f1_score(labels_.cpu(), pred_cpu, average='micro')
        f1_train_total.append(f1)
        pred_total.append(pred_cpu)
        target_total.append(target_.cpu())

        if batch_idx % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], LR:[{}], batch_size:[{}], Loss: {:.4f}, f1: {:.4f},acc: {:.4f}'
                   .format(epoch, n_epochs, batch_idx, total_step,optimizer.param_groups[0]['lr'],batch_size, loss.item(),f1,(100 * correct/total)))
    #torch.save(pred_total, 'pred_total.pt')
    #torch.save(target_total, 'target_total.pt')
    # NOTE(review): the scheduler is stepped once per epoch; CyclicLR is usually stepped per batch — confirm intent.
    scheduler.step()#to test

    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)

    # The reported loss is the mean over all epochs so far; f1 is the last batch's score.
    print(f'\ntrain-loss: {np.mean(train_loss):.4f}, f1: {(f1):.4f}')
    print(f'\ncorrect: {correct:.4f}, total: {total:.4f}, train-acc: {(100 * correct/total):.4f}')

    # ---- validation ----
    batch_loss = 0
    total_t = 0
    correct_t = 0
    net.eval() # switch to eval mode
    with torch.no_grad():
        for data_t, target_t in val_dataloader:
            # Flatten the validation batch itself (the original re-viewed the last training batch).
            data_t = data_t.view(data_t.size(0), -1) #-Convert to be readable by model(Used only in BNN, Linear)

            data_t, target_t = data_t.to(device), target_t.to(device)
            labels_t = target_t.flatten().long()
            outputs_t = net(data_t.float())
            # Same objective as the training step above (cross-entropy only).
            loss_t = criterion(outputs_t.float(), labels_t)
            #kl_t = kl_loss(net)
            #loss_t = loss_t+kl_t
            batch_loss += loss_t.item()

            _, pred_t = torch.max(outputs_t.float(), dim=1)
            correct_t += torch.sum(pred_t == labels_t).item()
            total_t += labels_t.size(0)
            f1_val = f1_score(labels_t.cpu(), pred_t.cpu(), average='micro')
            f1_val_total.append(f1_val)

    val_acc.append(100 * correct_t/total_t)
    val_loss.append(batch_loss/len(val_dataloader))
    network_learned = batch_loss < valid_loss_min
    print(f'validation loss: {np.mean(val_loss):.4f}, validation f1: {(f1_val):}\n')
    print(f'corret: {correct_t:.4f}, total: {total_t:.4f}, validation acc: {(100 * correct_t/total_t):.4f}\n')

    # Persist the metric histories after every epoch so an interrupted run keeps its curves.
    with open("bnn_train_acc.txt", "wb") as fp:
        pickle.dump(train_acc, fp)
    with open("bnn_train_loss.txt", "wb") as fp:
        pickle.dump(train_loss, fp)
    with open("bnn_val_acc.txt", "wb") as fp:
        pickle.dump(val_acc, fp)
    with open("bnn_val_loss.txt", "wb") as fp:
        pickle.dump(val_loss, fp)

    if network_learned:
        valid_loss_min = batch_loss
        print('Improvement-Detected, save-model')
        torch.save(net.state_dict(), 'BNN.pt')

    net.train()# switch back to train mode

In [11]:
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, "rb") as fp:
        return pickle.load(fp)


# Reload the accuracy histories that the training cell pickled.
train_acc = _read_pickle("bnn_train_acc.txt")
val_acc = _read_pickle("bnn_val_acc.txt")

In [7]:
# Load the test data that test_dataloader wraps in the next cell.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
test = torch.load('testlist.pt')

In [8]:
# Batch the test data with the batch size from config["batch_size"].
# NOTE(review): `config` is re-assigned in the cell after this one, so on a top-to-bottom run this reads the earlier definition.
test_dataloader = DataLoader(test, int(config["batch_size"]),
                        shuffle=True, num_workers=0)

In [9]:
# Settings for the evaluation section.
# np.random.uniform(x) with a single argument only sets `low` and keeps
# high=1.0, so the original drew lr from [1e-4, 1) and momentum from [0.8, 1).
# lr now uses the same explicit range as the first config cell, and momentum
# is fixed at the 0.8 used by the training optimizer.
config = {
    "l1": tune.sample_from(lambda _: 2**np.random.randint(2, 9)),
    "l2": tune.sample_from(lambda _: 2**np.random.randint(2, 9)),
    "lr": np.random.uniform(1e-4, 1e-1),
    "batch_size": np.random.choice([64]),
    "momentum": 0.8,
}

In [12]:
#net = ConvNet()
# Restore the weights saved by the training loop into the existing `net`.
# map_location=device lets a checkpoint written on a GPU load on a CPU-only
# machine (and the other way round).
net.load_state_dict(torch.load('BNN.pt', map_location=device))
# Switch to evaluation mode; as the last expression this also prints the model.
net.eval()

CNN(
  (conv_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc_layer): Sequential(
    (0): BayesLinear(prior_mu=0, prior_sigma=0.1, in_features=64, out_features=128, bias=True)
    (1): ReLU()
    (2): BayesLinear(prior_mu=0, prior_sigma=0.1, in_features=128, out_features=128, bias=True)
  )
)

In [13]:
# Loss, optimizer and schedule for the evaluation section; the test loop below only uses `criterion`.
eval_lr = config["lr"]
eval_momentum = config["momentum"]

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=eval_lr, momentum=eval_momentum)
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optimizer,
    eval_lr,
    max_lr=eval_lr,
    base_momentum=eval_momentum,
)

In [None]:
# Evaluate `net` on the test set: accumulate cross-entropy + KL loss and
# accuracy, then save the per-batch predictions and targets to disk.
# NOTE(review): val_loss / val_acc are re-used for the test metrics here, which
# overwrites the validation histories loaded earlier.
valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0; np.inf works everywhere
batch_loss = 0
total_t = 0
correct_t = 0
val_loss = []
val_acc = []
pred_total = []
target_total = []
torchpred_total = torch.tensor([])
torchtarget_total = torch.tensor([])

# The batches are moved to `device`, so the network has to live there too.
net.to(device)
net.eval()
with torch.no_grad():
    for data_t, target_t in test_dataloader:
        #Convert to be readable by model
        # NOTE(review): the training loop flattens its inputs; enable the next line when `net` is the fully connected model.
        #data_t = data_t.view(data_t.size(0), -1)

        data_t, target_t = data_t.to(device), target_t.to(device)
        # 1-D integer labels on the same device as the outputs.
        # .type(torch.LongTensor) would force them onto the CPU, and comparing
        # predictions against (N, 1) targets would broadcast to (N, N).
        labels_t = target_t.flatten().long()

        outputs_t = net(data_t.float())
        ce_loss_t = criterion(outputs_t, labels_t)
        kl_t = kl_loss(net)
        loss_t = ce_loss_t + kl_t

        batch_loss += loss_t.item()
        _, pred_t = torch.max(outputs_t.float(), dim=1)
        correct_t += torch.sum(pred_t == labels_t).item()
        total_t += labels_t.size(0)
        print(pred_t)
        print(target_t)
        # Store CPU copies so the saved files load on machines without a GPU.
        pred_total.append(pred_t.cpu())
        target_total.append(target_t.cpu())

val_acc.append(100 * correct_t/total_t)
val_loss.append(batch_loss/len(test_dataloader))
network_learned = batch_loss < valid_loss_min
print(f'test loss: {np.mean(val_loss):.4f}, test acc: {(100 * correct_t/total_t):.4f}\n')
print(f'corret: {correct_t:.4f}, total: {total_t:.4f}\n')
torch.save(pred_total, 'BNN_pred_total.pt')
torch.save(target_total, 'BNN_target_total.pt')