In [1]:
import sys
sys.path.append("./utils/")
import torch
from torchvision import datasets
from torchvision import transforms
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import torch.nn.utils.prune as prune
import utils.util as util

### Hyperparameters

In [2]:
batch_size = 100
kernel_size = 3
lr = 0.01
epochs = 100
momentum = 0.9

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Load Dataset

In [3]:
training_transoform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
test_transoform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

training_data = datasets.CIFAR10("./data", train=True, transform=training_transoform, download=True)
test_data = datasets.CIFAR10("./data", train=False, transform=test_transoform, download=True)

training_loader = torch.utils.data.DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

# Images have same size
input_size = training_data[0][0].shape
output_size = 10


# training_loader = iter(training_loader)
# images, labels = training_loader.next()

# plt.figure(figsize=(2,2))
# plt.imshow(np.transpose(images[1], (1,2,0)))
# plt.show()


Files already downloaded and verified
Files already downloaded and verified


### Network

In [4]:
class Cifar10Network(torch.nn.Module):

    def __init__(self, kernel_size, output_size, device):

        super(Cifar10Network, self).__init__()

        self.l1 = torch.nn.Conv2d(3, 64, kernel_size, bias=False, device=device)
        self.b1 = torch.nn.BatchNorm2d(64, device=device)
        self.r1 = torch.nn.ReLU()

        self.l2 = torch.nn.Conv2d(64, 64, kernel_size, bias=False, device=device)
        self.b2 = torch.nn.BatchNorm2d(64, device=device)
        self.r2 = torch.nn.ReLU()
        self.m2 = torch.nn.MaxPool2d(2,2)

        self.l3 = torch.nn.Conv2d(64, 128, kernel_size, bias=False, device=device)
        self.b3 = torch.nn.BatchNorm2d(128, device=device)
        self.r3 = torch.nn.ReLU()

        self.l4 = torch.nn.Conv2d(128, 128, kernel_size, bias=False, device=device)
        self.b4 = torch.nn.BatchNorm2d(128, device=device)
        self.r4 = torch.nn.ReLU()
        self.m4 = torch.nn.MaxPool2d(2,2)

        self.l5 = torch.nn.Conv2d(128, 256, kernel_size, bias=False, device=device)
        self.b5 = torch.nn.BatchNorm2d(256, device=device)
        self.r5 = torch.nn.ReLU()

        self.l6 = torch.nn.Conv2d(256, 256, kernel_size, bias=False, device=device)
        self.b6 = torch.nn.BatchNorm2d(256, device=device)
        self.r6 = torch.nn.ReLU()

        self.f7 = torch.nn.Flatten()
        self.l7 = torch.nn.Linear(256, 512, bias=False, device=device)
        self.b7 = torch.nn.BatchNorm1d(512, device=device)

        self.l8 = torch.nn.Linear(512, 512, bias=False, device=device)
        self.b8 = torch.nn.BatchNorm1d(512, device=device)

        self.l9 = torch.nn.Linear(512, 10, bias=False, device=device)
        self.b9 = torch.nn.BatchNorm1d(10, device=device)
        
        self.softMax = torch.nn.Softmax(dim=0)
        self.relu = torch.nn.Sigmoid()


    def forward(self, x):


        out = self.l1(x)
        out = self.b1(out)
        out = self.r1(out)

        out = self.l2(out) 
        out = self.b2(out) 
        out = self.r2(out)
        out = self.m2(out) 

        out = self.l3(out)
        out = self.b3(out) 
        out = self.r3(out)
        
        out = self.l4(out) 
        out = self.b4(out) 
        out = self.r4(out)
        out = self.m4(out) 

        out = self.l5(out) 
        out = self.b5(out)
        out = self.r5(out) 

        out = self.l6(out) 
        out = self.b6(out) 
        out = self.r6(out)

        out = self.f7(out)
        out = self.l7(out)
        out = self.b7(out)

        out = self.l8(out)
        out = self.b8(out) 

        out = self.l9(out) 
        out = self.b9(out) 

        return out

model = Cifar10Network(kernel_size, output_size, device)  
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 200)

### Training

In [5]:
# # writer = SummaryWriter("./test_grafici/Cifar10Network")
# save_path = "./models/Cifar10Network_model"

# maxAcc = 0

# for epoch in range(epochs):

#     loss = None

#     for i, (images, labels) in enumerate(training_loader):
        
#         step = (i+1) + epoch * len(training_loader)

#         # Forward phase
#         images = images.to(device)
#         labels = labels.to(device)
#         predictions = model(images)

#         labels_pred = torch.zeros(len(labels), output_size, device=device)
#         for j in range(len(labels)):
#             labels_pred[j][labels[j]] = 1

#         loss = loss_function(predictions, labels_pred)

#         # Backpropagation
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
        
#         if i % 100 == 0:
#             print(f"epoch = {epoch+1}/{epochs}, step = {i}/{len(training_loader)}, loss = {loss}")


#     # Evaluate accuracy on test dat 
#     accVal = util.getAccuracy(model, test_loader, device=device, dim=2)
#     print(f"accVal = {accVal}%")

#     if accVal > maxAcc:
#         maxAcc = accVal
#         torch.save(model.state_dict(), save_path)

#     # Update learning rate
#     #for g in optimizer.param_groups:
#     #    g['lr'] = lr * 0.95
    
#     scheduler.step()

#     # Write result on tensorboard
#     # writer.add_scalar("Training loss", loss, epoch)
#     # writer.add_scalar("Validation accuracy", accVal, epoch)  

### Load Model

In [6]:
from utils.util import load_model
from cifar10Model import Cifar10Network
model = Cifar10Network(kernel_size, 10, device)
load_model(model, "./models/Cifar10Network_model")
# acc = model.getAccuracy(test_loader)
# print(acc)

### Binarizzazione del modello

In [7]:
import customeLayer as cl
import binarize as bin

class Cifar10NetworkBin(torch.nn.Module):

    def __init__(self, model: Cifar10Network = None, device="cpu"):

        super(Cifar10NetworkBin, self).__init__()

        self.device = device
        self.weights = []
        self.bnWeights = []
        self.bnBias = []

        # Parametri del modello da binarizzare
        if model is not None:        
            g = list(model.parameters())
            for i in range(int(len(g) / 3)):
                self.weights.append(g[3*i])
                self.bnWeights.append(g[3*i + 1])
                self.bnBias.append(g[3*i + 2])

        else:

            self.weights.append(torch.empty(64,3,3,3, device=self.device))
            self.weights.append(torch.empty(64,64,3,3, device=self.device))
            self.weights.append(torch.empty(128,64,3,3, device=self.device))
            self.weights.append(torch.empty(128,128,3,3, device=self.device))
            self.weights.append(torch.empty(256,128,3,3, device=self.device))
            self.weights.append(torch.empty(256,256,3,3, device=self.device))
            self.weights.append(torch.empty(512,256, device=self.device))
            self.weights.append(torch.empty(512,512, device=self.device))
            self.weights.append(torch.empty(10,512, device=self.device))

            self.bnWeights.append(torch.empty(64, device=self.device))
            self.bnWeights.append(torch.empty(64, device=self.device))
            self.bnWeights.append(torch.empty(128, device=self.device))
            self.bnWeights.append(torch.empty(128, device=self.device))
            self.bnWeights.append(torch.empty(256, device=self.device))
            self.bnWeights.append(torch.empty(256, device=self.device))
            self.bnWeights.append(torch.empty(512, device=self.device))
            self.bnWeights.append(torch.empty(512, device=self.device))
            self.bnWeights.append(torch.empty(10, device=self.device))
            
            self.bnBias.append(torch.empty(64, device=self.device))
            self.bnBias.append(torch.empty(64, device=self.device))
            self.bnBias.append(torch.empty(128, device=self.device))
            self.bnBias.append(torch.empty(128, device=self.device))
            self.bnBias.append(torch.empty(256, device=self.device))
            self.bnBias.append(torch.empty(256, device=self.device))
            self.bnBias.append(torch.empty(512, device=self.device))
            self.bnBias.append(torch.empty(512, device=self.device))
            self.bnBias.append(torch.empty(10, device=self.device))


        # Layers
        # self.bin1 = cl.BinarizeLayer2Level2D(self.weights[0].shape[1], device=self.device)
        self.l1 = cl.BinarizeConv2d(self.weights[0].shape[1], self.weights[0].shape[0], kernel_size=3, device=self.device)
        # self.l1 = cl.ConvBinLayer(self.weights[0], device=device, binfunction=bin.binarize11)
        # self.b1 = cl.BatchNorm2D(self.bnWeights[0], self.bnBias[0], device=self.device)
        self.b1 = torch.nn.BatchNorm2d(self.bnWeights[0].shape[0], device=self.device)
        self.ht1 = torch.nn.Hardtanh(inplace=True)

        # self.bin2 = cl.BinarizeLayer2Level2D(self.weights[1].shape[1], device=self.device)
        self.l2 = cl.BinarizeConv2d(self.weights[1].shape[1], self.weights[1].shape[0], kernel_size=3, device=self.device)
        # self.l2 = cl.ConvBinLayer(self.weights[1], device=device, binfunction=bin.binarize11)
        # self.b2 = cl.BatchNorm2D(self.bnWeights[1], self.bnBias[1], device=self.device)
        self.b2 = torch.nn.BatchNorm2d(self.bnWeights[1].shape[0], device=self.device)
        self.m2 = torch.nn.MaxPool2d(2,2)
        self.ht2 = torch.nn.Hardtanh(inplace=True)

        # self.bin3 = cl.BinarizeLayer2Level2D(self.weights[2].shape[1], device=self.device)
        self.l3 = cl.BinarizeConv2d(self.weights[2].shape[1], self.weights[2].shape[0], kernel_size=3, device=self.device)
        # self.l3 = cl.ConvBinLayer(self.weights[2], device=device, binfunction=bin.binarize11)
        # self.b3 = cl.BatchNorm2D(self.bnWeights[2], self.bnBias[2], device=self.device)
        self.b3 = torch.nn.BatchNorm2d(self.bnWeights[2].shape[0], device=self.device)
        self.ht3 = torch.nn.Hardtanh(inplace=True)

        # self.bin4 = cl.BinarizeLayer2Level2D(self.weights[3].shape[1], device=self.device)
        self.l4 = cl.BinarizeConv2d(self.weights[3].shape[1], self.weights[3].shape[0], kernel_size=3, device=self.device)
        # self.l4 = cl.ConvBinLayer(self.weights[3], device=device, binfunction=bin.binarize11)
        # self.b4 = cl.BatchNorm2D(self.bnWeights[3], self.bnBias[3], device=self.device)
        self.b4 = torch.nn.BatchNorm2d(self.bnWeights[3].shape[0], device=self.device)
        self.m4 = torch.nn.MaxPool2d(2,2)
        self.ht4 = torch.nn.Hardtanh(inplace=True)

        # self.bin5 = cl.BinarizeLayer2Level2D(self.weights[4].shape[1], device=self.device)
        self.l5 = cl.BinarizeConv2d(self.weights[4].shape[1], self.weights[4].shape[0], kernel_size=3, device=self.device)
        # self.l5 = cl.ConvBinLayer(self.weights[4], device=device, binfunction=bin.binarize11)
        # self.b5 = cl.BatchNorm2D(self.bnWeights[4], self.bnBias[4], device=self.device)
        self.b5 = torch.nn.BatchNorm2d(self.bnWeights[4].shape[0], device=self.device)
        self.ht5 = torch.nn.Hardtanh(inplace=True)

        # self.bin6 = cl.BinarizeLayer2Level2D(self.weights[5].shape[1], device=self.device)
        self.l6 = cl.BinarizeConv2d(self.weights[5].shape[1], self.weights[5].shape[0], kernel_size=3, device=self.device)
        # self.l6 = cl.ConvBinLayer(self.weights[5], device=device, binfunction=bin.binarize11)
        # self.b6 = cl.BatchNorm2D(self.bnWeights[5], self.bnBias[5], device=self.device)
        self.b6 = torch.nn.BatchNorm2d(self.bnWeights[5].shape[0], device=self.device)
        self.ht6 = torch.nn.Hardtanh(inplace=True)

        self.f7 = torch.nn.Flatten()
        # self.bin7 = cl.BinarizeLayer2Level(self.weights[6].shape[1], device=self.device)
        self.l7 = cl.BinarizeLinear(self.weights[6].shape[1], self.weights[6].shape[0], device=self.device)
        # self.l7 = cl.LinearBin(self.weights[6], device=device, binFunction=bin.binarize11)
        # self.b7 = cl.BatchNorm1D(512, self.bnWeights[6], self.bnBias[6], device=self.device)
        self.b7 = torch.nn.BatchNorm1d(self.bnWeights[6].shape[0], device=self.device)
        self.ht7 = torch.nn.Hardtanh(inplace=True)

        # self.bin8 = cl.BinarizeLayer2Level(self.weights[7].shape[1], device=self.device)
        self.l8 = cl.BinarizeLinear(self.weights[7].shape[1], self.weights[7].shape[0], device=self.device)
        # self.l8 = cl.LinearBin(self.weights[7], device=device, binFunction=bin.binarize11)
        # self.b8 = cl.BatchNorm1D(512, self.bnWeights[7], self.bnBias[7], device=self.device)
        self.b8 = torch.nn.BatchNorm1d(self.bnWeights[7].shape[0], device=self.device)
        self.ht8 = torch.nn.Hardtanh(inplace=True)

        # self.bin9 = cl.BinarizeLayer2Level(self.weights[8].shape[1], device=self.device)
        self.l9 = cl.BinarizeLinear(self.weights[8].shape[1], self.weights[8].shape[0], device=self.device)
        # self.l9 = cl.LinearBin(self.weights[8], device=device, binFunction=bin.binarize11)
        # self.b9 = cl.BatchNorm1D(10, self.bnWeights[8], self.bnBias[8], device=self.device)
        self.b9 = torch.nn.BatchNorm1d(self.bnWeights[8].shape[0], device=self.device)
        self.ht9 = torch.nn.Hardtanh(inplace=True)

        self.softMax = torch.nn.Softmax(dim=1)
        self.relu = torch.nn.ReLU()
        self.log = torch.nn.LogSoftmax()

    
    def setInference(self, inference: bool):
        return

        self.b1.inference = inference
        self.b2.inference = inference
        self.b3.inference = inference
        self.b4.inference = inference
        self.b5.inference = inference
        self.b6.inference = inference
        self.b7.inference = inference
        self.b8.inference = inference
        self.b9.inference = inference

    def frozeParameter(self, batch_size):
        return 
        
        self.b1.frozeParameters(batch_size)
        self.b2.frozeParameters(batch_size)
        self.b3.frozeParameters(batch_size)
        self.b4.frozeParameters(batch_size)
        self.b5.frozeParameters(batch_size)
        self.b6.frozeParameters(batch_size)
        self.b7.frozeParameters(batch_size)
        self.b8.frozeParameters(batch_size)
        self.b9.frozeParameters(batch_size)
        
        
    def forward(self, x):

        # Binarizzazione dell'ingresso
        # out = self.bin1(x)
        out = self.l1(x)
        out = self.b1(out)
        out = self.ht1(out)

        # out = self.bin2(out)
        out = self.l2(out) 
        out = self.b2(out)
        out = self.m2(out)
        out = self.ht2(out)

        # out = self.bin3(out)
        out = self.l3(out)
        out = self.b3(out)
        out = self.ht3(out)
        
        # out = self.bin4(out)
        out = self.l4(out) 
        out = self.b4(out)
        out = self.m4(out)
        out = self.ht4(out)

        # out = self.bin5(out)
        out = self.l5(out) 
        out = self.b5(out)
        out = self.ht5(out)

        # out = self.bin6(out)
        out = self.l6(out) 
        out = self.b6(out)
        out = self.ht6(out)

        out = self.f7(out)
        # out = self.bin7(out)
        out = self.l7(out)
        out = self.b7(out)
        out = self.ht7(out)

        # out = self.bin8(out)
        out = self.l8(out)
        out = self.b8(out)
        out = self.ht8(out)
    
        # out = self.bin9(out)
        out = self.l9(out)
        out = self.b9(out)
        # out = self.ht9(out)

        return self.log(out)

    def regularization(self):
        r = 0
        for g in self.named_parameters():
            if g[0].find("weights") != -1:
                r += torch.sum(torch.pow(1 - torch.abs(g[1].data),2))
        return r
        

    def trainModel(self, training_loader, test_loader, epochs=100, lr=0.01, writer = None, PATH = None):

        loss_function = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 200)
    
        maxAcc = 0
        
        for epoch in range(epochs):

            total_loss = 0
            loss = None
            self.setInference(False)

            for i, (images, labels) in enumerate(training_loader):
                
                step = (i+1) + epoch * len(training_loader)

                # Forward phase
                images = images.to(self.device)
                labels = labels.to(self.device)

                predictions = self(images)

                # labels_pred = torch.zeros(len(labels), 10, device=self.device)
                # for j in range(len(labels)):
                #     labels_pred[j][labels[j]] = 1

                loss = loss_function(predictions, labels)
                total_loss += float(loss)

                # Backpropagation
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                if i % 100 == 0:
                    print(f"epoch = {epoch+1}/{epochs}, step = {i}/{len(training_loader)}, loss = {loss}")


            else:

                self.frozeParameter(len(images))
                self.setInference(True)
                accVal = util.getAccuracy(self, test_loader, device=self.device, dim=2)
                print(f"accVal = {accVal}%")

            # Save the model if it is the best so far 
            if PATH != None and accVal > maxAcc:
                maxAcc = accVal
                torch.save(self.state_dict(), PATH)

            # Update learning rate
            scheduler.step()

            # Write result on tensorboard
            if writer != None:
                writer.add_scalar("Training loss", total_loss / len(training_loader), epoch)
                writer.add_scalar("Validation accuracy", accVal, epoch) 
            # -----------------------------------------------------


bmodel = Cifar10NetworkBin(model, device=device)

### Training del modello binarizzato 

In [9]:
PATH = "./models/Cifar10Network_binarized_model"
# writer = SummaryWriter("./test_grafici/Cifar10NetworkBinarized")
writer = None
cl.init_model(bmodel)
bmodel.trainModel(training_loader, test_loader, epochs, lr=0.1, writer=writer, PATH=PATH)

  return self.log(out)


epoch = 1/100, step = 0/500, loss = 2.4058034420013428
epoch = 1/100, step = 100/500, loss = 2.3098220825195312
epoch = 1/100, step = 200/500, loss = 2.1831789016723633
epoch = 1/100, step = 300/500, loss = 2.2150771617889404
epoch = 1/100, step = 400/500, loss = 2.2769320011138916
accVal = 16.03%
epoch = 2/100, step = 0/500, loss = 2.236830472946167
epoch = 2/100, step = 100/500, loss = 2.208827018737793
epoch = 2/100, step = 200/500, loss = 2.299254894256592
epoch = 2/100, step = 300/500, loss = 2.3299472332000732
epoch = 2/100, step = 400/500, loss = 2.231877326965332
accVal = 15.18%
epoch = 3/100, step = 0/500, loss = 2.2881555557250977
epoch = 3/100, step = 100/500, loss = 2.197570562362671
epoch = 3/100, step = 200/500, loss = 2.2876925468444824
epoch = 3/100, step = 300/500, loss = 2.150352716445923
epoch = 3/100, step = 400/500, loss = 2.2343738079071045
accVal = 16.12%
epoch = 4/100, step = 0/500, loss = 2.2825205326080322
epoch = 4/100, step = 100/500, loss = 2.27427721023559

KeyboardInterrupt: 