# Resnet33 for MNIST (zero-padded to the CIFAR-10 image size, 32x32) using Brevitas

In [2]:
#Install once
!pip install git+https://github.com/Xilinx/brevitas.git

Collecting git+https://github.com/Xilinx/brevitas.git
  Cloning https://github.com/Xilinx/brevitas.git to /tmp/pip-req-build-fxz894v6
  Running command git clone -q https://github.com/Xilinx/brevitas.git /tmp/pip-req-build-fxz894v6
Collecting docrep (from Brevitas==0.2.0a0)
  Downloading https://files.pythonhosted.org/packages/dd/4a/ac09d6e07713e22baa4ab4e6f422d25e53425f3dc042616387dfbc272504/docrep-0.2.7.tar.gz
Building wheels for collected packages: Brevitas, docrep
  Building wheel for Brevitas (setup.py) ... [?25ldone
[?25h  Created wheel for Brevitas: filename=Brevitas-0.2.0a0-cp37-cp37m-linux_x86_64.whl size=1726385 sha256=e283c3b84aa91759ae83b0b0b0ada5ac1e479ffdf3da59501e5a7fa272c34107
  Stored in directory: /tmp/pip-ephem-wheel-cache-ifhos0np/wheels/7b/ba/1b/b3bebdeb51db39fc118c4d60ef8556d8a9ab0f1bfda8767a3d
  Building wheel for docrep (setup.py) ... [?25ldone
[?25h  Created wheel for docrep: filename=docrep-0.2.7-cp37-none-any.whl size=23002 sha256=6ee068467a08371b0fb0eaec

### Import Statements

In [3]:
#Resnet33 for images CIFAR-10 3*32*32 = 3*1024
#Resnet33 for images MNIST 1*28*28
#import statements
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

import brevitas.nn as qnn
from brevitas.core.quant import QuantType

### Data Preprocessing

In [4]:
# Load the data and build the mini-batch loaders.

# Normalisation pipeline (three-channel mean/std). It is only referenced by the
# disabled CIFAR-10 variant below; the MNIST datasets use a plain ToTensor().
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# CIFAR-10 variant (disabled):
#trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
#                                        download=True, transform=transform)

# MNIST training split, shuffled, served in mini-batches of 4.
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transforms.ToTensor())
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# MNIST test split, same batch size, served in dataset order.
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# CIFAR-10 class names (disabled):
#classes = ('plane', 'car', 'bird', 'cat',
#           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Display name for each MNIST label index: '0' .. '9'.
classes = tuple(str(digit) for digit in range(10))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


### Model Definition

In [5]:
# Quantized ResNet

class residual_unit(torch.nn.Module):
    """Residual unit: two 8-bit quantized 3-tap convolutions with an identity skip.

    Data flow: conv -> batch-norm -> quantized ReLU -> conv -> batch-norm,
    then the input is added back and a second quantized ReLU is applied.
    Both convolutions map 32 channels to 32 channels with padding 1, so the
    input needs 32 channels for the skip addition to line up.

    N, C, L and W are accepted but not used by this block. ``training`` is
    passed as the ``affine`` flag of both BatchNorm1d layers.
    """

    def __init__(self, N, C, L, W, training=False):
        super().__init__()
        # Shared settings for the two convolutions and the two activations.
        conv_cfg = dict(in_channels=32, out_channels=32, kernel_size=3, padding=1,
                        weight_quant_type=QuantType.INT, weight_bit_width=8)
        relu_cfg = dict(quant_type=QuantType.INT, bit_width=8, max_val=6)

        # Sub-modules are registered in the order in which forward() applies them.
        self.ru_conv1 = qnn.QuantConv1d(**conv_cfg)
        self.ru_bn1 = torch.nn.BatchNorm1d(32, affine=training)
        self.ru_act1 = qnn.QuantReLU(**relu_cfg)
        self.ru_conv2 = qnn.QuantConv1d(**conv_cfg)
        self.ru_bn2 = torch.nn.BatchNorm1d(32, affine=training)
        self.ru_act2 = qnn.QuantReLU(**relu_cfg)

    def forward(self, x):
        """Run the residual branch on ``x``, add ``x`` back and activate the sum."""
        branch = self.ru_act1(self.ru_bn1(self.ru_conv1(x)))
        branch = self.ru_bn2(self.ru_conv2(branch))
        # Skip connection: the input is added before the final activation.
        return self.ru_act2(branch + x)

class residual_stack(torch.nn.Module):
    """Residual stack: input convolution, two residual units and a max-pool.

    An 8-bit quantized 3-tap convolution maps the ``C`` input channels to 32
    channels, followed by batch-norm, two ``residual_unit`` blocks and a
    MaxPool1d with kernel 2 / stride 2 that halves the length.

    N, L and W are only forwarded to the residual units. ``training`` is used
    as the ``affine`` flag of the BatchNorm1d layer and forwarded to the units.
    """

    def __init__(self, N, C, L, W, training=False):
        super().__init__()
        self.rs_conv1 = qnn.QuantConv1d(
            in_channels=C,
            out_channels=32,
            kernel_size=3,
            padding=1,
            weight_quant_type=QuantType.INT,
            weight_bit_width=8,
        )
        self.rs_bn1 = torch.nn.BatchNorm1d(32, affine=training)
        # Two instances of the custom residual unit defined in this notebook.
        self.rs_ru1 = residual_unit(N, C, L, W, training)
        self.rs_ru2 = residual_unit(N, C, L, W, training)
        self.rs_mp1 = torch.nn.MaxPool1d(kernel_size=2, stride=2)

    def forward(self, x):
        """Apply the stages in sequence and return the pooled result."""
        stages = (self.rs_conv1, self.rs_bn1, self.rs_ru1, self.rs_ru2, self.rs_mp1)
        y = x
        for stage in stages:
            y = stage(y)
        return y

class resnet33_quant(torch.nn.Module):
    """Quantized 1-D ResNet classifier: six residual stacks and three linear layers.

    The input is a tensor of shape (batch, C, L). Every residual stack outputs
    32 channels and halves the length with a max-pool, so after six stacks the
    length is ``L // 64``. The flattened features go through two 128-unit
    quantized linear layers, each followed by a quantized ReLU, and a final
    10-way linear layer. The output is raw class scores of shape (batch, 10);
    no softmax is applied.

    The attributes ``selu1`` / ``selu2`` hold QuantReLU modules; their names
    are kept so that existing ``state_dict`` keys stay valid.

    Args:
        N: Forwarded to the residual stacks; not otherwise used here.
        C: Number of input channels of the first stack.
        L: Input length. It sizes the first linear layer as ``32 * (L // 64)``
            features, i.e. 512 for L = 1024. L should be at least 64.
        W: Forwarded to the residual stacks; not otherwise used here.
        training: Forwarded to the residual stacks.
    """

    def __init__(self, N, C, L, W, training=False):
        super(resnet33_quant, self).__init__()
        # The first stack consumes the C input channels; every later stack
        # receives the 32 channels produced by its predecessor.
        self.rn33_rs1 = residual_stack(N, C, L, W, training)         # output is N*32*(L//2)
        self.rn33_rs2 = residual_stack(N, 32, L // 2, W, training)   # output is N*32*(L//4)
        self.rn33_rs3 = residual_stack(N, 32, L // 4, W, training)   # output is N*32*(L//8)
        self.rn33_rs4 = residual_stack(N, 32, L // 8, W, training)   # output is N*32*(L//16)
        self.rn33_rs5 = residual_stack(N, 32, L // 16, W, training)  # output is N*32*(L//32)
        self.rn33_rs6 = residual_stack(N, 32, L // 32, W, training)  # output is N*32*(L//64)
        self.flat = torch.nn.Flatten()  # output is N*(32*(L//64))

        # Six stride-2 poolings shrink the length to L // 64 (512 features for L = 1024).
        flat_features = 32 * (L // 64)
        self.fc1 = qnn.QuantLinear(flat_features, 128, bias=True, weight_quant_type=QuantType.INT, weight_bit_width=8)  # output is N*128
        self.selu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)
        self.fc2 = qnn.QuantLinear(128, 128, bias=True, weight_quant_type=QuantType.INT, weight_bit_width=8)  # output is N*128
        self.selu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)
        self.fc3 = qnn.QuantLinear(128, 10, bias=True, weight_quant_type=QuantType.INT, weight_bit_width=8)  # output is N*10

    def forward(self, x):
        """Return the (batch, 10) class scores for an input of shape (batch, C, L)."""
        y = x
        # Feature extractor: six residual stacks, each halving the length.
        for stack in (self.rn33_rs1, self.rn33_rs2, self.rn33_rs3,
                      self.rn33_rs4, self.rn33_rs5, self.rn33_rs6):
            y = stack(y)
        y = self.flat(y)
        # Classifier head; the last layer returns unnormalised scores.
        y = self.selu1(self.fc1(y))
        y = self.selu2(self.fc2(y))
        return self.fc3(y)

### Define Optimizer, Loss Function

In [6]:
# Problem dimensions passed to the model constructor. N equals the DataLoader
# batch size and L is the flattened 32*32 image length.
N = 4
C = 1
L = 1024
W = 1
modulation_classes = 10
#x = torch.randn(N,C,L)
#y = torch.randn(N, modulation_classes)

# NOTE: `training` is defined here but is not passed to resnet33_quant below,
# so the model is built with its default training=False.
training = True
learning_rate = 1e-3

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Build the model and move it to the selected device.
model = resnet33_quant(N, C, L, W).to(device)

# Uncomment to inspect the architecture:
#print(model)

# Iterators over the two loaders; each item is an [inputs, labels] pair for one mini-batch.
train_data = iter(trainloader)
test_data = iter(testloader)

#print(train_data.next()[1].size())

# Total number of elements across all model parameters.
print(sum(weights.nelement() for weights in model.parameters()))

# Per-parameter breakdown (disabled):
#for name, param in model.named_parameters():
    #if param.requires_grad:
    #print('{:s}\t{:s}\t{:s}'.format(name.ljust(40), str(param.size()).ljust(30), str(param.nelement()).rjust(10)))

# Loss function: cross-entropy on the raw class scores.
# Alternative: criterion = torch.nn.MSELoss(reduction='sum')
criterion = torch.nn.CrossEntropyLoss()

# Optimizer: SGD with momentum and a small weight decay.
# Alternative: torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=...)
optimizer = torch.optim.SGD(
    model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-5)

cpu


  "Consider removing it.".format(name))
  "Consider removing it.".format(name))
  "Consider removing it.".format(name))


173636


### Train Network

In [7]:
import time

# Train for two epochs with a hand-written plain gradient-descent step
# (param <- param - learning_rate * grad). The average loss is printed every
# 2000 mini-batches and the CPU time of this process is printed at the end.
model.train()  # make sure BatchNorm uses batch statistics while training
tic = time.process_time()
for epoch in range(0, 2):  # training epochs
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):  # loop over mini-batches
        inputs, labels = data[0].to(device), data[1].to(device)

        # Zero-pad each 28x28 image to 32x32 (2 pixels per side) and flatten it
        # to (batch, 1, 1024). The batch size is read from the tensor, so a
        # different or partial batch does not break the reshape.
        inputs_new = F.pad(inputs, (2, 2, 2, 2)).reshape(inputs.size(0), 1, 32 * 32)

        outputs = model(inputs_new)        # forward pass
        loss = criterion(outputs, labels)  # cross-entropy against the labels
        loss.backward()                    # back-propagation

        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

        # Manual parameter update. Parameters that received no gradient in this
        # step have grad == None and are skipped instead of raising a TypeError.
        with torch.no_grad():
            for param in model.parameters():
                if param.grad is not None:
                    param -= learning_rate * param.grad

        # Clear the gradients before the next mini-batch.
        model.zero_grad()

toc = time.process_time()
print(toc-tic)
print('Finished Training')


#Save Network
#PATH = './cifar_net.pth'
#torch.save(net.state_dict(), PATH) 
        

Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/nikulshr/anaconda3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/nikulshr/anaconda3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/nikulshr/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/nikulshr/anaconda3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/home/nikulshr/anaconda3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/nikulshr/anaconda3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/nikulshr/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_by

KeyboardInterrupt: 

### Class-wise accuracy

In [None]:
# Save the trained weights, then reload them into a fresh model for evaluation.
PATH = './resnet33_new_relu.pth'
torch.save(model.state_dict(), PATH) 

model1 = resnet33_quant(N,C,L,W)
model1.to(device)
# map_location keeps the load working when the checkpoint's device differs.
model1.load_state_dict(torch.load(PATH, map_location=device))
# Evaluation mode: BatchNorm uses its running statistics instead of the
# statistics of each 4-image test batch, and those statistics are not updated.
model1.eval()

# Per-class counters, indexed by label 0..9.
class_correct = [0.0] * 10
class_total = [0.0] * 10
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        # Zero-pad 28x28 -> 32x32 and flatten to (batch, 1, 1024); the batch
        # size is taken from the tensor rather than hard-coded.
        images_new = F.pad(images, (2, 2, 2, 2)).reshape(images.size(0), 1, 32 * 32)
        outputs = model1(images_new)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels)
        # Tally every sample of the batch, whatever the batch size is.
        for label, hit in zip(labels.tolist(), c.tolist()):
            class_correct[label] += hit
            class_total[label] += 1


for i in range(10):
    # A class with no test samples is reported as NaN instead of dividing by zero.
    accuracy = 100 * class_correct[i] / class_total[i] if class_total[i] else float('nan')
    print('Accuracy of %5s : %f %%' % (
        classes[i], accuracy))

### Total accuracy across all classes

In [None]:
# Overall accuracy of the reloaded model on the test set.
# Evaluation mode makes BatchNorm use its running statistics; the call is
# idempotent, so it is safe even if the model is already in eval mode.
model1.eval()

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        # Zero-pad 28x28 -> 32x32 and flatten to (batch, 1, 1024); the batch
        # size is taken from the tensor rather than hard-coded.
        images_new = F.pad(images, (2, 2, 2, 2)).reshape(images.size(0), 1, 32 * 32)
        outputs = model1(images_new)
        # Predicted class = index of the largest score in each row.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %f %%' % (
    100 * correct / total))