In [9]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
#from utee import misc, quant, selector

import torch.nn.functional as F  # useful stateless functions

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np


#Load CIFAR-10
NUM_TRAIN = 49000

# The torchvision.transforms package provides tools for preprocessing data
# and for performing data augmentation; here we set up a transform to
# preprocess the data by subtracting the mean RGB value and dividing by the
# standard deviation of each RGB value; we've hardcoded the mean and std.
transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

# We set up a Dataset object for each split (train / val / test); Datasets load
# training examples one at a time, so we wrap each Dataset in a DataLoader which
# iterates through the Dataset and forms minibatches. We divide the CIFAR-10
# training set into train and val sets by passing a Sampler object to the
# DataLoader telling how it should sample from the underlying Dataset.
cifar10_train = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./cs231n/datasets', train=True, download=True,
                           transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64, 
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('./cs231n/datasets', train=False, download=True, 
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [10]:
USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cuda


In [11]:
def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)

def test_flatten():
    x = torch.arange(12).view(2, 1, 3, 2)
    print('Before flattening: ', x)
    print('After flattening: ', flatten(x))

#test_flatten()

In [12]:
def check_accuracy_part34(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [13]:
def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    t_begin = time.time()
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()
            

            if t % print_every == 0:
                t_elapse = time.time() - t_begin
                print('Elapsed %.4f s, Epoch %d,  Iteration %d, loss = %.4f' % (t_elapse, e, t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [14]:
#torch.save(model.state_dict(), '../pretrain_model/training.pt')

In [20]:
class ExpConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        ########################################################################
        # TODO: Set up the layers you need for a three-layer ConvNet with the  #
        # architecture defined above.                                          #
        ########################################################################
        self.conv1 = nn.Conv2d(3, 32, 5, stride=1, padding=2)
        nn.init.kaiming_normal_(self.conv1.weight)
        nn.init.constant_(self.conv1.bias, 0)
        self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(self.conv2.weight)
        nn.init.constant_(self.conv2.bias, 0)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64*8*8, 512)
        self.fc2 = nn.Linear(512, 10)
        nn.init.kaiming_normal_(self.fc1.weight)
        nn.init.kaiming_normal_(self.fc2.weight)
        nn.init.constant_(self.fc1.bias, 0)
        nn.init.constant_(self.fc2.bias, 0)
        ########################################################################
        #                          END OF YOUR CODE                            #       
        ########################################################################

    def forward(self, x):
        scores = None
        ########################################################################
        # TODO: Implement the forward function for a 3-layer ConvNet. you      #
        # should use the layers you defined in __init__ and specify the        #
        # connectivity of those layers in forward()                            #
        ########################################################################
        x = self.conv1(x)
        #x = self.bn1(x)
        x = F.relu(x)
        x = self.maxpool(F.relu(x))
        x = self.conv2(x)
        #x = self.bn2(x)
        x = F.relu(x)
        x = self.maxpool(F.relu(x))
        x = flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        
        scores = x
        ########################################################################
        #                             END OF YOUR CODE                         #
        ########################################################################
        return scores

In [15]:
from Model.quantization import *

class FixedLayerConvNet(nn.Module):
    def __init__(self, _bits=8):
        super().__init__()
        ########################################################################
        # TODO: Set up the layers you need for a three-layer ConvNet with the  #
        # architecture defined above.                                          #
        ########################################################################
        self.bits = _bits
        self.conv1 = nn.Conv2d(3, 32, 5, stride=1, padding=2)
        #nn.init.kaiming_normal_(self.conv1.weight)
        #nn.init.constant_(self.conv1.bias, 0)
        self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
        #nn.init.kaiming_normal_(self.conv2.weight)
        #nn.init.constant_(self.conv2.bias, 0)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        #self.bn2 = nn.BatchNorm2d(512)
        self.fc1 = nn.Linear(64*8*8, 512)
        self.fc2 = nn.Linear(512, 10)
        #nn.init.kaiming_normal_(self.fc.weight)
        self.quant = activation_quantization(self.bits, Quant.linear)
        ########################################################################
        #                          END OF YOUR CODE                            #       
        ########################################################################

    def forward(self, x):
        scores = None
        ########################################################################
        # TODO: Implement the forward function for a 3-layer ConvNet. you      #
        # should use the layers you defined in __init__ and specify the        #
        # connectivity of those layers in forward()                            #
        ########################################################################
        #x = quantization.apply(x, self.bits, Quant.linear)
        x = self.quant(x)
        x = self.conv1(x)
        x = F.relu(x)
        x = self.maxpool(F.relu(x))
        #x = quantization.apply(x, self.bits, Quant.linear)
        x = self.quant(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpool(F.relu(x))
        x = flatten(x)
        x = self.quant(x)
        #x = quantization.apply(x, self.bits, Quant.linear)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.quant(x)
        #x = quantization.apply(x, self.bits, Quant.linear)
        x = self.fc2(x)
        
        scores = x
        ########################################################################
        #                             END OF YOUR CODE                         #
        ########################################################################
        return scores


In [17]:
from Model.quantization import *

#TODO: 1. Implement different function
#      2. differentiate FC and CONV
def quantize_weight(model, bits):
    weight = model.state_dict()
    for k, v in weight.items():
        weight[k] = Quant.linear(v, bits)
        #print(weight[k])
    model.load_state_dict(weight)
    return model

#TODO: Add a dictionary for bit width and function.
def train_fixed_weight(model, optimizer, epochs=1, bits=8):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    t_begin = time.time()
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()
            
            #Add quantization for weight
            model = quantize_weight(model, bits)

            if t % print_every == 0:
                t_elapse = time.time() - t_begin
                print('Elapsed %.4f s, Epoch %d,  Iteration %d, loss = %.4f' % (t_elapse, e, t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()

In [69]:
#Load module to loadmodel
PATH = '../pretrain_model/training.pt'

exp_bits = 4

#define the model
fix_model = FixedLayerConvNet(exp_bits)
direct_fix_model = FixedLayerConvNet(exp_bits)
model = ExpConvNet()


#load model
fix_model.load_state_dict(torch.load(PATH))
fix_model = quantize_weight(fix_model, exp_bits)

direct_fix_model.load_state_dict(torch.load(PATH))
direct_fix_model = quantize_weight(direct_fix_model, exp_bits)

model.load_state_dict(torch.load(PATH))

#Solve the weight type problem, change to cudafloat tensor
if USE_GPU:
    fix_model.cuda()
    fix_model = torch.nn.DataParallel(fix_model, device_ids=range(torch.cuda.device_count()))

if USE_GPU:
    direct_fix_model.cuda()
    direct_fix_model = torch.nn.DataParallel(direct_fix_model, device_ids=range(torch.cuda.device_count()))
    #cudnn.benchmark = True
    
if USE_GPU:
    model.cuda()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))


In [70]:
print("\nOriginal Floating Point Accuracy:")
check_accuracy_part34(loader_test, model)
print("Finetune Fixed Point Accuracy:")
check_accuracy_part34(loader_test, fix_model)
print("\nDirect Fixed Point Accuracy:")
check_accuracy_part34(loader_test, direct_fix_model)



Original Floating Point Accuracy:
Checking accuracy on test set
Got 6950 / 10000 correct (69.50)
Finetune Fixed Point Accuracy:
Checking accuracy on test set
Got 5423 / 10000 correct (54.23)

Direct Fixed Point Accuracy:
Checking accuracy on test set
Got 5423 / 10000 correct (54.23)


In [71]:
#Train this model
learning_rate = 1e-3

optimizer = optim.Adam(params=fix_model.parameters(), lr=learning_rate)
#optimizer = optim.SGD(fix_model.parameters(), lr=learning_rate,momentum=0.9, nesterov=True)
train_fixed_weight(fix_model, optimizer, epochs=10)

Elapsed 0.0152 s, Epoch 0,  Iteration 0, loss = 1.2426
Checking accuracy on validation set
Got 533 / 1000 correct (53.30)

Elapsed 1.5043 s, Epoch 0,  Iteration 100, loss = 1.1681
Checking accuracy on validation set
Got 536 / 1000 correct (53.60)

Elapsed 2.9968 s, Epoch 0,  Iteration 200, loss = 1.5309
Checking accuracy on validation set
Got 539 / 1000 correct (53.90)

Elapsed 4.4826 s, Epoch 0,  Iteration 300, loss = 1.0697
Checking accuracy on validation set
Got 538 / 1000 correct (53.80)

Elapsed 5.9684 s, Epoch 0,  Iteration 400, loss = 1.1535
Checking accuracy on validation set
Got 539 / 1000 correct (53.90)

Elapsed 7.4563 s, Epoch 0,  Iteration 500, loss = 1.5188
Checking accuracy on validation set
Got 541 / 1000 correct (54.10)

Elapsed 8.9535 s, Epoch 0,  Iteration 600, loss = 1.6893
Checking accuracy on validation set
Got 545 / 1000 correct (54.50)

Elapsed 10.4472 s, Epoch 0,  Iteration 700, loss = 1.1961
Checking accuracy on validation set
Got 572 / 1000 correct (57.20)

E

Elapsed 94.8886 s, Epoch 8,  Iteration 200, loss = 0.6344
Checking accuracy on validation set
Got 603 / 1000 correct (60.30)

Elapsed 96.4020 s, Epoch 8,  Iteration 300, loss = 0.9142
Checking accuracy on validation set
Got 598 / 1000 correct (59.80)

Elapsed 97.9242 s, Epoch 8,  Iteration 400, loss = 1.0082
Checking accuracy on validation set
Got 602 / 1000 correct (60.20)

Elapsed 99.4206 s, Epoch 8,  Iteration 500, loss = 0.7071
Checking accuracy on validation set
Got 602 / 1000 correct (60.20)

Elapsed 100.9164 s, Epoch 8,  Iteration 600, loss = 0.9210
Checking accuracy on validation set
Got 599 / 1000 correct (59.90)

Elapsed 102.4089 s, Epoch 8,  Iteration 700, loss = 1.0858
Checking accuracy on validation set
Got 595 / 1000 correct (59.50)

Elapsed 103.4445 s, Epoch 9,  Iteration 0, loss = 1.3169
Checking accuracy on validation set
Got 602 / 1000 correct (60.20)

Elapsed 104.9390 s, Epoch 9,  Iteration 100, loss = 1.0781
Checking accuracy on validation set
Got 594 / 1000 correct

In [72]:
print("Finetune Fixed Point Accuracy:")
check_accuracy_part34(loader_test, fix_model)
print("\nDirect Fixed Point Accuracy:")
check_accuracy_part34(loader_test, direct_fix_model)


Finetune Fixed Point Accuracy:
Checking accuracy on test set
Got 5959 / 10000 correct (59.59)

Direct Fixed Point Accuracy:
Checking accuracy on test set
Got 5423 / 10000 correct (54.23)


In [44]:
print(direct_fix_model.module.state_dict()['conv1.weight']*2**(exp_bits))

tensor([[[[  4.,  10.,  -2.,  12.,  10.],
          [  2.,  -4.,  12.,  12.,   6.],
          [  0.,  18.,   0.,  12.,  -8.],
          [-18.,   0.,  -8., -10., -14.],
          [-12., -22.,   0.,  -4.,  -6.]],

         [[ -6., -14., -22.,   4.,  10.],
          [  6.,   2.,  -4.,   6.,  -2.],
          [  8.,  26.,  10.,   8.,   0.],
          [ -4.,  12., -12., -12.,  -6.],
          [  4.,   2.,  16., -24.,  -6.]],

         [[  0.,  24.,  -6.,  24.,  22.],
          [-16., -28.,  12.,  -2.,   0.],
          [ -2.,   4.,   0.,  10.,   8.],
          [ -2.,  -6.,  -4., -10.,  -8.],
          [ -2.,  -6.,  -6.,   0.,  -6.]]],


        [[[ -2.,   2.,   2.,  -2.,  -6.],
          [ -2.,  -6.,  10.,   6.,   2.],
          [ -2., -20.,  -4.,  10.,  46.],
          [  8.,   2.,  -2.,   4.,  14.],
          [ -2.,  14.,  -4., -30.,  -4.]],

         [[  4.,   8.,  -8.,  10., -26.],
          [-10., -12.,   6.,   6.,   8.],
          [  4., -12.,  -2.,  10.,   2.],
          [ 12.,   6.,  