In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
import os
import time
import sys
from torch.quantization import QuantStub, DeQuantStub
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import pandas as pd

In [2]:
class QuantizedMLP(nn.Module):
    def __init__(self):
        super(QuantizedMLP, self).__init__()
        self.linear1 = nn.Linear(784,250)
        self.linear2 = nn.Linear(250,100)
        self.linear3 = nn.Linear(100,10)
        self.quant = QuantStub()
        self.dequant = DeQuantStub()

    def forward(self, X):
        X = self.quant(X)
        X = F.relu(self.linear1(X))
        X = F.relu(self.linear2(X))
        X = self.linear3(X)
        X = self.dequant(X)
        return X

In [3]:
train_csv = pd.read_csv("fashion-mnist_train.csv")
test_csv = pd.read_csv("fashion-mnist_test.csv")

device = torch.device("cpu")

Y_train = train_csv['label'].values
X_train = train_csv.drop(['label'], axis=1).values

Y_test = test_csv['label'].values
X_test = test_csv.drop(['label'], axis=1).values

torch_Y_train = torch.from_numpy(Y_train).type(torch.LongTensor)
torch_X_train = torch.from_numpy(X_train).type(torch.LongTensor)
torch_Y_test = torch.from_numpy(Y_test).type(torch.LongTensor)
torch_X_test = torch.from_numpy(X_test).type(torch.LongTensor)

train = torch.utils.data.TensorDataset(torch_X_train, torch_Y_train)
test = torch.utils.data.TensorDataset(torch_X_test, torch_Y_test)

BATCH_SIZE = 32

train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE)

In [4]:
def fit(model, train_loader, epoch_number=5):
    optimizer = torch.optim.Adam(model.parameters())
    error = nn.CrossEntropyLoss()
    model.train()

    for epoch in range(epoch_number):
        correct = 0

        for batch_idx, (X_batch, Y_batch) in enumerate(train_loader):
            var_X_batch = Variable(X_batch).float()
            var_Y_batch = Variable(Y_batch)

            optimizer.zero_grad()
            output = model(var_X_batch)
            loss = error(output, var_Y_batch)
            loss.backward()
            optimizer.step()

            predicted = torch.max(output.data, 1)[1]
            correct += (predicted == var_Y_batch).sum()
            if batch_idx % 200 == 0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(
                 epoch + 1, batch_idx*len(X_batch), len(train_loader.dataset), 100.*batch_idx / len(train_loader), loss.data, float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))

In [5]:
def evaluate(model, loader=test_loader):
    correct = 0 
    for test_imgs, test_labels in loader:
        test_imgs = Variable(test_imgs).float()
        
        output = model(test_imgs)
        predicted = torch.max(output,1)[1]
        correct += (predicted == test_labels).sum()
    print("Test accuracy:{:.3f} ".format( float(correct) / (len(loader)*BATCH_SIZE)))

In [6]:
model = QuantizedMLP()
SEED = 9856
torch.manual_seed(SEED)
fit(model, train_loader)



In [7]:
def calc_size(model):
    torch.save(model.state_dict(), "model.p")
    size=os.path.getsize("model.p")
    os.remove('model.p')
    return "{:.3f} KB".format(size / 1024)

In [8]:
calc_size(model)

'870.585 KB'

In [9]:
from contextlib import contextmanager

@contextmanager
def single_thread():  
    num = torch.get_num_threads()
    torch.set_num_threads(1)
    yield
    torch.set_num_threads(num)

In [13]:
%%timeit -r10

with single_thread():
    evaluate(model, train_loader)

Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
Test accuracy:0.865 
1.61 s ± 135 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


# Dinamic

In [10]:
model_int8_dinamic = torch.quantization.quantize_dynamic(
    model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8)

In [11]:
calc_size(model_int8_dinamic)

'222.126 KB'

In [18]:
%%timeit -r10

with single_thread():
    evaluate(model_int8_dinamic, train_loader)

Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
Test accuracy:0.867 
1.23 s ± 87.7 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


# Static

In [14]:
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
model_prepared = torch.quantization.prepare(model)



In [15]:
model_int8_static = torch.quantization.convert(model_prepared)



In [16]:
calc_size(model_int8_static)

'229.540 KB'

In [17]:
%%timeit -r10

with single_thread():
    evaluate(model_int8_static, train_loader)

Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
Test accuracy:0.816 
1.22 s ± 91.2 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
