In [4]:
import numpy
import numpy as np


def calcScaleZeroPoint(min_val, max_val, num_bits=8):
    qmin = 0.
    qmax = 2. ** num_bits - 1.
    scale = (max_val - min_val) / (qmax - qmin)

    zero_point = qmax - max_val / scale

    if zero_point < qmin:
        zero_point = qmin
    elif zero_point > qmax:
        zero_point = qmax

    zero_point = int(zero_point)

    return scale, zero_point


def quantize_tensor(x, scale, zero_point, num_bits=8, signed=False):
    if signed:
        qmin = - 2. ** (num_bits - 1)
        qmax = 2. ** (num_bits - 1) - 1
    else:
        qmin = 0.
        qmax = 2. ** num_bits - 1.

    q_x = zero_point + x / scale

    q_x = numpy.clip(q_x, qmin, qmax).round()

    return q_x

def dequantize_tensor(q_x, scale, zero_point):
    return scale * (q_x - zero_point)

In [5]:
import random
import numpy

a = numpy.random.normal(loc=0.0, scale=1.0, size=100000)


In [24]:
k = 1
max_val = a.max()
min_val = a.min()

scale, zero_point = calcScaleZeroPoint(min_val, max_val, k)

scale = 1.5958

print(scale, zero_point)
loss = 0
for i in a:
    q_i = quantize_tensor(i, scale, zero_point, k)
    f_i = dequantize_tensor(q_i, scale, zero_point)
    # print(q_i, i)
    loss += (numpy.abs(f_i - i) ** 2)
loss = loss ** 0.5

print(loss)

1.5958 0
247.2403774020081


In [28]:
import numpy as np

scales = [1.5958, 0.9957, 0.586, 0.3352, 0.1881, 0.1041, 0.0569, 0.0308]

test = np.random.normal(loc=0.22, scale=0.555, size=100000)

def uL2Q(w_f, k):
    if k > 8:
        scale = w_f.max() - w_f.min()
    else:
        scale = scales[k-1]

    a = scale * (np.std(w_f, ddof=1) ** 0.5)
    b = np.mean(w_f)

    fai = (w_f - b) / a - 0.5

    w_q_ = np.clip(fai, -(2**(k-1)), 2**(k-1) - 1).round()

    w_q = a*w_q_ + b

    return w_q_, w_q

w_q_, w_q = uL2Q(test, 2)

# print(w_q_)
# print(test)
# print(w_q)

loss = 0
for i in range(100000):
    loss += (np.abs(w_q[i] - test[i]) ** 2)
loss = loss ** 0.5

print(loss)

135.77853363125806


In [69]:
import torch
from torch import nn

conv2 = nn.Conv2d(3, 64, 3)

def ul2q(w_tensor, num_bits=8):

    if num_bits > 8:
        scale = w_tensor.max() - w_tensor.min()
    else:
        scale = scales[num_bits-1]

    a = scale * (torch.std(w_tensor) ** 0.5)
    b = torch.mean(w_tensor)

    fai = (w_tensor - b) / a - 0.5

    w_q_ = torch.clamp_(fai, -(2**(num_bits-1)), 2**(num_bits-1) - 1).round()

    w_q = a*w_q_ + b

    return w_q_, w_q



In [70]:
w_q_, w_q = ul2q(conv2.weight.data)

conv2.weight.data = w_q_

test_data = torch.randn(64, 3, 32, 32)

test_data_q = ul2q(test_data)

# print(conv2(test_data_q))