# Linear Quantization Mode

In [None]:
import torch 
from my_utils import get_q_scale_symmetric

# Seed the global RNG so the random test tensor (and every number derived from
# it in the following cells) is identical on each Restart & Run All.
# Previously recorded outputs came from an unseeded run and will differ.
torch.manual_seed(0)

test_tensor = torch.randn(4, 4)
print(test_tensor)

tensor([[-1.1126, -0.5602,  1.2740, -0.9342],
        [-0.8312,  0.9440,  0.4725,  0.5596],
        [ 0.4687,  0.8807,  0.0733, -0.3021],
        [-0.0227, -1.3223, -0.9463,  1.3512]])


In [8]:
# Compute one quantization scale for the entire test tensor; as the last
# expression of the cell, the returned value is displayed as the cell output.
# NOTE(review): the helper lives in my_utils and is not visible here — the
# recorded output is a single scalar.
get_q_scale_symmetric(test_tensor)

0.010639303312526913

In [18]:
# Import the helpers by name instead of `from my_utils import *`, so each
# function used in this notebook can be traced back to its module.
# `linear_q_with_scale_and_zero_point` is not used in this cell, but the
# per-channel cell further down relies on it being in scope.
from my_utils import (
    linear_dequantization,
    linear_q_symmetric,
    linear_q_with_scale_and_zero_point,
)

# Quantize the whole tensor to int8; the helper returns the quantized tensor
# together with the scale it used.
quantized_tensor, scale = linear_q_symmetric(test_tensor, dtype=torch.int8)

# Map the integers back to floats. The third argument is 0
# (presumably the zero point — confirm against my_utils).
dequantized_tensor = linear_dequantization(quantized_tensor, scale, 0)

# Mean squared error between the round-tripped tensor and the original.
print((dequantized_tensor - test_tensor).square().mean())

tensor(8.1444e-06)


# Quantization at Different Granularities


In [19]:
# Small 3x3 example whose rows have very different magnitudes.
test_tensor = torch.tensor([
    [191.6, -13.5, 728.6],
    [92.14, 295.5, -184],
    [0, 684.6, 245.5],
])

# Quantize the full tensor in one call, then map it straight back to floats.
quantized_tensor, scale = linear_q_symmetric(test_tensor)
dequantized_tensor = linear_dequantization(quantized_tensor, scale, 0)

# Report the mean squared error introduced by the round trip.
squared_error = (dequantized_tensor - test_tensor).square()
print(squared_error.mean())

tensor(2.5092)


# Per-channel quantization

In [35]:
test_tensor = torch.tensor([
    [191.6, -13.5, 728.6],
    [92.14, 295.5, -184],
    [0, 684.6, 245.5],
])

# `dim` picks the axis whose slices each get their own scale:
# 0 -> one scale per row, 1 -> one scale per column.
dim = 0
output_dim = test_tensor.shape[dim]
print(output_dim)

# Placeholder vector with one entry per slice; filled in by the loop below.
scale = torch.zeros(output_dim)
print(scale)

# Walk over the slices along `dim` and store each slice's scale.
for index, sub_tensor in enumerate(test_tensor.unbind(dim)):
    scale[index] = get_q_scale_symmetric(sub_tensor)
    print(sub_tensor)

print(scale)

3
tensor([0., 0., 0.])
tensor([191.6000, -13.5000, 728.6000])
tensor([  92.1400,  295.5000, -184.0000])
tensor([  0.0000, 684.6000, 245.5000])
tensor([5.7370, 2.3268, 5.3906])


In [36]:
# Build a shape of all ones with the same rank as the tensor ...
scale_shape = [1] * test_tensor.ndim
print(scale_shape)

# ... then let the quantized axis absorb every scale entry (-1 = "infer size").
scale_shape[dim] = -1
print(scale_shape)

# Reshape the 1-D scale vector so it broadcasts against `test_tensor`
# along `dim` when the tensor is divided by it.
scale = scale.view(*scale_shape)
print(scale)

quantized_tensor = linear_q_with_scale_and_zero_point(
    test_tensor,
    scale=scale,
    zero_point=0,
)

print(quantized_tensor)


[1, 1]
[-1, 1]
tensor([[5.7370],
        [2.3268],
        [5.3906]])
tensor([[ 33,  -2, 127],
        [ 40, 127, -79],
        [  0, 127,  46]], dtype=torch.int8)


# view function

In [26]:
# 3x3 integer matrix reused by the broadcasting examples below.
m = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(m)


tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])


In [30]:
s = torch.tensor([1, 5, 10])
print(s)

# Only the last expression of a cell is displayed automatically, so the
# intermediate shapes are printed explicitly.
print(s.shape)

# We can reshape the tensor so that the first dimension has size 1 and the
# second dimension holds the remaining elements.
print(s.view(1, 3).shape)

# -1 asks `view` to infer that dimension's size from the element count.
s.view(1, -1).shape

tensor([ 1,  5, 10])


torch.Size([1, 3])

In [None]:
# Column-shaped divisor: one entry per row of `m`.
scale = torch.tensor([
    [1],
    [5],
    [10],
])
print(scale.shape)
# Broadcasting stretches the (3, 1) column across the columns of `m`,
# so every row of `m` is divided by its own scale value.
m / scale

torch.Size([3, 1])


tensor([[1.0000, 2.0000, 3.0000],
        [0.8000, 1.0000, 1.2000],
        [0.7000, 0.8000, 0.9000]])

In [None]:
# Row-shaped divisor: one entry per column of `m`.
scale = torch.tensor([[1, 5, 10]])
print(scale.shape)
# Broadcasting stretches the (1, 3) row down the rows of `m`,
# so every column of `m` is divided by its own scale value.
m / scale

torch.Size([1, 3])


tensor([[1.0000, 0.4000, 0.3000],
        [4.0000, 1.0000, 0.6000],
        [7.0000, 1.6000, 0.9000]])

In [1]:
# Explicit imports instead of `from my_utils import *`: this cell uses `torch`
# directly, so it imports it itself rather than depending on whatever the
# wildcard import (or an earlier cell) happened to leave in scope.
import torch
from my_utils import linear_dequantization, linear_q_symmetric_per_channel

test_tensor = torch.tensor(
    [[191.6, -13.5, 728.6],
     [92.14, 295.5, -184],
     [0, 684.6, 245.5]]
)

# Per-channel quantization along each axis: dim=0 and dim=1 each return the
# quantized tensor together with the scales used for that axis.
quantized_tensor_0, scale_0 = linear_q_symmetric_per_channel(test_tensor, dim=0)

quantized_tensor_1, scale_1 = linear_q_symmetric_per_channel(test_tensor, dim=1)

In [3]:
# Round-trip the dim=0 result back to floats and show it.
dequantized_tensor_0 = linear_dequantization(quantized_tensor_0, scale_0, 0)
print(dequantized_tensor_0)

# Mean squared error against the original tensor.
error_0 = dequantized_tensor_0 - test_tensor
print(error_0.square().mean())

tensor([[ 189.3213,  -11.4740,  728.6000],
        [  93.0709,  295.5000, -183.8150],
        [   0.0000,  684.6000,  247.9653]])
tensor(1.8084)


In [4]:
# Round-trip the dim=1 result back to floats and show it.
dequantized_tensor_1 = linear_dequantization(quantized_tensor_1, scale_1, 0)
print(dequantized_tensor_1)

# Mean squared error against the original tensor.
error_1 = dequantized_tensor_1 - test_tensor
print(error_1.square().mean())

tensor([[ 191.6000,  -16.1717,  728.6000],
        [  92.0284,  296.4803, -183.5842],
        [   0.0000,  684.6000,  246.6913]])
tensor(1.0781)
