In [1]:
import numpy as np

import torch
import torch.nn as nn

import tntorch as tnt

In [2]:
def tt_dot(in_modes, out_modes, ranks, inputs, weight, bias=None) :
    "according to TGaripov repo"
    res = inputs
    res = res.view(-1, int(np.prod(in_modes)))
    res = res.transpose(1, 0)
    res = res.contiguous()
    dim = len(in_modes)
    for ii in range(dim) :
        res = res.view(ranks[ii] * in_modes[ii], -1)
        res = torch.matmul(weight[ii], res)
        res = res.view(out_modes[ii], -1)
        res = res.transpose(1, 0)
        res = res.contiguous()
    res = res.view(-1, int(np.prod(out_modes)))

    if bias is not None :
        res += bias
    return res

In [3]:
class TTLayer(nn.Module):
    def __init__(self, in_modes, out_modes, ranks, bias=True):
        super().__init__()
        self.in_modes = in_modes
        self.out_modes = out_modes
        self.ranks = ranks
        dim = len(self.in_modes)

        assert len(self.in_modes) == len(self.out_modes) == len(self.ranks)-1
        
        self.weight = self._create_tt_cores(self.in_modes, self.out_modes, self.ranks)

        if bias:
            self.bias = nn.Parameter(torch.Tensor(np.prod(out_modes)))
        else:
            self.register_parameter('bias', None)
            
        self.reset_parameters()

    def reset_normal(self) :
        normal_z = ((((0.05**2)/np.prod(self.ranks)))**(1/(len(self.ranks)-1))) ** 0.5 
        for i in range(len(self.weight)) :
            nn.init.normal_(self.weight[i], 0, normal_z)

    def reset_parameters(self) :
        self.reset_normal()
        if self.bias is not None:
            self.bias.data.zero_()

    def forward(self, input):
        return tt_dot(self.in_modes, self.out_modes, self.ranks, input, self.weight, self.bias)

    def _create_tt_cores(self, in_modes, out_modes, ranks):
        """
        in_modes: shape of initial tensor
        out_modes: shape of out tensor 
        Total tensor shape is element_wise_product(in_modes,out_modes)
        ranks: desirable ranks of tt
        return: weights
        """
        dim = len(in_modes)
        _tt_cores_list = []

        for i in range(dim) :
            _tt_cores_list.append(nn.Parameter(torch.Tensor(out_modes[i] * ranks[i+1], in_modes[i] * ranks[i])))

        weight = nn.ParameterList(_tt_cores_list)
        return weight 

In [4]:
# Hyper-parameters 
input_size = 784
hidden_size = 256
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# in_modes = [2,14,14,2]
# out_modes = [2,8,8,2]
ranks = [1,2,4,2,1]
in_modes = [7,4,7,4]
# out_modes = [5,5,5,5]
out_modes = [2,8,8,2]
tt_ranks = [1,2,4,2,1]

In [5]:
import torchvision
import torchvision.transforms as transforms


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# MNIST dataset 
train_dataset = torchvision.datasets.MNIST(root='../../data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),  
                                           download=False)

test_dataset = torchvision.datasets.MNIST(root='../../data', 
                                          train=False, 
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)


In [6]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, compress=False, in_modes=None, out_modes=None, ranks=None):
        super(NeuralNet, self).__init__()
        if compress:
            self.fc1 = TTLayer(in_modes, out_modes, ranks)
        else:
            self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out


In [7]:
def num_model_params(model):
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    return params

In [8]:
model_tt = NeuralNet(input_size, hidden_size, num_classes, True, in_modes, out_modes, tt_ranks).to(device)

In [9]:
num_model_params(NeuralNet(input_size, hidden_size, num_classes).to(device))/num_model_params(model_tt)

56.94739787353106

In [10]:
def train_nn(model, train_loader, criterion, optimizer):
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):  
            # Move tensors to the configured device
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            # Forward pass
            outputs = model(images.double())
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    return model

In [11]:
def test_model(model, test_loader):
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            outputs = model(images.double())
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
    return correct/total

In [12]:
model_tt = NeuralNet(input_size, hidden_size, num_classes, True, in_modes, out_modes, tt_ranks).to(device)
criterion_tt = nn.CrossEntropyLoss()
optimizer_tt = torch.optim.Adam(model_tt.parameters(), lr=learning_rate)  

model_tt = train_nn(model_tt, train_loader,criterion_tt, optimizer_tt)

Epoch [1/5], Step [100/600], Loss: 1.0074
Epoch [1/5], Step [200/600], Loss: 0.7649
Epoch [1/5], Step [300/600], Loss: 0.3654
Epoch [1/5], Step [400/600], Loss: 0.5241
Epoch [1/5], Step [500/600], Loss: 0.4166
Epoch [1/5], Step [600/600], Loss: 0.4175
Epoch [2/5], Step [100/600], Loss: 0.3205
Epoch [2/5], Step [200/600], Loss: 0.2854
Epoch [2/5], Step [300/600], Loss: 0.3477
Epoch [2/5], Step [400/600], Loss: 0.3505
Epoch [2/5], Step [500/600], Loss: 0.2539
Epoch [2/5], Step [600/600], Loss: 0.1879
Epoch [3/5], Step [100/600], Loss: 0.2152
Epoch [3/5], Step [200/600], Loss: 0.2725
Epoch [3/5], Step [300/600], Loss: 0.3695
Epoch [3/5], Step [400/600], Loss: 0.1758
Epoch [3/5], Step [500/600], Loss: 0.1589
Epoch [3/5], Step [600/600], Loss: 0.0707
Epoch [4/5], Step [100/600], Loss: 0.0773
Epoch [4/5], Step [200/600], Loss: 0.1279
Epoch [4/5], Step [300/600], Loss: 0.2271
Epoch [4/5], Step [400/600], Loss: 0.1459
Epoch [4/5], Step [500/600], Loss: 0.0765
Epoch [4/5], Step [600/600], Loss:

In [13]:
model = NeuralNet(input_size, hidden_size, num_classes, compress=False).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

model = train_nn(model, train_loader, criterion, optimizer)

Epoch [1/5], Step [100/600], Loss: 0.3275
Epoch [1/5], Step [200/600], Loss: 0.1984
Epoch [1/5], Step [300/600], Loss: 0.2375
Epoch [1/5], Step [400/600], Loss: 0.2280
Epoch [1/5], Step [500/600], Loss: 0.1115
Epoch [1/5], Step [600/600], Loss: 0.1821
Epoch [2/5], Step [100/600], Loss: 0.2358
Epoch [2/5], Step [200/600], Loss: 0.1317
Epoch [2/5], Step [300/600], Loss: 0.0862
Epoch [2/5], Step [400/600], Loss: 0.1457
Epoch [2/5], Step [500/600], Loss: 0.1399
Epoch [2/5], Step [600/600], Loss: 0.0963
Epoch [3/5], Step [100/600], Loss: 0.0584
Epoch [3/5], Step [200/600], Loss: 0.2307
Epoch [3/5], Step [300/600], Loss: 0.1082
Epoch [3/5], Step [400/600], Loss: 0.1430
Epoch [3/5], Step [500/600], Loss: 0.0660
Epoch [3/5], Step [600/600], Loss: 0.1182
Epoch [4/5], Step [100/600], Loss: 0.0328
Epoch [4/5], Step [200/600], Loss: 0.1310
Epoch [4/5], Step [300/600], Loss: 0.0527
Epoch [4/5], Step [400/600], Loss: 0.0745
Epoch [4/5], Step [500/600], Loss: 0.0612
Epoch [4/5], Step [600/600], Loss:

In [18]:
test_model(model_tt, test_loader)
test_model(model, test_loader)
print("Compression rate: {}".format(num_model_params(model)/num_model_params(model_tt)))

Accuracy of the network on the 10000 test images: 96.43 %
Accuracy of the network on the 10000 test images: 97.49 %
Compression rate: 56.94739787353106


## Fine-tuning

In [19]:
model_tt.fc1.weight

ParameterList(
    (0): Parameter containing: [torch.DoubleTensor of size 4x7]
    (1): Parameter containing: [torch.DoubleTensor of size 32x8]
    (2): Parameter containing: [torch.DoubleTensor of size 16x28]
    (3): Parameter containing: [torch.DoubleTensor of size 2x8]
)

In [21]:
model.fc1.weight.size()

torch.Size([256, 784])

In [43]:
w1_original = model.fc1.weight.data

w1_reshaped = w1_original.view(list(np.array(in_modes)*np.array(out_modes)))

In [50]:
tt_w1 = tnt.Tensor(w1_reshaped, ranks_tt=tt_ranks[1:-1])

In [51]:
def metrics(t, full):
    print(t)
    print('Compression ratio: {}/{} = {:g}'.format(full.numel(), t.numel(), full.numel() / t.numel()))
    print('Relative error:', tnt.relative_error(full, t))
    print('RMSE:', tnt.rmse(full, t))
    print('R^2:', tnt.r_squared(full, t))

In [52]:
metrics(tt_w1, w1_reshaped)

4D TT tensor:

 14  32  56   8
  |   |   |   |
 (0) (1) (2) (3)
 / \ / \ / \ / \
1   2   4   2   1

Compression ratio: 200704/748 = 268.321
Relative error: tensor(0.9790)
RMSE: tensor(0.0712)
R^2: tensor(0.0416)


In [53]:
w1_cores = tt_w1.cores

In [54]:
model_tt = NeuralNet(input_size, hidden_size, num_classes, True, in_modes, out_modes, tt_w1.ranks_tt).to(device)


params = []

for child  in model_tt.fc1.children():
    for param in list(child.parameters()):
        params.append(param)
        print(param.size())
        
reshaped_cores = []

for w,p in zip(w1_cores, params):
    print(w.size())
    reshaped_cores.append(w.view_as(p))
        
print("="*10)

new_params = []
for m,p in zip(w1_cores,params):
    print(m.view_as(p).size())
    new_params.append(m.view_as(p))


torch.Size([4, 7])
torch.Size([32, 8])
torch.Size([16, 28])
torch.Size([2, 8])
torch.Size([1, 14, 2])
torch.Size([2, 32, 4])
torch.Size([4, 56, 2])
torch.Size([2, 8, 1])
torch.Size([4, 7])
torch.Size([32, 8])
torch.Size([16, 28])
torch.Size([2, 8])


In [55]:
for p, n_p in zip(model_tt.fc1.weight.parameters(), new_params):
    p.data.copy_(n_p)

In [56]:
test_model(model_tt, test_loader)
test_model(model, test_loader)
print("Compression rate: {}".format(num_model_params(model)/num_model_params(model_tt)))

Accuracy of the network on the 10000 test images: 5.88 %
Accuracy of the network on the 10000 test images: 97.49 %
Compression rate: 56.94739787353106


Here is the problem.