In [36]:
from transformers import GPT2Model, GPT2Config, GPT2LMHeadModel

# Build a GPT-2 language-model-head network straight from a freshly created
# default GPT2Config (no `from_pretrained` call is involved in this cell).
model = GPT2LMHeadModel(GPT2Config())

# Keep a handle on the configuration object that the model exposes.
configuration = model.config

In [37]:
from typing import Union, List, Tuple

def factorize(n: int) -> List[int]:
    """Decompose ``n`` by trial division.

    For ``n >= 2`` the result lists the prime factors of ``n`` in
    non-decreasing order, repeated according to multiplicity. The input ``1``
    is special-cased and yields ``[1]``. Values below 1 never enter the
    division loop and come back as the single-element list ``[n]``.
    """
    if n == 1:
        return [1]

    found: List[int] = []
    remaining = n
    divisor = 2
    # Only divisors up to sqrt(remaining) need to be tried; whatever is left
    # afterwards is either 1 or a single prime.
    while divisor * divisor <= remaining:
        quotient, leftover = divmod(remaining, divisor)
        if leftover == 0:
            found.append(divisor)
            remaining = quotient
        else:
            divisor += 1

    if remaining != 1:
        found.append(remaining)
    return found

def log2(n: int, assert_eq: bool):
    """Return the smallest non-negative integer ``e`` with ``2 ** e >= n``.

    Args:
        n: Value to bound from above by a power of two.
        assert_eq: When true, additionally assert that ``n`` is exactly
            that power of two.

    Returns:
        The exponent ``e``; ``0`` for every ``n <= 1``.
    """
    exponent = 0
    power = 1  # invariant: power == 2 ** exponent
    while power < n:
        power *= 2
        exponent += 1

    if assert_eq:
        assert power == n

    return exponent

def best_approx(n: int, max_factor: int = 3, verbose: bool = False):
    """Find the smallest integer ``>= n`` made up only of small factors.

    Candidates are scanned upwards from ``n``. A candidate is accepted when
    every entry of ``factorize(candidate)`` is ``<= max_factor`` and the number
    of entries does not exceed ``log2(n, False)``, i.e. the smallest exponent
    ``e`` with ``2 ** e >= n``.

    Args:
        n: Lower bound of the search; must be at least 1.
        max_factor: Largest factor a candidate may contain; must be at least 2.
        verbose: When true, print the factor budget once and the factor count
            of every candidate tried. Off by default so the search does not
            flood the cell output.

    Returns:
        The first accepted candidate. ``n == 1`` is returned unchanged (it is
        the empty product and needs no factors).

    Raises:
        ValueError: If ``n < 1`` or ``max_factor < 2``. Without these checks
            the search could never succeed and would loop forever: the factor
            budget is 0 for ``n <= 1`` while every factor list has at least
            one entry, and no integer ``>= 2`` has all factors below 2.
    """
    if max_factor < 2:
        raise ValueError(f"max_factor must be at least 2, got {max_factor}")
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")
    if n == 1:
        return 1

    # The budget is fixed from the starting value. Termination is guaranteed
    # because 2 ** n_factors >= n consists of exactly n_factors twos.
    n_factors = log2(n, False)
    if verbose:
        print("n_factors", n_factors)

    candidate = n
    while True:
        factors = factorize(candidate)
        if verbose:
            print("len(factors)", len(factors))
        if len(factors) <= n_factors and all(f <= max_factor for f in factors):
            return candidate
        candidate += 1

In [38]:
from tltorch.factorized_layers import FactorizedLinear
from tltorch.factorized_tensors import FactorizedTensor

In [39]:
from tltorch.tensor_hooks import tensor_dropout

## FactorizedTensor (tensor_dropout can be applied)

In [40]:
import torch
# All-zero 3072 x 768 tensor.
# NOTE(review): the next cell in file order that touches `inpt` rebinds it, so
# this value does not appear to be consumed anywhere — confirm it is needed.
inpt = torch.zeros(3072, 768)

In [51]:
# Create a 'TT' factorized tensor of shape 16 x 16 x 256 x 36 with the given
# rank list and call normal_() on it.
# (16 * 16 * 256 * 36 == 768 * 3072.)
tensor = FactorizedTensor.new(shape = [16, 16, 256, 36], rank=[1, 64, 64, 64, 1], factorization='TT').normal_()
# Show the factors before tensor_dropout is applied.
print (tensor.factors)
# Apply tensor_dropout with p=0.5 and rebind `tensor` to what it returns.
tensor = tensor_dropout(tensor, p=0.5)
# Show the factors again; the recorded output lists the same factor sizes
# before and after.
print (tensor.factors)


FactorList(
    (factor_0): Parameter containing: [torch.FloatTensor of size 1x16x64]
    (factor_1): Parameter containing: [torch.FloatTensor of size 64x16x64]
    (factor_2): Parameter containing: [torch.FloatTensor of size 64x256x64]
    (factor_3): Parameter containing: [torch.FloatTensor of size 64x36x1]
)
tensor dropout
apply TTTensor
apply TTTensor
apply <tltorch.tensor_hooks._tensor_dropout.TTDropout object at 0x7f2a8403c350>
apply <torch.utils.hooks.RemovableHandle object at 0x7f2b8f4bddd0>
FactorList(
    (factor_0): Parameter containing: [torch.FloatTensor of size 1x16x64]
    (factor_1): Parameter containing: [torch.FloatTensor of size 64x16x64]
    (factor_2): Parameter containing: [torch.FloatTensor of size 64x256x64]
    (factor_3): Parameter containing: [torch.FloatTensor of size 64x36x1]
)


In [42]:
import torch
# All-zero input of shape (1, 16).
inpt = torch.zeros(size = (1, 16))

# Call the factorized tensor on the input; being the cell's last expression,
# the returned value is what the cell displays.
# NOTE(review): what calling a FactorizedTensor computes is defined inside
# tltorch and is not visible from this notebook — confirm the intent.
tensor(inpt)

index torch.Size([16])
factor.shape torch.Size([1, 16, 64])


TTTensor(shape=(16, 16, 256, 36), rank=(1, 64, 64, 64, 1))

In [66]:
# Call the factorized tensor with an all-zero (768, 3072) input.
# The recorded run of this cell ended in `IndexError: list index out of range`.
# The error is caught and reported so that a top-to-bottom re-run of the
# notebook is not halted here and the later sections still execute.
inpt = torch.zeros(size = (768, 3072))
try:
    call_result = tensor(inpt)
except IndexError as err:
    call_result = None
    print(f"IndexError: {err}")
# Displays the returned value if the call succeeded; shows nothing for None.
call_result

index torch.Size([3072])
factor.shape torch.Size([1, 16, 64])


IndexError: list index out of range

In [43]:
# Display the factor list of the current `tensor`; the recorded output lists
# four 3-way factors.
tensor.factors

FactorList(
    (factor_0): Parameter containing: [torch.FloatTensor of size 1x16x64]
    (factor_1): Parameter containing: [torch.FloatTensor of size 64x16x64]
    (factor_2): Parameter containing: [torch.FloatTensor of size 64x256x64]
    (factor_3): Parameter containing: [torch.FloatTensor of size 64x36x1]
)

In [44]:
# Rebind `tensor` to a 'CP' factorization of a 30 x 40 x 20 tensor created
# with rank=0.5, then call normal_() on it.
tensor = FactorizedTensor.new((30, 40, 20), rank=0.5, factorization='CP').normal_()
# All-zero input of shape (1, 133); 133 is also the second dimension of every
# factor in the recorded `tensor.factors` output for this CP tensor.
inpt = torch.zeros(size = (1,133))
# The value returned by this call is discarded (not the cell's last expression).
tensor(inpt)
# Apply tensor_dropout with p=0.5 and rebind `tensor` to what it returns.
tensor = tensor_dropout(tensor, p=0.5)
# NOTE(review): remove_tensor_dropout is not imported in any visible cell;
# it would need an import before this line could be re-enabled.
#remove_tensor_dropout(tensor)

tensor dropout
apply CPTensor
apply CPTensor
apply <tltorch.tensor_hooks._tensor_dropout.CPDropout object at 0x7f2b98b38250>
apply <torch.utils.hooks.RemovableHandle object at 0x7f2a8403c9d0>


In [45]:
# Display the factor list of the current `tensor`; the recorded output lists
# three matrices with 133 columns each.
tensor.factors

FactorList(
    (factor_0): Parameter containing: [torch.FloatTensor of size 30x133]
    (factor_1): Parameter containing: [torch.FloatTensor of size 40x133]
    (factor_2): Parameter containing: [torch.FloatTensor of size 20x133]
)

In [64]:
# Display the current `tensor`.
# NOTE(review): the recorded output is a TTTensor, although the cells that
# precede this one in file order rebind `tensor` to a CP factorization. This
# cell carries execution count 64, so it was run out of file order; a fresh
# top-to-bottom run would presumably show the CP tensor instead — confirm.
tensor

TTTensor(shape=(16, 16, 256, 36), rank=(1, 64, 64, 64, 1))

## FactorizedLinear (can be built into an existing model)

In [59]:
from tltorch.factorized_layers.factorized_linear import FactorizedLinear

In [60]:
# Take the c_fc projection inside the MLP of transformer block 2 as the layer
# whose dimensions the factorized layer should match.
old_layer = model.transformer.h[2].mlp.c_fc
# The two weight dimensions are unpacked as (input features, output features).
(in_, out_) = old_layer.weight.shape
# Tensorized feature shapes: 16 * 16 * 3 = 768 and 16 * 16 * 12 = 3072.
itf = (16,16,3)
otf = (16,16,12)
# Guard the hard-coded tuples against the real layer: their products must
# equal the dimensions read from the weight, otherwise the factorized layer
# would not be a drop-in match for `old_layer`.
assert torch.Size(itf).numel() == in_, f"prod(itf)={torch.Size(itf).numel()} does not match in_={in_}"
assert torch.Size(otf).numel() == out_, f"prod(otf)={torch.Size(otf).numel()} does not match out_={out_}"
# Build the factorized linear layer ('blocktt' factorization, rank list of four).
layer = FactorizedLinear(in_tensorized_features=itf, out_tensorized_features=otf, rank=[1, 64, 64, 1], factorization = 'blocktt')

[3072, 768]


In [61]:
import torch
# All-zero 3072 x 768 tensor used as the input for the factorized layer.
inpt = torch.zeros(3072, 768)

In [62]:
# Pass the all-zero input through the factorized layer and display the result.
# In the recorded output every displayed row is identical.
layer(inpt)

tensor([[ 0.0189,  0.0063,  0.0097,  ..., -0.0006, -0.0299, -0.0122],
        [ 0.0189,  0.0063,  0.0097,  ..., -0.0006, -0.0299, -0.0122],
        [ 0.0189,  0.0063,  0.0097,  ..., -0.0006, -0.0299, -0.0122],
        ...,
        [ 0.0189,  0.0063,  0.0097,  ..., -0.0006, -0.0299, -0.0122],
        [ 0.0189,  0.0063,  0.0097,  ..., -0.0006, -0.0299, -0.0122],
        [ 0.0189,  0.0063,  0.0097,  ..., -0.0006, -0.0299, -0.0122]],
       grad_fn=<AddmmBackward0>)

In [63]:
# Display the layer's repr; the recorded output reports in_features=768,
# out_features=3072 and the BlockTT factorization with rank [1, 64, 64, 1].
layer

FactorizedLinear(in_features=768, out_features=3072, weight of size (3072, 768) tensorized to ((16, 16, 12), (16, 16, 3)),factorization=BlockTT, rank=[1, 64, 64, 1], with a single layer parametrized, 