# Set Bert

In [1]:
from transformers import AutoTokenizer, DistilBertModel
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-multilingual-cased")
distilBertModel = DistilBertModel.from_pretrained("distilbert/distilbert-base-multilingual-cased")

In [None]:
# get some vocab info
import random
random_tokens = random.sample(list(tokenizer.vocab), 10)
random_ids = [tokenizer.vocab[token] for token in random_tokens]

print("{0:20}{1:15}".format("token", "id"))
print("-" * 25)
for t, i in zip(random_tokens, random_ids):
    print("{0:15}{1:10}".format(t, i))

In [2]:
distilBertModel.eval()

DistilBertModel(
  (embeddings): Embeddings(
    (word_embeddings): Embedding(119547, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (layer): ModuleList(
      (0-5): 6 x TransformerBlock(
        (attention): MultiHeadSelfAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (q_lin): Linear(in_features=768, out_features=768, bias=True)
          (k_lin): Linear(in_features=768, out_features=768, bias=True)
          (v_lin): Linear(in_features=768, out_features=768, bias=True)
          (out_lin): Linear(in_features=768, out_features=768, bias=True)
        )
        (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (ffn): FFN(
          (dropout): Dropout(p=0.1, inplace=False)
          (lin1): Linear(in_features=768, out_features=3072, bias=True)
          (lin2): L

In [3]:
input = tokenizer("嗨", return_tensors="pt")
seqTensor = distilBertModel(**input, output_hidden_states=True)

In [21]:
print(seqTensor.hidden_states[0].shape)

torch.Size([1, 3, 768])


In [19]:
print(len(seqTensor.hidden_states))

7


In [9]:
print(input)

{'input_ids': tensor([[101, 100, 102]]), 'attention_mask': tensor([[1, 1, 1]])}


# Set BertDecoder

In [4]:
from utils.DevConf import DevConf
devConf = DevConf('mps')

In [5]:
from model.BertDecoder.SentiClassifier import SentiClassifier

In [6]:
mapper = SentiClassifier(768, 12, 2, devConf=devConf)

In [11]:
from model.BertDecoder.SentiClassifier_Cross import SentiClassifier_Cross

In [12]:
print(not isinstance(mapper, SentiClassifier))
print(not isinstance(mapper, SentiClassifier_Cross))

False
True


In [7]:
ans = mapper.forward(seqTensor)
# print(ans.shape)
print(ans)

tensor([[ 1.5515e-02,  6.7839e-02, -2.8733e-02, -7.7939e-02, -4.8201e-02,
          7.6650e-04,  6.0756e-02,  1.4687e-02, -3.5091e-02,  3.8786e-02,
         -2.4219e-02,  6.9973e-02, -3.5821e-03,  1.1017e-01, -6.1632e-02,
          3.2076e-03,  1.7479e-02,  4.2271e-02,  5.5858e-02, -4.7596e-02,
         -1.0977e-01, -4.5340e-03,  3.8417e-02,  7.4270e-02, -1.0639e-01,
          1.1155e-01,  8.8370e-02,  4.1836e-04, -1.7464e-03, -1.2006e-02,
          1.2026e-01,  5.8393e-02, -4.9086e-02,  1.1670e-01, -2.2997e-02,
         -2.1828e-02, -1.8341e-02, -1.1862e-01,  9.6593e-02,  1.4285e-01,
         -1.0608e-01, -2.8651e-02, -7.1754e-02,  3.7221e-02,  3.7768e-02,
         -2.5128e-02,  2.0181e-03,  1.6639e-02, -3.0742e-02,  5.3533e-02,
         -5.7006e-02, -4.9872e-02, -3.6237e-03, -1.4313e-01, -1.7708e-02,
         -6.0654e-02, -4.4661e-02,  2.1638e-02,  1.3119e-03,  1.0348e-02,
          7.9516e-03,  7.5657e-02,  2.5445e-02,  3.4548e-02,  2.6398e-02,
          3.6327e-02,  2.2170e-03,  2.

In [8]:
print(ans.shape)

torch.Size([1, 768])


# Combination Model

In [7]:
from model.CombinationModel import CombinationModel

In [8]:
from torch import nn

In [9]:
cModel = CombinationModel(tokenizer=tokenizer, distilBert=distilBertModel, decoder=mapper, outputProject=nn.Linear(768, 5))

In [10]:
cModel.forward("嗨", True)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


IndexError: list index out of range

# Train

In [2]:
import torch

In [None]:
loss_fn = torch.nn.CrossEntropyLoss()

In [14]:
model = SentiClassifier(distilBertModel=distilBertModel, tokenizer=tokenizer, output_dim=2)

In [15]:
print(model("hello world!"))

tensor([[0.4752, 0.5248]], grad_fn=<SoftmaxBackward0>)


In [12]:
import torch
from einops import repeat

In [27]:
x = torch.ones(2, requires_grad=True)
y = repeat(x, "l -> b l", b=2) * 2
# with torch.no_grad():
#     for i in range(3):
#         y = 2*y
#     # x.retain_grad()
y1 = 2*y[0]
y2 = y[1]
z = y1.sum() + y2.sum()

In [18]:
print(x)
print(y)

tensor([1., 1.], requires_grad=True)
tensor([[4., 4.],
        [4., 4.]], grad_fn=<MulBackward0>)


In [28]:
z.backward()

In [29]:
print(x.grad)

tensor([6., 6.])
