In [1]:
import os

# Hide every CUDA device from this process so the notebook runs on CPU, and
# set TensorFlow's "allow growth" flag. Both are set here, before the deep
# learning libraries are imported in the following cells.
os.environ.update(
    {
        'CUDA_VISIBLE_DEVICES': '',
        'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    }
)

In [4]:
from transformers import T5Model, T5Tokenizer
from transformers.models.bart.modeling_bart import shift_tokens_right

In [6]:
# Single source of truth for the checkpoint, so the model and tokenizer
# cannot drift apart.
CHECKPOINT = "mesolitica/t5-super-tiny-bahasa-cased"

model = T5Model.from_pretrained(CHECKPOINT)
tokenizer = T5Tokenizer.from_pretrained(CHECKPOINT)

Some weights of the model checkpoint at mesolitica/t5-super-tiny-bahasa-cased were not used when initializing T5Model: ['lm_head.weight']
- This IS expected if you are initializing T5Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing T5Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading spiece.model:   0%|          | 0.00/784k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/2.15k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/2.35k [00:00<?, ?B/s]

In [7]:
# Toy inputs; each one is prefixed with the task prompt
# (Malay for "translate English to Malay: ").
initial_text = 'terjemah Inggeris ke Melayu: '
strings = ['i like', 'i hate so much']

# Tokenise every prompt on its own (1-D tensor of ids) and wrap it in a
# feature dict, which is the per-example format handed to `tokenizer.pad`.
input_ids = []
for s in strings:
    token_ids = tokenizer.encode(initial_text + s, return_tensors='pt')[0]
    input_ids.append({'input_ids': token_ids})

# Pad all examples to the length of the longest one in the batch.
padded = tokenizer.pad(input_ids, padding='longest')

# Decoder inputs are derived from the padded encoder ids via BART's
# `shift_tokens_right` helper, using the model's pad / decoder-start ids.
padded['decoder_input_ids'] = shift_tokens_right(
    padded['input_ids'],
    model.config.pad_token_id,
    model.config.decoder_start_token_id,
)

In [8]:
# Run the model on the padded batch; every entry of `padded` (including the
# `decoder_input_ids` added above) is forwarded as a keyword argument.
outputs = model(**padded)
# The first element of the output is used as the per-position hidden states
# (presumably the decoder's last hidden state for T5Model -- TODO confirm).
hidden_states = outputs[0]
hidden_states.shape

torch.Size([2, 12, 256])

In [9]:
# Boolean mask over the input ids: True wherever a position holds the
# end-of-sequence token.
eos_token_id = model.config.eos_token_id
eos_mask = padded['input_ids'] == eos_token_id
eos_mask.shape

torch.Size([2, 12])

In [10]:
# Pool one vector per example: the hidden state found at the position of the
# last <eos> token of that example's input ids.
#
# The `.view(batch, -1, dim)` below regroups the flat list of <eos> hidden
# states by example, which is only valid when every example contributes the
# same, non-zero number of <eos> positions. Unequal counts can otherwise be
# reshaped without error and silently mix rows from different examples, so
# check this explicitly first.
eos_counts = eos_mask.sum(dim=1)
min_eos, max_eos = eos_counts.min().item(), eos_counts.max().item()
if min_eos == 0 or min_eos != max_eos:
    raise ValueError('All examples must have the same, non-zero number of <eos> tokens.')

eos_hidden_states = hidden_states[eos_mask, :]  # (total number of <eos>, dim)
sentence_representation = eos_hidden_states.view(
    hidden_states.size(0), -1, hidden_states.size(-1)
)[:, -1, :]  # keep the last <eos> of every example
sentence_representation.shape

torch.Size([2, 256])

In [11]:
from torch import nn
import torch

class BartClassificationHead(nn.Module):
    """Classification head for sentence-level tasks.

    Pipeline applied to the last dimension of the input:
    dropout -> linear (``dense``) -> tanh -> dropout -> linear (``out_proj``).

    Args:
        input_dim: size of the last dimension of the incoming features.
        inner_dim: width of the hidden projection.
        num_classes: number of output logits.
        pooler_dropout: dropout probability used before both linear layers.
    """

    def __init__(self, input_dim: int, inner_dim: int, num_classes: int, pooler_dropout: float):
        super().__init__()
        # `dense` is created before `out_proj`, so seeded initialisation
        # draws random numbers in the same order as before.
        self.dense = nn.Linear(in_features=input_dim, out_features=inner_dim)
        self.dropout = nn.Dropout(p=pooler_dropout)
        self.out_proj = nn.Linear(in_features=inner_dim, out_features=num_classes)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Map features of shape ``(..., input_dim)`` to logits of shape ``(..., num_classes)``."""
        projected = torch.tanh(self.dense(self.dropout(hidden_states)))
        return self.out_proj(self.dropout(projected))

In [12]:
# Build a 3-class head whose input and hidden widths both equal the model's
# hidden size.
classification_head = BartClassificationHead(
    input_dim=model.config.d_model,
    inner_dim=model.config.d_model,
    num_classes=3,
    pooler_dropout=0.1,
)

# NOTE: `model._init_weights(linear)` works for BART, but T5's implementation
# only dispatches on T5-specific module types, so calling it on a bare
# `nn.Linear` leaves the layer untouched. Initialise the two projections
# explicitly instead, with the scheme T5 uses for its own classification
# head: normal weights with std = initializer_factor * d_model ** -0.5 and
# zero biases.
init_std = model.config.initializer_factor * model.config.d_model ** -0.5
for linear in (classification_head.dense, classification_head.out_proj):
    linear.weight.data.normal_(mean=0.0, std=init_std)
    if linear.bias is not None:
        linear.bias.data.zero_()

In [14]:
# Head applied to every position of the hidden states: one logit vector per
# token, i.e. (batch, sequence, classes).
token_logits = classification_head(hidden_states)
token_logits.shape

torch.Size([2, 12, 3])

In [15]:
# Head applied to the pooled <eos> vectors: one logit vector per example,
# i.e. (batch, classes).
sentence_logits = classification_head(sentence_representation)
sentence_logits.shape

torch.Size([2, 3])