### Teste de destilação de modelo usando BERT e BiLSTM no dataset SST-2.

Neste notebook exploraremos a destilação de conhecimento utilizando o modelo pré-treinado BERT como professor e treinando uma BiLSTM como aluna.

### Carregando Base de Dados

In [2]:
from pathlib import Path
from os.path import join

def load_sentences(data_folder, file, encoding="utf-8"):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        data_folder: Directory containing the file (``str`` or ``Path``).
        file: File name inside ``data_folder`` (``str`` or ``Path``).
        encoding: Text encoding used to decode the file. Fixed to UTF-8 by
            default so the result does not depend on the platform locale.

    Returns:
        A list with one stripped string per line of the file, in file order.
        Blank lines are kept as empty strings.
    """
    path = join(data_folder, file)
    # The context manager closes the handle even if decoding fails midway;
    # the previous bare ``open(path).readlines()`` left it to the garbage collector.
    with open(path, encoding=encoding) as handle:
        return [line.strip() for line in handle]

# Folder holding the dataset text files, relative to the notebook location.
data_folder = Path('../../data/STT2/')

# One text file per split.
train_file, test_file, dev_file = (
    Path(file_name) for file_name in ('train.txt', 'test.txt', 'dev.txt')
)

# Load the splits in the order train -> test -> dev.
train_sentences, test_sentences, dev_sentences = (
    load_sentences(data_folder, split_file)
    for split_file in (train_file, test_file, dev_file)
)

# Show the first ten sentences of every split.
for preview_header, split_sentences in (
    ("Sentenças de Treino: \n", train_sentences),
    ("Sentenças de Teste: \n", test_sentences),
    ("Sentenças de Dev: \n", dev_sentences),
):
    print(preview_header, split_sentences[:10])

# Then report how many sentences each split holds.
for size_header, split_sentences in (
    ("Tamanho de Treino: \n", train_sentences),
    ("Tamanho de Teste: \n", test_sentences),
    ("Tamanho de Dev: \n", dev_sentences),
):
    print(size_header, len(split_sentences))


Sentenças de Treino: 
 ["The Rock is destined to be the 21st Century 's new `` Conan '' and that he 's going to make a splash even greater than Arnold Schwarzenegger , Jean-Claud Van Damme or Steven Segal .", "The gorgeously elaborate continuation of `` The Lord of the Rings '' trilogy is so huge that a column of words can not adequately describe co-writer\\/director Peter Jackson 's expanded vision of J.R.R. Tolkien 's Middle-earth .", 'Singer\\/composer Bryan Adams contributes a slew of songs -- a few potential hits , a few more simply intrusive to the story -- but the whole package certainly captures the intended , er , spirit of the piece .', "You 'd think by now America would have had enough of plucky British eccentrics with hearts of gold .", 'Yet the act is still charming here .', "Whether or not you 're enlightened by any of Derrida 's lectures on `` the other '' and `` the self , '' Derrida is an undeniably fascinating and playful fellow .", 'Just the labour involved in creati

### Carregamento do Modelo

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Identifier of the pretrained teacher checkpoint; tokenizer and model must
# come from the same checkpoint so vocabulary ids line up.
teacher_checkpoint = "textattack/bert-base-uncased-SST-2"

tokenizer = AutoTokenizer.from_pretrained(teacher_checkpoint)
bert_model = AutoModelForSequenceClassification.from_pretrained(teacher_checkpoint)

# Keep a handle on the teacher's embedding sub-module for later cells.
embedding = bert_model.bert.embeddings

In [4]:
import os

tensors_folder = Path('tensors')
tensors_path = os.path.join(data_folder, tensors_folder)

# Every sentence is padded or truncated to this many tokens.
max_length = 35


def _tensor_file(split, kind):
    """Return the cache path of ``<split>_<kind>.pt`` inside ``tensors_path``."""
    return os.path.join(tensors_path, Path(f"{split}_{kind}.pt"))


def _tokenize(sentences):
    """Tokenize ``sentences`` into PyTorch tensors of fixed length ``max_length``."""
    return tokenizer(sentences,
                     return_tensors='pt',
                     padding='max_length',
                     truncation=True,
                     max_length=max_length)


def _teacher_tensors(encoded, split, chunk_size=256):
    """Compute, cache and return ``(logits, embedded_inputs)`` for one split.

    The tokenized split is fed to the teacher in chunks of ``chunk_size`` rows
    instead of as a single batch, so peak memory no longer grows with the
    size of the split. Callers are expected to have switched the models to
    eval mode and to run this under ``torch.no_grad()``.

    Args:
        encoded: Tokenizer output for the split (mapping of name -> tensor).
        split: Split name used as file prefix ("train", "dev" or "test").
        chunk_size: Number of sentences per forward pass.

    Returns:
        Tuple ``(logits, embedded_inputs)`` concatenated over all chunks.
    """
    logits_chunks, embedded_chunks = [], []
    total = encoded['input_ids'].size(0)
    for start in range(0, total, chunk_size):
        chunk = {key: value[start:start + chunk_size] for key, value in encoded.items()}
        logits_chunks.append(bert_model(**chunk)[0])
        embedded_chunks.append(embedding(chunk['input_ids']))

    logits = torch.cat(logits_chunks)
    embedded = torch.cat(embedded_chunks)
    torch.save(logits, _tensor_file(split, "logits"))
    torch.save(embedded, _tensor_file(split, "inputs"))
    return logits, embedded


# The cache is valid only when every expected file is present. Checking just
# for the directory would treat a folder left behind by an interrupted run as
# a complete cache and crash in torch.load on every later run.
_cache_files = [_tensor_file(split, kind)
                for split in ("train", "dev", "test")
                for kind in ("logits", "inputs")]

if not all(os.path.isfile(cache_file) for cache_file in _cache_files):
    print("Pasta de Tensores Não Encontrada.")
    os.makedirs(tensors_path, exist_ok=True)
    print("Gerando Tensores. Espere.")

    print("Gerando Tokens...")
    examples = train_sentences
    train_encoded = _tokenize(examples)
    dev_encoded = _tokenize(dev_sentences)
    test_encoded = _tokenize(test_sentences)

    # Eval mode disables dropout so the cached tensors are deterministic.
    embedding.eval()
    bert_model.eval()

    with torch.no_grad():
        print("Gerando Tensores de Treino...")
        bert_logits, inputs = _teacher_tensors(train_encoded, "train")

        print("Gerando Tensores de Validação...")
        dev_bert_logits, dev_inputs = _teacher_tensors(dev_encoded, "dev")

        print("Gerando Tensores de Teste...")
        test_bert_logits, test_inputs = _teacher_tensors(test_encoded, "test")

else:
    print("Pasta de Tensores Encontrada. Carregando Tensores...")
    bert_logits = torch.load(_tensor_file("train", "logits"))
    inputs = torch.load(_tensor_file("train", "inputs"))

    dev_bert_logits = torch.load(_tensor_file("dev", "logits"))
    dev_inputs = torch.load(_tensor_file("dev", "inputs"))

    test_bert_logits = torch.load(_tensor_file("test", "logits"))
    test_inputs = torch.load(_tensor_file("test", "inputs"))

Pasta de Tensores Encontrada. Carregando Tensores...


### Formato do Modelo

In [5]:
# Display the teacher's embedding sub-module so its layers can be inspected.
bert_model.bert.embeddings

BertEmbeddings(
  (word_embeddings): Embedding(30522, 768, padding_idx=0)
  (position_embeddings): Embedding(512, 768)
  (token_type_embeddings): Embedding(2, 768)
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

### Novo Modelo BiLSTM

In [6]:
from torch import nn
from torch.nn import functional as F
import pytorch_lightning as pl
from ranger import Ranger 

class BiLSTM(pl.LightningModule):
    """Bidirectional LSTM student trained to reproduce target logits.

    The forward pass maps a batch of embedded sequences to two logits per
    sequence. Training, validation and test steps all use the mean-squared
    error between those logits and the target logits carried in the batch.

    NOTE(review): ``pl.TrainResult`` / ``pl.EvalResult`` belong to an older
    PyTorch Lightning API — confirm the pinned version before upgrading.
    """

    def __init__(self):
        super().__init__()
        # Single-layer bidirectional LSTM over 768-dimensional input vectors.
        self.bilstm = nn.LSTM(
            input_size=768,
            hidden_size=150,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        # 300 = 2 directions x 150 hidden units (concatenated final states).
        self.dense = nn.Linear(
            in_features=300,
            out_features=200,
        )
        self.output = nn.Linear(
            in_features=200,
            out_features=2,
        )

    def configure_optimizers(self):
        """Return a Ranger optimizer over all parameters with its default settings."""
        return Ranger(self.parameters())

    def forward(self, x):
        """Compute two logits per sequence.

        Args:
            x: Batch-first tensor of embedded sequences whose last dimension
                is 768 (the LSTM ``input_size``).

        Returns:
            Tensor of shape ``(x.size(0), 2)``.
        """
        _, (last_state, _) = self.bilstm(x)
        # The final hidden state is laid out as (directions, batch, hidden).
        # The batch axis must be moved to the front before flattening;
        # a plain ``view(batch, -1)`` would splice together states that
        # belong to different samples whenever batch > 1.
        last_state = last_state.transpose(0, 1).reshape(x.size(0), -1)
        # ``training=self.training`` switches dropout off in eval mode;
        # F.dropout defaults to training=True and would otherwise keep
        # dropping units during validation and test.
        dropped_last_state = F.dropout(last_state, 0.2, training=self.training)
        dense_state = F.relu(self.dense(dropped_last_state))
        dropped_dense_state = F.dropout(dense_state, 0.2, training=self.training)
        logits = self.output(dropped_dense_state)
        return logits

    def _distillation_loss(self, batch):
        """Return the MSE between the predicted logits and the batch targets."""
        x, y = batch
        return F.mse_loss(self(x), y)

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log it as ``train_loss``."""
        loss = self._distillation_loss(batch)
        result = pl.TrainResult(loss)
        result.log('train_loss', loss, prog_bar=True)
        return result

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss; it is logged and drives checkpointing."""
        loss = self._distillation_loss(batch)
        result = pl.EvalResult(checkpoint_on=loss)
        result.log('val_loss', loss, prog_bar=True)
        return result

    def test_step(self, batch, batch_idx):
        """Compute the test loss and log it as ``test_loss``."""
        loss = self._distillation_loss(batch)
        result = pl.EvalResult()
        result.log('test_loss', loss, prog_bar=True)
        return result

In [7]:
from torch.utils.data import Dataset, DataLoader

class STT2Dataset(Dataset):
    """Pairs each input example with the logits stored at the same index.

    Args:
        inputs: Indexable collection of input examples.
        bert_logits: Indexable collection of target logits, aligned with
            ``inputs`` by position.

    Raises:
        ValueError: If the two collections differ in length. Without this
            check a mismatch would either fail late with an ``IndexError``
            or silently ignore the surplus targets.
    """

    def __init__(self, inputs, bert_logits):
        if len(inputs) != len(bert_logits):
            raise ValueError(
                "inputs and bert_logits must have the same length, got "
                f"{len(inputs)} and {len(bert_logits)}"
            )
        self.inputs = inputs
        self.bert_logits = bert_logits

    def __len__(self):
        """Return the number of (input, logits) pairs."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``(input, logits)`` pair stored at position ``idx``."""
        return self.inputs[idx], self.bert_logits[idx]

### Treinamento e Teste

In [1]:
batch_size = 1024

bilstm_model = BiLSTM()

train_dataset = STT2Dataset(inputs, bert_logits)
dev_dataset = STT2Dataset(dev_inputs, dev_bert_logits)
test_dataset = STT2Dataset(test_inputs, test_bert_logits)

# Shuffle only the training split so every epoch sees the examples in a
# different order; evaluation splits keep their fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_dataloader = DataLoader(dev_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

trainer = pl.Trainer(
    max_epochs=10,
    # Use the first GPU when one is visible; otherwise fall back to CPU
    # instead of failing on machines without CUDA.
    gpus=[0] if torch.cuda.is_available() else None,
)
trainer.fit(bilstm_model, train_dataloader, dev_dataloader)

NameError: name 'BiLSTM' is not defined

In [1]:
# Run the test loop on the test split; requires `trainer` and
# `test_dataloader` from the training cell to exist in the kernel.
trainer.test(test_dataloaders=test_dataloader)


NameError: name 'trainer' is not defined