In [130]:
from transformers import BertTokenizer, BertModel
  
# One checkpoint name drives both the tokenizer and the encoder.
BERT_CHECKPOINT = "bert-base-cased"

bert_tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT)


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [131]:
# Tokenize the two sentences that will be compared. The tokenizer is called directly,
# which is the documented replacement for the deprecated `encode_plus`; the arguments
# are unchanged (special tokens added, PyTorch tensors returned).
one_encoded = bert_tokenizer('How much will this cost?', add_special_tokens=True, return_tensors='pt')
two_encoded = bert_tokenizer('Is it expensive?', add_special_tokens=True, return_tensors='pt')


In [136]:
# BERT places its CLS token first, so position 0 of the last hidden state
# is used as the vector for each sentence.
one_embedded, two_embedded = (
    bert_model(**encoding).last_hidden_state[:, 0, :]
    for encoding in (one_encoded, two_encoded)
)


In [137]:
import torch

# Cosine similarity between the two BERT sentence vectors, reduced over dim 1.
torch.nn.functional.cosine_similarity(one_embedded, two_embedded, dim=1)

tensor([0.9723], grad_fn=<DivBackward0>)

In [129]:
from transformers import XLNetTokenizer, XLNetModel
  
# One checkpoint name drives both the tokenizer and the encoder.
XLNET_CHECKPOINT = "xlnet-base-cased"

xlnet_tokenizer = XLNetTokenizer.from_pretrained(XLNET_CHECKPOINT)
xlnet_model = XLNetModel.from_pretrained(XLNET_CHECKPOINT)


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetModel: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [121]:
# Show the module tree of the XLNet encoder. `xlnet_model` is the name bound by the
# loading cell; a bare `model` is not defined in the cells shown and fails on a fresh kernel.
xlnet_model

XLNetModel(
  (word_embedding): Embedding(32000, 768)
  (layer): ModuleList(
    (0): XLNetLayer(
      (rel_attn): XLNetRelativeAttention(
        (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): XLNetFeedForward(
        (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (layer_1): Linear(in_features=768, out_features=3072, bias=True)
        (layer_2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (1): XLNetLayer(
      (rel_attn): XLNetRelativeAttention(
        (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): XLNetFeedForward(
        (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (layer_1): Linear(in_features=768, out_features=3072, b

In [152]:
# Tokenize the same two sentences with the XLNet tokenizer. The tokenizer is called
# directly, which is the documented replacement for the deprecated `encode_plus`;
# the arguments are unchanged (special tokens added, PyTorch tensors returned).
one_encoded = xlnet_tokenizer('How much will this cost?', add_special_tokens=True, return_tensors='pt')
two_encoded = xlnet_tokenizer('Is it expensive?', add_special_tokens=True, return_tensors='pt')

In [153]:
# Map the ids back to tokens with the tokenizer that produced them. The bare
# `tokenizer` name is only bound by a later cell, so it is not used here.
xlnet_tokenizer.convert_ids_to_tokens(one_encoded['input_ids'][0])

['▁How', '▁much', '▁will', '▁this', '▁cost', '?', '<sep>', '<cls>']

In [154]:
# XLNet appends its CLS token last, so the final position of the last hidden
# state is used as the vector for each sentence.
one_embedded, two_embedded = (
    xlnet_model(**encoding).last_hidden_state[:, -1, :]
    for encoding in (one_encoded, two_encoded)
)


In [158]:
# Cosine similarity between the two XLNet sentence vectors, reduced over dim 1.
torch.nn.functional.cosine_similarity(one_embedded, two_embedded, dim=1)

tensor([0.9734], grad_fn=<DivBackward0>)

In [160]:
from transformers import XLNetTokenizer, XLNetForSequenceClassification
  
# One checkpoint name drives both the tokenizer and the sequence classifier.
TOXICITY_CHECKPOINT = "mohsenfayyaz/xlnet-base-cased-toxicity"

tokenizer = XLNetTokenizer.from_pretrained(TOXICITY_CHECKPOINT)
seq_clf_model = XLNetForSequenceClassification.from_pretrained(TOXICITY_CHECKPOINT)


In [161]:
# Bare expression: displays the module tree of the loaded sequence classifier.
seq_clf_model

XLNetForSequenceClassification(
  (transformer): XLNetModel(
    (word_embedding): Embedding(32000, 768)
    (layer): ModuleList(
      (0): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e

In [181]:
# NOTE(review): the recorded outputs that follow (ids, tokens, logits, probabilities)
# do not correspond to this input string; they appear to come from an earlier,
# different sentence. Re-run from this cell down to refresh them.
# The tokenizer is called directly, which is the documented replacement for the
# deprecated `encode_plus`.
encoded = tokenizer('hello there I am Sinan', return_tensors='pt')

encoded

{'input_ids': tensor([[   44, 16691, 15683,     4,     3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}

In [182]:
# Turn the ids of the single encoded sequence back into token strings for inspection.
first_sequence_ids = encoded["input_ids"][0]
tokenizer.convert_ids_to_tokens(first_sequence_ids)

['▁you', '▁punk', '▁bitch', '<sep>', '<cls>']

In [183]:
# Run the classifier on the encoded input; later cells read `output.logits`.
output = seq_clf_model(**encoded)

In [184]:
# Build the label list in class-id order so that LABELS[i] names logit column i.
# Taking `.values()` directly would rely on the insertion order of the id2label dict.
LABELS = [label for _, label in sorted(seq_clf_model.config.id2label.items())]

LABELS

['Non-Toxic', 'Toxic']

In [185]:
# Raw, unnormalized class scores from the classifier (softmax is applied in the next cell).
output.logits

tensor([[-1.7793,  2.1901]], grad_fn=<AddmmBackward>)

In [188]:
# Normalize the logits of the single input into probabilities and pair each with its label.
probabilities = torch.softmax(output.logits, dim=1)[0].detach()
list(zip(LABELS, probabilities))

[('Non-Toxic', tensor(0.0185)), ('Toxic', tensor(0.9815))]