In [None]:
!pip install transformers
!pip install datasets

Collecting transformers
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 5.3 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 675 kB/s 
Collecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 33.6 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 36.0 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 34.8 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing

In [None]:

import argparse
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
import torch
from transformers import BertModel, BertTokenizerFast
from torch import nn
from torch.nn import Dropout
from transformers import AdamW
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from torch.nn import CrossEntropyLoss

# Checkpoint name used for both the tokenizer and the BERT encoder weights.
model_name = "bert-base-uncased"
bs = 4          # batch size handed to every DataLoader
max_len = 256   # token-id sequences are truncated/padded to this length
num_epochs = 2  # number of passes over the training split

# Seed torch's global RNG so that shuffling, dropout and the classifier
# initialisation are repeatable on a fresh Restart & Run All.
seed = 42
torch.manual_seed(seed)

In [None]:
class Model(torch.nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    Args:
        model_name_or_path: Checkpoint identifier or path passed to
            ``BertModel.from_pretrained``.
        dropout: Drop probability applied to the pooled encoder output
            before it reaches the classifier.
        num_labels: Number of target classes, i.e. the width of the logits.
    """

    def __init__(self,
                 model_name_or_path: str,
                 dropout: float,
                 num_labels: int):
        super().__init__()
        self.bert_model = BertModel.from_pretrained(model_name_or_path)
        self.dropout = Dropout(dropout)
        self.num_labels = num_labels
        self.classifier = nn.Linear(self.bert_model.config.hidden_size, num_labels)

    def forward(self,
                input_ids: torch.Tensor,
                attention_mask: torch.Tensor,
                token_type_ids: torch.Tensor = None,
                label: torch.Tensor = None
                ):
        """Classify a batch and, when labels are given, compute its loss.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Mask forwarded to the encoder.
            token_type_ids: Segment ids forwarded to the encoder; may be
                ``None`` (the default).
            label: Target class indices. When ``None`` no loss is computed.

        Returns:
            A ``(logits, loss)`` tuple. ``loss`` is the cross-entropy between
            ``logits`` and ``label``, or a scalar zero with the dtype and
            device of ``logits`` when ``label`` is ``None``.
        """
        # With return_dict=False the encoder returns a tuple; only its second
        # element (the pooled output) is used here.
        _, pooler_output = self.bert_model(input_ids=input_ids,
                                           attention_mask=attention_mask,
                                           token_type_ids=token_type_ids,
                                           return_dict=False)
        logits = self.classifier(self.dropout(pooler_output))

        if label is None:
            # Placeholder that lives next to the logits, so callers never end
            # up with a CPU integer tensor while the model runs elsewhere.
            return logits, torch.tensor(0.0, dtype=logits.dtype, device=logits.device)

        loss_fct = CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.num_labels), label.type(torch.long))
        return logits, loss


def compute_metrics(pred):
    """Score a prediction object with accuracy and weighted F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        Dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average="weighted"),
    }


def prepare_data(data, data_sampler, batch_size):
    """Tokenize a dataset split and wrap it in a ``DataLoader``.

    Args:
        data: Iterable of examples, each providing a ``"text"`` and a
            ``"label"`` entry.
        data_sampler: ``"sequential"`` keeps the examples in order; any other
            value draws them with a ``RandomSampler``.
        batch_size: Number of examples per batch.

    Returns:
        A ``DataLoader`` whose batches are ``(input_ids, attention_mask,
        labels)`` tensors, with the first two padded to ``max_len`` columns.
    """
    tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True, model_max_length=max_len)
    tokens = []
    labels = []

    for example in data:
        # Let the tokenizer truncate: slicing the id list afterwards would cut
        # off the closing special token of any over-long text.
        tokens.append(tokenizer.encode(example["text"], truncation=True, max_length=max_len))
        labels.append(example["label"])

    # Pad with the tokenizer's own pad id rather than a hard-coded 0.
    pad_id = tokenizer.pad_token_id
    input_ids = pad_sequences(tokens, maxlen=max_len, dtype="long", value=pad_id, truncating="post", padding="post")
    # 1.0 for real tokens, 0.0 for padding -- computed in one array operation
    # instead of a Python loop over every token id.
    masks = (input_ids != pad_id).astype("float32")

    tensor_data = TensorDataset(torch.tensor(input_ids), torch.tensor(masks), torch.tensor(labels))

    if data_sampler == "sequential":
        sampler = SequentialSampler(tensor_data)
    else:
        sampler = RandomSampler(tensor_data)

    # Honour the batch_size argument (previously the global `bs` was used).
    data_loader = DataLoader(tensor_data, sampler=sampler, batch_size=batch_size)

    return data_loader

In [None]:
emotions = load_dataset("emotion")

# Show one training example (text, then its integer label) as a sanity check.
first_example = emotions["train"][0]
print(first_example["text"])
print(first_example["label"])

# Only the training split is shuffled; evaluation splits keep their order.
train_data_loader = prepare_data(emotions["train"], "random", bs)
test_data_loader = prepare_data(emotions["test"], "sequential", bs)
dev_data_loader = prepare_data(emotions["validation"], "sequential", bs)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Read the class count from the dataset's label feature instead of
# hard-coding it, so the classifier width always matches the data.
num_labels = emotions["train"].features["label"].num_classes

Downloading builder script:   0%|          | 0.00/1.66k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

Using custom data configuration default


Downloading and preparing dataset emotion/default (download: 1.97 MiB, generated: 2.07 MiB, post-processed: Unknown size, total: 4.05 MiB) to /root/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705...


Downloading data:   0%|          | 0.00/1.66M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/204k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/207k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Dataset emotion downloaded and prepared to /root/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

i didnt feel humiliated
0


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
model = Model(model_name, 0.1, num_labels)
model.to(device)

# NOTE(review): transformers' AdamW is deprecated upstream in favour of
# torch.optim.AdamW; it is kept here so the update rule stays the one that
# produced the recorded results.
optimizer = AdamW(model.parameters(), lr=5e-5, eps=1e-8)

for epoch in range(num_epochs):
    # ---- training pass over the shuffled training split ----
    model.train()
    for batch in train_data_loader:
        input_ids, mask, label = (t.to(device) for t in batch)

        model.zero_grad()
        _, loss = model(input_ids, mask, None, label)

        loss.backward()
        optimizer.step()

    # ---- validation pass: count misclassified examples ----
    error = 0
    model.eval()
    with torch.no_grad():
        for batch in dev_data_loader:
            input_ids, mask, label = (t.to(device) for t in batch)
            # No label is passed: only the logits are needed here, so the
            # loss computation is skipped.
            logits, _ = model(input_ids, mask, None)
            predictions = torch.argmax(logits, dim=1)
            error += (predictions != label).sum().item()
    error_rate = 100 * error / len(emotions["validation"])
    print("VALIDATION ERROR RATE: ", error_rate, error, len(emotions["validation"]))

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


VALIDATION ERROR RATE:  8.35 167 2000
VALIDATION ERROR RATE:  7.4 148 2000


In [None]:
# Final evaluation on the held-out test split: count misclassified examples.
error = 0
model.eval()
with torch.no_grad():
    for batch in test_data_loader:
        input_ids, mask, label = (t.to(device) for t in batch)
        # No label is passed: only the logits are needed here, so the loss
        # computation is skipped.
        logits, _ = model(input_ids, mask, None)
        predictions = torch.argmax(logits, dim=1)
        error += (predictions != label).sum().item()
error_rate = 100 * error / len(emotions["test"])
# Labelled as TEST: this figure comes from the test split, not validation.
print("TEST ERROR RATE: ", error_rate, error, len(emotions["test"]))

VALIDATION ERROR RATE:  7.25 145 2000
