<a href="https://colab.research.google.com/github/nsomabalint/intent-detection/blob/modelling/BERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install wandb transformers datasets



In [2]:
# Set before any W&B / Trainer code runs.
# NOTE(review): presumably makes the Hugging Face W&B integration upload the
# trained model with the run — confirm against the installed versions.
%env WANDB_LOG_MODEL=true

env: WANDB_LOG_MODEL=true


In [3]:
import torch
import wandb
import pandas as pd
import numpy as np
from transformers import AutoTokenizer
from datasets import Dataset
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.preprocessing import LabelEncoder
from google.colab import drive


def load_dataset_df(base_dir="/content/drive/MyDrive/intent_detection_project/",
                    file_paths=("dataset/train", "dataset/valid", "dataset/test"),
                    seed=None):
    """Load every dataset split into one shuffled DataFrame.

    Each split directory must contain two line-aligned UTF-8 files: ``seq.in``
    (one utterance per line) and ``label`` (the intent of the utterance on the
    same line).

    Args:
        base_dir: Directory the entries of ``file_paths`` are relative to.
            Defaults to the project folder on the mounted Google Drive.
        file_paths: Relative paths of the split directories. The last path
            component of each one is stored as the split name.
        seed: Optional ``random_state`` for the shuffle. ``None`` (default)
            gives a different order on every call; pass an int to make the
            row order reproducible.

    Returns:
        DataFrame with columns ``text``, ``intent`` and ``ds_name``, rows
        shuffled and re-indexed from 0.

    Raises:
        ValueError: If ``seq.in`` and ``label`` of a split differ in length,
            because pairing them would silently misalign or drop examples.
    """
    # Accept base_dir with or without a trailing slash; "" means "relative to cwd".
    root = base_dir if (not base_dir or base_dir.endswith("/")) else base_dir + "/"

    examples = []

    for rel_path in file_paths:
        split_dir = root + rel_path
        ds_name = split_dir.split("/")[-1]
        with open(f'{split_dir}/seq.in', 'r', encoding="utf-8") as f_text, \
                open(f'{split_dir}/label', 'r', encoding="utf-8") as f_label:
            texts = f_text.readlines()
            labels = f_label.readlines()

        # zip() would stop at the shorter file without any error.
        if len(texts) != len(labels):
            raise ValueError(
                f"{split_dir}: seq.in has {len(texts)} lines but label has {len(labels)}"
            )

        examples.extend((text.strip(), label.strip(), ds_name)
                        for text, label in zip(texts, labels))

    df = pd.DataFrame(examples, columns=["text", "intent", "ds_name"])
    df = df.sample(frac=1.0, random_state=seed)
    df = df.reset_index(drop=True)

    return df


def compute_metrics(p):
    """Compute the classification metrics reported at every evaluation.

    Args:
        p: Pair ``(predictions, labels)``. The predicted class is taken as the
            argmax over axis 1 of ``predictions``; ``labels`` holds the true
            class ids.

    Returns:
        Dict with ``val_accuracy``, ``val_precision``, ``val_recall`` and
        ``val_f1``. Precision, recall and F1 are weighted averages over the
        classes.
    """
    pred, labels = p
    pred = np.argmax(pred, axis=1)

    # A class with no predicted (or no true) samples has an undefined score.
    # sklearn already scores it as 0 but warns on every evaluation;
    # zero_division=0 keeps the same value without the warning.
    weighted = {"average": "weighted", "zero_division": 0}

    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred, **weighted)
    precision = precision_score(y_true=labels, y_pred=pred, **weighted)
    f1 = f1_score(y_true=labels, y_pred=pred, **weighted)

    return {"val_accuracy": accuracy, "val_precision": precision, "val_recall": recall, "val_f1": f1}


def tokenize_function(examples, tokenizer, max_length=None):
    """Tokenize the ``message`` column of a batch.

    Args:
        examples: Mapping with a ``"message"`` entry holding the texts.
        tokenizer: Callable applied to the texts; called with
            ``padding="max_length"`` and ``truncation=True``.
        max_length: Optional cap forwarded to the tokenizer. When ``None``
            (default) no ``max_length`` is passed, so the tokenizer falls back
            to its own maximum; short utterances can be padded far less by
            passing a smaller value.

    Returns:
        Whatever ``tokenizer`` returns for the batch.
    """
    options = {"padding": "max_length", "truncation": True}
    if max_length is not None:
        options["max_length"] = max_length
    return tokenizer(examples["message"], **options)

In [4]:
# Mount Google Drive at /content/drive; load_dataset_df reads the dataset
# files from a project folder underneath this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Start the Weights & Biases run under project "intent-detection", entity "nsoma".
wandb.init(project="intent-detection", entity='nsoma')

# Checkpoint name shared by the tokenizer below and the model created later.
MODEL_NAME = 'bert-base-cased'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Use the GPU when one is available; the model cell moves the model there.
device = "cuda" if torch.cuda.is_available() else "cpu"

[34m[1mwandb[0m: Currently logged in as: [33mnsoma[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
dataset = load_dataset_df()

# Fit the encoder on the intents of all splits and encode them in one step,
# then rename the text column to the name tokenize_function reads.
le = LabelEncoder()
dataset = (
    dataset
    .assign(label=le.fit_transform(dataset.intent.tolist()))
    .rename(columns={'text': 'message'})
)

dataset.head()

Unnamed: 0,message,intent,ds_name,label
0,where's the money that got charged to my card?...,topping_up_by_card,test,62
1,looks like my card payment was duplicated afte...,transaction_charged_twice,train,63
2,we want to start a college fund for my daughter.,age_limit,train,2
3,i made a cash deposit almost a week ago but it...,balance_not_updated_after_cheque_or_cash_deposit,test,7
4,i believe my top up was reverted.,top_up_reverted,train,61


In [7]:
# Store the intent names with the run, in the encoder's class order, so the
# integer labels can be mapped back to intent names later.
wandb.log({"labels": list(le.classes_)})

In [8]:
# Only the raw text and the encoded label are needed from here on.
cols = ['message', 'label']

train_df = dataset[dataset.ds_name == 'train'][cols]
val_df = dataset[dataset.ds_name == 'valid'][cols]
test_df = dataset[dataset.ds_name == 'test'][cols]

# The filtered frames keep the non-contiguous index of the shuffled frame.
# Without preserve_index=False it is stored as an extra `__index_level_0__`
# column that the Trainer then has to drop at every train/eval/predict call.
train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
val_dataset = Dataset.from_pandas(val_df, preserve_index=False)
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)

In [9]:
# Tokenize every split in batches; the shared tokenizer is handed to
# tokenize_function through fn_kwargs instead of a per-call lambda.
tokenize_kwargs = {"tokenizer": tokenizer}

train_dataset = train_dataset.map(tokenize_function, batched=True, fn_kwargs=tokenize_kwargs)
val_dataset = val_dataset.map(tokenize_function, batched=True, fn_kwargs=tokenize_kwargs)
test_dataset = test_dataset.map(tokenize_function, batched=True, fn_kwargs=tokenize_kwargs)

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

In [10]:
# Size the classification head from the fitted label encoder instead of a
# hard-coded 77, and store the id <-> intent mapping in the model config so
# saved checkpoints report intent names rather than bare integers.
intent_names = [str(name) for name in le.classes_]

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(intent_names),
    id2label=dict(enumerate(intent_names)),
    label2id={name: idx for idx, name in enumerate(intent_names)},
)

if device == "cuda":
    model.to(device)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [11]:
epoch = 20
lr = 1e-5
patience = 5

# The checkpoint directory is named after the model and the hyper-parameters above.
output_dir = f"{MODEL_NAME}_{epoch}_{patience}_{lr}"

training_args = TrainingArguments(
    output_dir=output_dir,
    # optimisation
    num_train_epochs=epoch,
    learning_rate=lr,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # evaluate and checkpoint once per epoch, keeping the best model at the end
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # logging
    report_to="wandb",
)

# Stop once the monitored metric has not improved for `patience` evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=patience)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: message, __index_level_0__. If message, __index_level_0__ are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 8622
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 10780
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Epoch,Training Loss,Validation Loss,Val Accuracy,Val Precision,Val Recall,Val F1
1,3.7777,2.815619,0.568182,0.601286,0.568182,0.527132
2,2.3891,1.651832,0.773377,0.790306,0.773377,0.75441
3,1.4601,1.005326,0.86039,0.871004,0.86039,0.850974
4,0.9033,0.667845,0.885065,0.899795,0.885065,0.881372
5,0.5827,0.483944,0.914935,0.920148,0.914935,0.914538
6,0.3836,0.393929,0.92013,0.926229,0.92013,0.919671
7,0.2688,0.343466,0.922078,0.926883,0.922078,0.921753
8,0.1846,0.32501,0.923377,0.928721,0.923377,0.923245
9,0.1261,0.322786,0.928571,0.934051,0.928571,0.928367
10,0.0893,0.315366,0.927922,0.932041,0.927922,0.927592


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: message, __index_level_0__. If message, __index_level_0__ are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1540
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to bert-base-cased_20_5_1e-05/checkpoint-539
Configuration saved in bert-base-cased_20_5_1e-05/checkpoint-539/config.json
Model weights saved in bert-base-cased_20_5_1e-05/checkpoint-539/pytorch_model.bin
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: message, __index_level_0__. If message, __index_level_0__ are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num exam

TrainOutput(global_step=8085, training_loss=0.6421451923503652, metrics={'train_runtime': 12252.034, 'train_samples_per_second': 14.074, 'train_steps_per_second': 0.88, 'total_flos': 3.40510671687168e+16, 'train_loss': 0.6421451923503652, 'epoch': 15.0})

In [12]:
# Read the predictions through the named field rather than unpacking into `_`:
# assigning to `_` would override IPython's "last output" variable for the
# rest of the session.
test_output = trainer.predict(test_dataset)
raw_pred = test_output.predictions
y_pred = np.argmax(raw_pred, axis=1)

# True labels in the same row order the test dataset was built from.
y_true = test_df["label"].tolist()

f1 = f1_score(y_true, y_pred, average="weighted")
acc = accuracy_score(y_true, y_pred)

wandb.log({"test_accuracy": acc, "test_f1_score": f1})

print("Accuracy:", acc)
print("F1:", f1)

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: message, __index_level_0__. If message, __index_level_0__ are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 3080
  Batch size = 16


Accuracy: 0.923051948051948
F1: 0.9232065240063176


In [13]:
# Close the W&B run opened by wandb.init now that the test metrics are logged.
wandb.finish()

VBox(children=(Label(value='413.472 MB of 413.472 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
eval/loss,█▅▃▂▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▅▇▁█▂▄▂▃▃▄▁▇▂▄▄
eval/samples_per_second,▄▂█▁▇▅▇▆▆▅█▂▇▅▅
eval/steps_per_second,▅▃█▁▇▅▇▆▆▅█▂▇▅▅
eval/val_accuracy,▁▅▇▇███████████
eval/val_f1,▁▅▇▇███████████
eval/val_precision,▁▅▇▇███████████
eval/val_recall,▁▅▇▇███████████
test_accuracy,▁
test_f1_score,▁

0,1
eval/loss,0.33868
eval/runtime,51.3518
eval/samples_per_second,29.989
eval/steps_per_second,1.889
eval/val_accuracy,0.93247
eval/val_f1,0.93241
eval/val_precision,0.93748
eval/val_recall,0.93247
test_accuracy,0.92305
test_f1_score,0.92321
