In [1]:
%pip install transformers

Collecting transformers
  Downloading transformers-4.41.0-py3-none-any.whl.metadata (43 kB)
     ---------------------------------------- 0.0/43.8 kB ? eta -:--:--
     ----------------- -------------------- 20.5/43.8 kB 682.7 kB/s eta 0:00:01
     ----------------------------------- -- 41.0/43.8 kB 495.5 kB/s eta 0:00:01
     -------------------------------------- 43.8/43.8 kB 430.5 kB/s eta 0:00:00
Collecting filelock (from transformers)
  Downloading filelock-3.14.0-py3-none-any.whl.metadata (2.8 kB)
Collecting huggingface-hub<1.0,>=0.23.0 (from transformers)
  Downloading huggingface_hub-0.23.0-py3-none-any.whl.metadata (12 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.5.15-cp311-cp311-win_amd64.whl.metadata (41 kB)
     ---------------------------------------- 0.0/42.0 kB ? eta -:--:--
     -------------------------------------  41.0/42.0 kB 960.0 kB/s eta 0:00:01
     -------------------------------------- 42.0/42.0 kB 675.2 kB/s eta 0:00:00
Collec

In [2]:
%pip install seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
     ---------------------------------------- 0.0/43.6 kB ? eta -:--:--
     ---------------------------- ----------- 30.7/43.6 kB 1.3 MB/s eta 0:00:01
     -------------------------------------- 43.6/43.6 kB 427.8 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting scikit-learn>=0.21.3 (from seqeval)
  Downloading scikit_learn-1.4.2-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.6.0 (from scikit-learn>=0.21.3->seqeval)
  Downloading scipy-1.13.0-cp311-cp311-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.6 kB ? eta -:--:--
     ---------------------------------------- 0.0/60.6 kB ? eta -:--:--
     ---------------------------------------- 60.6/60.6 kB 3.4 MB/s eta 0:00:00
Collecting joblib>=1.2.0 (from scikit-learn>=0.21.3->seqeval)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Co

In [4]:
import json
import pickle
import time
import datetime
import random
import os
import csv

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from seqeval.metrics import f1_score

import matplotlib.pyplot as plt

device = torch.device("cpu")

SEED_VAL = 42

random.seed(SEED_VAL)
np.random.seed(SEED_VAL)
torch.manual_seed(SEED_VAL);  # Semicolon prevents jupyter from displaying last line as output

In [5]:
from google.colab import drive

drive.mount('/content/drive/')


Mounted at /content/drive/


In [6]:
SNIPS_PATH = "drive/MyDrive/datasets"
TRAIN_PATH = f"{SNIPS_PATH}/train.csv"
VAL_PATH = f"{SNIPS_PATH}/validation.csv"
TEST_PATH = f"{SNIPS_PATH}/test.csv"
#df = pd.read_csv(TEST_PATH,sep=',')

# def load_snips_file(file_path):
#     list_pair =[]
#     with open(file_path,'r',encoding="utf8") as f:
#         for line in f:
#             split_line = line.split(',')
#             pair = split_line[1],split_line[2].strip()
#             list_pair.append(pair)
#     return list_pair


In [7]:
df_train = pd.read_csv(TRAIN_PATH, skipinitialspace=True)
df_valid = pd.read_csv(VAL_PATH, skipinitialspace=True)
df_test = pd.read_csv(TEST_PATH, skipinitialspace=True)

In [8]:
df_train = df_train.replace({r'\r|\n': ''}, regex=True)
df_valid = df_valid.replace({r'\r|\n': ''}, regex=True)
df_test = df_test.replace({r'\r|\n': ''}, regex=True)

In [9]:
df_train.drop('id', axis=1, inplace=True)
df_test.drop('id', axis=1, inplace=True)
df_valid.drop('id', axis=1, inplace=True)

In [10]:
df_train.head()

Unnamed: 0,utterance,logical_form
0,follow <name>,( follow ( lambda $1 e ( person $1 ) ( name $1...
1,go to the <room>,"( go "" <room> "" )"
2,give me the <object>,"( bring ( lambda $1 e ( is_a $1 "" <object> "" )..."
3,bring <object> to me,"( bring ( lambda $1 e ( is_a $1 "" <object> "" )..."
4,bring me the <object>,"( bring ( lambda $1 e ( is_a $1 "" <object> "" )..."


In [11]:
df_train = df_train[df_train['logical_form'].str.contains('bring|put', na=False)].reset_index()
df_test = df_test[df_test['logical_form'].str.contains('bring|put', na=False)].reset_index()
df_valid = df_valid[df_valid['logical_form'].str.contains('bring|put', na=False)].reset_index()

## Intent ID Lookup

In order to input the intent labels into our model we will need to **Create a dictionary mapping each intent name to an integer ID (starting with 0), and assign the dictionary to intent_labeltoid**.

In [12]:
#ograniczamy się do bring i put

intents = df_train['logical_form'].unique().tolist()
intent_labeltoid = {intents[i]: i  for i in range(len(intents)) if ('bring' in intents[i] or 'put' in intents[i])}
intent_labeltoid

{'( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )': 0,
 '( bring ( lambda $1 e ( is_a $1 " <object> " ) ) ( lambda $1 e ( person $1 ) ( name $1 " <name> " ) ) )': 1,
 '( put ( lambda $1 e ( is_a $1 " <object> " ) ) " <location> " )': 2,
 '( bring ( lambda $1 e ( largest $1 ) ) " <location> " )': 3,
 '( bring ( lambda $1 e ( biggest $1 ) ) " <location> " )': 4,
 '( bring ( lambda $1 e ( is_a $1 " <object> " ) ) ( lambda $1 e ( person $1 ) ( name $1 " <name> " ) ( at $1 " <location> " ) ) )': 5,
 '( bring ( lambda $1 e ( lightest $1 ) ) " <location> " )': 6,
 '( bring ( lambda $1 e ( smallest $1 ) ) " <location> " )': 7,
 '( bring ( lambda $1 e ( thinnest $1 ) ) " <location> " )': 8,
 '( bring ( lambda $1 e ( leftmost $1 " <location> " ) ) )': 9,
 '( bring ( lambda $1 e ( heaviest $1 ) ) " <location> " )': 10,
 '( bring ( lambda $1 e ( rightmost $1 " <location> " ) ) )': 11,
 '( bring ( lambda $1 e ( is_a $1 " <category> " ) ( biggest $1 ) ) " <location> " )': 12,
 '( put ( lambda $1 

In [13]:
df_train_list = df_train.values.tolist()
df_test_list = df_test.values.tolist()
df_valid_list = df_valid.values.tolist()
for lista in [df_train_list, df_test_list, df_valid_list]:
    for item in lista:
        item.pop(0)
        item.reverse()
df_train_list

[['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'give me the <object>'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'bring <object> to me'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'bring me the <object>'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'deliver <object> to me'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) ( lambda $1 e ( person $1 ) ( name $1 " <name> " ) ) )',
  'give <object> to <name>'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'give the <object> to me'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'deliver me the <object>'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'bring the <object> to me'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) ( lambda $1 e ( person $1 ) ( name $1 " <name> " ) ) )',
  'bring <object> to <name>'],
 ['( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )',
  'deliver the <object> to me'],
 ['( bring ( lambda $1 e ( is_a $1 " <

In [14]:
#How many training examples are there for each intent?
df_train['logical_form'].value_counts()

logical_form
( put ( lambda $1 e ( is_a $1 " <object> " ) ) " <location> " )                                                                                              176
( bring ( lambda $1 e ( is_a $1 " <object> " ) ) )                                                                                                           146
( bring ( lambda $1 e ( is_a $1 " <object> " ) ( at $1 " <location> " ) ) )                                                                                  136
( put ( lambda $1 e ( is_a $1 " <object> " ) ( at $1 " <location> " ) ) " <location> " )                                                                     121
( bring ( lambda $1 e ( is_a $1 " <object> " ) ( at $1 " <location> " ) ) ( lambda $1 e ( person $1 ) ( at $1 " <location> " ) ( name $1 " <name> " ) ) )     59
( bring ( lambda $1 e ( is_a $1 " <object> " ) ) ( lambda $1 e ( person $1 ) ( is $1 " <gesture> " ) ( at $1 " <room> " ) ) )                                 53
( bring ( lambda $1 e

In [30]:
# def create_mini_training_set(examples_per_intent):
#     intent_array = np.array(df_train_list)[:,0]
#     mini_batch =[]
#     for intent in intents:
#         add = intent_array[intent_array==intent]
#         shuffled_indicies=np.random.RandomState(seed=42).permutation(len(add))
#         class_indicies=shuffled_indicies[:examples_per_intent]
#         sampled_set = np.array(df_train_list)[class_indicies]
#         mini_batch.append(sampled_set)
#     mini_batch = np.array(mini_batch)
#     mini_set = mini_batch.transpose(1,0,2).reshape(-1,mini_batch.shape[2])
#     return mini_set

In [16]:
import re

def get_pad_length():
    all_train_examples_sentences = df_train['utterance']
    word_length = []
    for sentence in all_train_examples_sentences:
        number_words = len(re.findall(r'\b\w+\b|<\w+>', sentence))
        word_length.append(number_words)
    return max(word_length)

PAD_LEN = get_pad_length()

In [17]:
PAD_LEN

20

In [18]:
INTENT_DIM = 31

## BERT Tokenizer


In [19]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

test_utterance = "give <object> to me"

print(tokenizer.encode_plus(
            test_utterance, add_special_tokens=True, max_length=PAD_LEN, pad_to_max_length=True,
            truncation=True, return_attention_mask=True, return_tensors='pt'
    ))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

{'input_ids': tensor([[ 101, 2507, 1026, 4874, 1028, 2000, 2033,  102,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}




In [20]:
def examples_to_dataset(examples):
    input_ids = []
    attention_masks = []
    labels = []
    for instance in examples:
        token_dict = tokenizer.encode_plus(
                instance[1], add_special_tokens=True, max_length=PAD_LEN, pad_to_max_length=True,
                truncation=True, return_attention_mask=True, return_tensors='pt')
        input_ids.append(token_dict['input_ids'])
        attention_masks.append(token_dict['attention_mask'])
        labels.append(torch.tensor(intent_labeltoid[instance[0]]).type(torch.LongTensor))

    input_ids = torch.cat(input_ids)
    attention_masks = torch.cat(attention_masks)
    labels = torch.stack(labels)


    dataset = TensorDataset(input_ids, attention_masks, labels)

    return dataset

In [21]:
#prepare the validation/test dataloaders
val_dataset = examples_to_dataset(df_valid_list)
test_dataset = examples_to_dataset(df_test_list)
BATCH_SIZE = 50
validation_dataloader = DataLoader(val_dataset, sampler=RandomSampler(df_valid_list), batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(df_test_list), batch_size=BATCH_SIZE)

# Training Setup

BERT will output a matrix tensor of shape batch size by #intents, which consists of the intent probabilities for each utterance in the batch. In order to compute the accuracy of the predictions, this prediction matrix is compared with the tensor of correct label IDs (a tensor of size #intents).



**The method `accuracy()`, which takes in the predictions (shape batch size by #intents) as a numpy array, and the correct labels as a numpy array (shape #intents), and returns the floating point accuracy of those predictions in the range \[0,1\]**

In [22]:
def get_accuracy(preds, labels):
    pred_convd = np.argmax(preds,1).flatten()
    labels_flat = labels.flatten()
    correct_labels = np.equal(pred_convd,labels_flat).sum()
    accuracy_value = correct_labels/len(labels)
    return accuracy_value

In [23]:
# Quick tests for the implementation of accuracy.

preds1 = np.array([[1,2,3], [1,3,2], [3,2,1]])

assert get_accuracy(preds1, np.array([2,1,0])) == 1.0
assert get_accuracy(preds1, np.array([2,2,0])) == 2/3
assert get_accuracy(preds1, np.array([3,2,1])) == 0.0

In [24]:
count=0
for batch in tqdm(list(test_dataloader)):
    a,b,c= batch[0],batch[1],batch[2]
    count+=1
    if count ==1:
        break
print(a);print(b);print(c)
print(batch)
print(len(list(test_dataloader)))

  0%|          | 0/9 [00:00<?, ?it/s]

tensor([[  101,  2507,  1026,  4874,  1028,  2000,  2033,   102,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  3531,  2507,  2033,  1996,  1026,  4874,  1028,   102,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  3531,  2507,  1026,  4874,  1028,  2000,  1026,  2171,  1028,
           102,     0,     0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  3531,  3288,  1026,  4874,  1028,  2000,  1026,  2171,  1028,
           102,     0,     0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  8957,  3531,  2507,  1026,  4874,  1028,  2000,  2033,   102,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  2071,  2017,  2507,  1026,  4874,  1028,  2000,  1026,  2171,
          1028,   102,     0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  8957,  3531,  3288,  2033,  1

In [25]:
def evaluate(model, dataloader):
    model.eval()

    accuracy=[]

    for batch in tqdm(list(dataloader)):
        b_input_ids,b_input_mask,b_labels = batch


        with torch.no_grad():
            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels, return_dict=False)

        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        #logit_probability =torch.nn.Softmax(logits)
        batch_accuracy = get_accuracy(logits, label_ids)
        accuracy.append(batch_accuracy)
    avg_accuracy = np.mean(accuracy)  # TODO Compute final accuracy
    print("Validation Accuracy: {}".format(avg_accuracy))
    return avg_accuracy


- The input IDs, input mask, and labels are obtained from the dataloader. These inputs are passed through the model to get a prediction. After which the loss is computed for each batch.
- Thus the batch losses over time is monitored to compute the average training loss for each epoch. After every 5 batches,the validation accuracy is computed.

In [26]:

def train(model, dataloader, epochs):
    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    for epoch_i in range(0, EPOCHS):
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))

        model.train()
        #n_iteration = 0
        accuracy = []
        total_train_loss = []

        for step, batch in tqdm(list(enumerate(train_dataloader))):
            # get input IDs, input mask, and labels from batch
            b_input_ids,b_input_mask,b_labels = batch

            model.zero_grad()
            #pass inputs through model
            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels, return_dict=False)
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # Add to total_train_loss
            total_train_loss.append(loss)
            #logit_probability =torch.nn.Softmax(logits)
            batch_accuracy = get_accuracy(logits, label_ids)
            accuracy.append(batch_accuracy)
            #n_iteration += 1
        # Compute average train loss
        new_loss = [x.cpu().detach().numpy() for x in total_train_loss]
        avg_train_loss = np.mean(new_loss)
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print("  Average Training accuracy: {0:.2f}".format(np.mean(accuracy)))
    #validation_accuracy =evaluate(bert_model, validation_dataloader)



In [31]:
BATCH_SIZE = 16

EPOCHS = 10


train_dataset = examples_to_dataset(df_train_list)
train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset), batch_size=BATCH_SIZE)


bert_model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = INTENT_DIM,
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)

train(bert_model, train_dataloader, EPOCHS)

print("Evaluating on test set:")
print("Test accuracy:", evaluate(bert_model, test_dataloader))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.






  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 2.94
  Average Training accuracy: 0.27


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 1.98
  Average Training accuracy: 0.54


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 1.34
  Average Training accuracy: 0.75


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 0.91
  Average Training accuracy: 0.84


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 0.65
  Average Training accuracy: 0.89


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 0.50
  Average Training accuracy: 0.94


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 0.40
  Average Training accuracy: 0.96


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 0.33
  Average Training accuracy: 0.97


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 0.29
  Average Training accuracy: 0.98


  0%|          | 0/87 [00:00<?, ?it/s]

  Average training loss: 0.27
  Average Training accuracy: 0.98
Evaluating on test set:


  0%|          | 0/9 [00:00<?, ?it/s]

Validation Accuracy: 0.8488888888888888
Test accuracy: 0.8488888888888888
