In [62]:
%pip install transformers



In [63]:
%pip install seqeval



In [1]:
import json
import pickle
import time
import datetime
import random
import os
import csv

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from seqeval.metrics import f1_score


device = torch.device("cpu")

SEED_VAL = 42

random.seed(SEED_VAL)
np.random.seed(SEED_VAL)
torch.manual_seed(SEED_VAL);  # Semicolon prevents jupyter from displaying last line as output

In [65]:
from google.colab import drive

drive.mount('/content/drive/')


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [22]:
#SNIPS_PATH = "drive/MyDrive/generated_datasets"
TRAIN_PATH = "train.csv"
VAL_PATH = "val.csv"
TEST_PATH = "test.csv"

In [23]:
df_train = pd.read_csv(TRAIN_PATH, skipinitialspace=True)
df_valid = pd.read_csv(VAL_PATH, skipinitialspace=True)
df_test = pd.read_csv(TEST_PATH, skipinitialspace=True)

In [24]:
df_train['Subject'] = df_train['Subject'].apply(lambda x: None if x == 'me' else x)
df_valid['Subject'] = df_valid['Subject'].apply(lambda x: None if x == 'me' else x)
df_test['Subject'] = df_test['Subject'].apply(lambda x: None if x == 'me' else x)

df_train.dropna(inplace=True)
df_train = df_train.reset_index(drop=True)
df_valid.dropna(inplace=True)
df_valid = df_valid.reset_index(drop=True)
df_test.dropna(inplace=True)
df_test = df_test.reset_index(drop=True)

df_train

Unnamed: 0,Command,Subject,Target
0,"Robot, carry the scrubby from the corridor to ...",scrubby,table
1,"Robot, pick up the coke from the corridor.",coke,corridor
2,"Robot, pick up the sponge and place it in the ...",sponge,room
3,"Robot, pick up the grape juice from the living...",juice,room
4,"Robot, fetch the sprite and place it on the cu...",sprite,cupboard
...,...,...,...
3324,"Robot, pick up the sponge from the bedroom.",sponge,bedroom
3325,"Robot, get the spoon located in the living room.",spoon,room
3326,"Robot, carry the chocolate drink to the counter.",drink,counter
3327,"Robot, get me the sausages from the dining table.",sausages,table


In [25]:
df_train = df_train.replace({r'\r|\n': ''}, regex=True)
df_valid = df_valid.replace({r'\r|\n': ''}, regex=True)
df_test = df_test.replace({r'\r|\n': ''}, regex=True)

In [26]:
df_train.head()

Unnamed: 0,Command,Subject,Target
0,"Robot, carry the scrubby from the corridor to ...",scrubby,table
1,"Robot, pick up the coke from the corridor.",coke,corridor
2,"Robot, pick up the sponge and place it in the ...",sponge,room
3,"Robot, pick up the grape juice from the living...",juice,room
4,"Robot, fetch the sprite and place it on the cu...",sprite,cupboard


## Intent ID Lookup

In order to input the intent labels into our model we will need to **Create a dictionary mapping each intent name to an integer ID (starting with 0), and assign the dictionary to intent_labeltoid**.

In [27]:

intents = df_train['Target'].unique().tolist()
intents_valid = df_valid['Target'].unique().tolist()
intents_test = df_test['Target'].unique().tolist()

intents = [*intents, *intents_valid, *intents_test]

#usuwanie duplikatów
intents = list(set(intents))

intent_labeltoid = {intents[i]: i  for i in range(len(intents))}
intent_labeltoid

{'desk': 0,
 'kitchen': 1,
 'cupboard': 2,
 'bedroom': 3,
 'corridor': 4,
 'bookcase': 5,
 'counter': 6,
 'room': 7,
 'table': 8,
 'sink': 9}

In [28]:
df_train_list = df_train.values.tolist()
df_test_list = df_test.values.tolist()
df_valid_list = df_valid.values.tolist()
for lista in [df_train_list, df_test_list, df_valid_list]:
    for item in lista:
        item.reverse()
        #do trenowania subject
        # item.pop(0)
        #do trenowania target
        item.pop(1)
df_valid_list

[['room', 'Robot, pick up the coke from the dining room.'],
 ['table', 'Robot, fetch the bowl and place it on the side table.'],
 ['table', 'Robot, carry the fork to the side table.'],
 ['cupboard', 'Robot, get me the dish from the cupboard.'],
 ['table',
  'Robot, get the potato chips from the kitchen and put it on the storage table.'],
 ['room', 'Robot, pick up the pringles and place it in the dining room.'],
 ['kitchen', 'Robot, pick up the cereal from the kitchen.'],
 ['room', 'Robot, bring the scrubby to the living room.'],
 ['table', 'Robot, carry the sausages from the corridor to the end table.'],
 ['kitchen', 'Robot, fetch the sausages and bring it to the kitchen.'],
 ['table', 'Robot, fetch the sausages and place it on the storage table.'],
 ['bedroom', 'Robot, bring the noodles from the bedroom.'],
 ['table',
  'Robot, get the orange juice from the corridor and put it on the end table.'],
 ['room', 'Robot, fetch the scrubby from the living room.'],
 ['room', 'Robot, fetch the

In [29]:
#How many training examples are there for each intent?
df_train['Target'].value_counts()

Target
room        856
table       547
corridor    416
bedroom     396
kitchen     390
desk        151
counter     149
cupboard    148
bookcase    147
sink        129
Name: count, dtype: int64

In [76]:
# def create_mini_training_set(examples_per_intent):
#     intent_array = np.array(df_train_list)[:,0]
#     mini_batch =[]
#     for intent in intents:
#         add = intent_array[intent_array==intent]
#         shuffled_indicies=np.random.RandomState(seed=42).permutation(len(add))
#         class_indicies=shuffled_indicies[:examples_per_intent]
#         sampled_set = np.array(df_train_list)[class_indicies]
#         mini_batch.append(sampled_set)
#     mini_batch = np.array(mini_batch)
#     mini_set = mini_batch.transpose(1,0,2).reshape(-1,mini_batch.shape[2])
#     return mini_set

In [30]:
import re

def get_pad_length():
    all_train_examples_sentences = df_train['Command']
    word_length = []
    for sentence in all_train_examples_sentences:
        number_words = len(re.findall(r'\b\w+\b|<\w+>', sentence))
        word_length.append(number_words)
    return max(word_length)

PAD_LEN = get_pad_length()

In [31]:
PAD_LEN

16

In [32]:
INTENT_DIM = 10

## BERT Tokenizer


In [33]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

test_utterance = "Robot, get the knife located in the dining room."

print(tokenizer.encode_plus(
            test_utterance, add_special_tokens=True, max_length=PAD_LEN, pad_to_max_length=True,
            truncation=True, return_attention_mask=True, return_tensors='pt'
    ))

{'input_ids': tensor([[ 101, 8957, 1010, 2131, 1996, 5442, 2284, 1999, 1996, 7759, 2282, 1012,
          102,    0,    0,    0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]])}




In [34]:
def examples_to_dataset(examples):
    input_ids = []
    attention_masks = []
    labels = []
    for instance in examples:
        token_dict = tokenizer.encode_plus(
                instance[1], add_special_tokens=True, max_length=PAD_LEN, pad_to_max_length=True,
                truncation=True, return_attention_mask=True, return_tensors='pt')
        input_ids.append(token_dict['input_ids'])
        attention_masks.append(token_dict['attention_mask'])
        labels.append(torch.tensor(intent_labeltoid[instance[0]]).type(torch.LongTensor))

    input_ids = torch.cat(input_ids)
    attention_masks = torch.cat(attention_masks)
    labels = torch.stack(labels)


    dataset = TensorDataset(input_ids, attention_masks, labels)

    return dataset

In [35]:
#prepare the validation/test dataloaders
val_dataset = examples_to_dataset(df_valid_list)
test_dataset = examples_to_dataset(df_test_list)
BATCH_SIZE = 50
validation_dataloader = DataLoader(val_dataset, sampler=RandomSampler(df_valid_list), batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(df_test_list), batch_size=BATCH_SIZE)

# Training Setup

BERT will output a matrix tensor of shape batch size by #intents, which consists of the intent probabilities for each utterance in the batch. In order to compute the accuracy of the predictions, this prediction matrix is compared with the tensor of correct label IDs (a tensor of size #intents).



**The method `accuracy()`, which takes in the predictions (shape batch size by #intents) as a numpy array, and the correct labels as a numpy array (shape #intents), and returns the floating point accuracy of those predictions in the range \[0,1\]**

In [36]:
def get_accuracy(preds, labels):
    pred_convd = np.argmax(preds,1).flatten()
    labels_flat = labels.flatten()
    correct_labels = np.equal(pred_convd,labels_flat).sum()
    accuracy_value = correct_labels/len(labels)
    return accuracy_value

In [37]:
# Quick tests for the implementation of accuracy.

preds1 = np.array([[1,2,3], [1,3,2], [3,2,1]])

assert get_accuracy(preds1, np.array([2,1,0])) == 1.0
assert get_accuracy(preds1, np.array([2,2,0])) == 2/3
assert get_accuracy(preds1, np.array([3,2,1])) == 0.0

In [38]:
count=0
for batch in tqdm(list(test_dataloader)):
    a,b,c= batch[0],batch[1],batch[2]
    count+=1
    if count ==1:
        break
print(a);print(b);print(c)
print(batch)
print(len(list(test_dataloader)))

  0%|          | 0/15 [00:00<?, ?it/s]

tensor([[  101,  8957,  1010,  4060,  2039,  1996,  9841,  2013,  1996,  2542,
          2282,  1012,   102,     0,     0,     0],
        [  101,  8957,  1010,  4287,  1996,  5442,  2013,  1996,  2203,  2795,
          2000,  1996,  2542,  2282,  1012,   102],
        [  101,  8957,  1010,  3288,  1996,  8416,  2000,  1996,  7759,  2282,
          1012,   102,     0,     0,     0,     0],
        [  101,  8957,  1010,  2131,  1996, 18157,  3762,  2013,  1996,  7759,
          2282,  1998,  2404,  2009,  2006,   102],
        [  101,  8957,  1010, 18584,  1996,  5442,  1998,  2173,  2009,  2006,
          1996, 25337,  1012,   102,     0,     0],
        [  101,  8957,  1010, 18584,  1996, 11867, 17625,  1998,  3288,  2009,
          2000,  1996,  7759,  2282,  1012,   102],
        [  101,  8957,  1010,  4287,  1996,  8416,  2013,  1996,  2542,  2282,
          2000,  1996,  2217,  2795,  1012,   102],
        [  101,  8957,  1010,  3288,  1996,  8579,  2545,  2013,  1996,  7759,
    

In [39]:
def evaluate(model, dataloader):
    model.eval()

    accuracy=[]

    for batch in tqdm(list(dataloader)):
        b_input_ids,b_input_mask,b_labels = batch


        with torch.no_grad():
            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels, return_dict=False)

        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        #logit_probability =torch.nn.Softmax(logits)
        batch_accuracy = get_accuracy(logits, label_ids)
        accuracy.append(batch_accuracy)
    avg_accuracy = np.mean(accuracy)  # TODO Compute final accuracy
    print("Validation Accuracy: {}".format(avg_accuracy))
    return avg_accuracy


- The input IDs, input mask, and labels are obtained from the dataloader. These inputs are passed through the model to get a prediction. After which the loss is computed for each batch.
- Thus the batch losses over time is monitored to compute the average training loss for each epoch. After every 5 batches,the validation accuracy is computed.

In [40]:

def train(model, dataloader, epochs):
    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    for epoch_i in range(0, EPOCHS):
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))

        model.train()
        #n_iteration = 0
        accuracy = []
        total_train_loss = []

        for step, batch in tqdm(list(enumerate(train_dataloader))):
            # get input IDs, input mask, and labels from batch
            b_input_ids,b_input_mask,b_labels = batch

            model.zero_grad()
            #pass inputs through model
            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels, return_dict=False)
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # Add to total_train_loss
            total_train_loss.append(loss)
            #logit_probability =torch.nn.Softmax(logits)
            batch_accuracy = get_accuracy(logits, label_ids)
            accuracy.append(batch_accuracy)
            #n_iteration += 1
        # Compute average train loss
        new_loss = [x.cpu().detach().numpy() for x in total_train_loss]
        avg_train_loss = np.mean(new_loss)
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print("  Average Training accuracy: {0:.2f}".format(np.mean(accuracy)))
    #validation_accuracy =evaluate(bert_model, validation_dataloader)



In [41]:
BATCH_SIZE = 16

EPOCHS = 5


train_dataset = examples_to_dataset(df_train_list)
train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset), batch_size=BATCH_SIZE)


bert_model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = INTENT_DIM,
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)

train(bert_model, train_dataloader, EPOCHS)

print("Evaluating on test set:")
print("Test accuracy:", evaluate(bert_model, test_dataloader))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




  0%|          | 0/209 [00:00<?, ?it/s]

  Average training loss: 0.95
  Average Training accuracy: 0.73


  0%|          | 0/209 [00:00<?, ?it/s]

  Average training loss: 0.11
  Average Training accuracy: 0.98


  0%|          | 0/209 [00:00<?, ?it/s]

  Average training loss: 0.08
  Average Training accuracy: 0.98


  0%|          | 0/209 [00:00<?, ?it/s]

  Average training loss: 0.08
  Average Training accuracy: 0.97


  0%|          | 0/209 [00:00<?, ?it/s]

  Average training loss: 0.07
  Average Training accuracy: 0.98
Evaluating on test set:


  0%|          | 0/15 [00:00<?, ?it/s]

Validation Accuracy: 0.9848888888888889
Test accuracy: 0.9848888888888889
