# Setting Up GPU / CPU

In [None]:
! pip install torch

In [5]:
# Activate GPU for faster training by clicking on 'Runtime' > 'Change runtime type' and then selecting GPU as the Hardware accelerator
# Then check if GPU is available
import torch
torch.cuda.is_available()

True

In [6]:
# Setting up the device for GPU usage
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print(device)

cuda


# Installing and Loading Required Packages

In [None]:
! pip install transformers torch pandas numpy datasets accelerate scikit-learn

In [14]:
from datasets import Dataset, load_metric
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainerCallback, TrainingArguments
from torch.utils.data import DataLoader

In [15]:
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import classification_report

In [16]:
import pandas as pd
import numpy as np

In [17]:
import os

# Importing The Dataset

## Bitext - Customer Service Tagged Training Dataset

### Overview

This dataset can be used to train intent recognition models on Natural Language Understanding (NLU) platforms: LUIS, Dialogflow, Lex, RASA and any other NLU platform that accepts text as input.

The training dataset contains 8,100 utterances (300 per intent), because most platforms limit the number of utterances that can be used for training.

## Cleaning up the dataset

From the dataset, we remove the categories that have fewer than 3 intents. This drops the following categories:
- CANCELLATION_FEE
- FEEDBACK
- NEWSLETTER

In [21]:
# resolve the parent of the current working directory
# (the working directory itself is not changed)
parent_dir = os.path.dirname(os.getcwd())

# build the paths of the training and testing CSV files under <parent>/data
training_file = os.path.join(parent_dir, 'data/train/Bitext_Sample_Customer_Service_Training_Dataset.csv')
testing_file  = os.path.join(parent_dir, 'data/test/Bitext_Sample_Customer_Service_Testing_Dataset.csv')

# read both CSV files into DataFrames
training_df = pd.read_csv(training_file)
testing_df  = pd.read_csv(testing_file)

In [22]:
training_df.head()

Unnamed: 0,utterance,intent,entity_type,entity_value,start_offset,end_offset,category,tags
0,how can I cancel purchase 113542617735902?,cancel_order,order_id,113542617735902,26.0,41.0,ORDER,BIL
1,can you help me canceling purchase 00004587345?,cancel_order,order_id,4587345,35.0,46.0,ORDER,BIL
2,i want assistance to cancel purchase 732201349959,cancel_order,order_id,732201349959,37.0,49.0,ORDER,BLQ
3,i want assistance to cancel order 732201349959,cancel_order,order_id,732201349959,34.0,46.0,ORDER,BQ
4,"I don't want my last item, help me cancel orde...",cancel_order,order_id,370795561790,48.0,60.0,ORDER,BCLN


In [23]:
testing_df.head()

Unnamed: 0,utterance,intent,entity_type,entity_value,start_offset,end_offset,category,tags
0,I do not know how I can cancel purchase 00123842,cancel_order,order_id,123842,40.0,48.0,ORDER,BEL
1,help to cancel purchase 00004587345,cancel_order,order_id,4587345,24.0,35.0,ORDER,BL
2,cancelling purchase 00123842,cancel_order,order_id,123842,20.0,28.0,ORDER,BKL
3,cancel purchase 00004587345,cancel_order,order_id,4587345,16.0,27.0,ORDER,BKL
4,I don't know how to cancel order 732201349959,cancel_order,order_id,732201349959,34.0,46.0,ORDER,BZ


In [25]:
# Categories that are kept for training and evaluation (those with more than 2 intents).
# Defined once so the training and testing splits are always filtered identically.
KEPT_CATEGORIES = ['ACCOUNT', 'CONTACT', 'ORDER', 'PAYMENT', 'REFUND', 'SHIPPING_ADDRESS']

# Filter both splits with the same category list.
# .copy() yields independent frames, so later column assignments never target a slice
# of the originally loaded data.
training_df = training_df[training_df["category"].isin(KEPT_CATEGORIES)].copy()
testing_df = testing_df[testing_df["category"].isin(KEPT_CATEGORIES)].copy()

In [26]:
def dataset_preprocess(input_df, intent_categories=None, tag_letters='QPWKBCIMLEZ'):
    """Reduce a raw dataframe to the columns needed for training.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain the columns ``utterance``, ``intent`` and ``tags``.
        It is not modified.
    intent_categories : sequence of str, optional
        Ordered list of intent names; an intent's code is its position in this
        list, and intents missing from the list get code -1. When omitted, the
        codes are derived from the intents present in ``input_df`` (original
        behaviour), which means two different frames only share a mapping if
        they contain exactly the same set of intents.
    tag_letters : str, optional
        Tag alphabet. Position ``i`` of each ``labels`` list is 1 when
        ``tag_letters[i]`` occurs in the row's ``tags`` string, else 0.

    Returns
    -------
    pandas.DataFrame
        Columns ``utterance``, ``encoded_intent`` (integer code) and
        ``labels`` (list of 0/1 flags, one per tag letter).
    """
    # Work on an explicit copy: assigning new columns onto a column-slice of the
    # caller's frame is a chained assignment (SettingWithCopyWarning territory).
    df = input_df[['utterance', 'intent', 'tags']].copy()

    if intent_categories is None:
        intent_as_category = df['intent'].astype('category')
    else:
        intent_as_category = df['intent'].astype(
            pd.CategoricalDtype(categories=list(intent_categories))
        )
    df['encoded_intent'] = intent_as_category.cat.codes

    # Multi-hot encode the tag string against the tag alphabet.
    df['labels'] = df['tags'].apply(
        lambda tag_string: [1 if letter in tag_string else 0 for letter in tag_letters]
    )

    # Only the text and the two encoded targets are kept.
    return df.drop(columns=['intent', 'tags'])

In [None]:
# Reduce both splits to utterance / encoded_intent / labels.
# NOTE(review): dataset_preprocess derives the integer intent codes from the intents
# present in the frame it receives, so the two splits only share one code mapping
# when they contain the same set of intents — confirm this holds for the data.
train_data = dataset_preprocess(training_df)
test_data  = dataset_preprocess(testing_df)

In [28]:
train_data.head()

Unnamed: 0,utterance,encoded_intent,labels
0,how can I cancel purchase 113542617735902?,0,"[0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0]"
1,can you help me canceling purchase 00004587345?,0,"[0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0]"
2,i want assistance to cancel purchase 732201349959,0,"[1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]"
3,i want assistance to cancel order 732201349959,0,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
4,"I don't want my last item, help me cancel orde...",0,"[0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0]"


In [29]:
test_data.head()

Unnamed: 0,utterance,encoded_intent,labels
0,I do not know how I can cancel purchase 00123842,0,"[0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0]"
1,help to cancel purchase 00004587345,0,"[0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]"
2,cancelling purchase 00123842,0,"[0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]"
3,cancel purchase 00004587345,0,"[0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]"
4,I don't know how to cancel order 732201349959,0,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1]"


# PreProcessing and Tokenization

In [52]:
# Set DistilBERT tokenizer
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [53]:
# Prepare the text inputs for the model
def preprocess_function(examples, num_intents=19):
    """Tokenize one example and attach its combined intent + tag target vector.

    Parameters
    ----------
    examples : mapping
        A single row with the keys ``utterance`` (text), ``encoded_intent``
        (integer intent code) and ``labels`` (sequence of 0/1 tag flags).
    num_intents : int, optional
        Width of the one-hot intent segment of the target vector.

    Returns
    -------
    The tokenizer output (padded/truncated to 128 tokens) with an extra
    ``label`` entry: a float array made of the one-hot intent followed by the
    tag flags.

    Raises
    ------
    ValueError
        If ``encoded_intent`` is outside ``[0, num_intents)``. A negative code
        would otherwise silently index from the end of the vector.
    """
    intent_value = examples["encoded_intent"]
    if not 0 <= intent_value < num_intents:
        raise ValueError(
            f"encoded_intent {intent_value} is outside the range [0, {num_intents})"
        )

    # One-hot encode the intent (a single assignment; no loop is needed).
    intent_labels = np.zeros(num_intents)
    intent_labels[intent_value] = 1

    # tags labels
    tags = np.array(examples["labels"])

    # Final target layout: [intent one-hot | tag flags]
    labels = np.append(intent_labels, tags)

    # Uses the notebook-level `tokenizer`.
    examples = tokenizer(examples["utterance"], truncation=True, padding="max_length", max_length=128)
    examples["label"] = labels

    return examples

In [55]:
# testing the preprocess_function
preprocess_function({
    "utterance": "i want assistance to cancel order 732201349959",
    "encoded_intent": 0,
    "labels": [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
})

{'input_ids': [101, 1045, 2215, 5375, 2000, 17542, 2344, 6421, 19317, 24096, 22022, 2683, 2683, 28154, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'label': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0.])}

In [56]:
# Creating the dataset and dataloader for the neural network

print("TRAIN Dataset: {}".format(train_data.shape))
print("TEST Dataset: {}".format(test_data.shape))

from datasets import Dataset

train_dataset = Dataset.from_pandas(train_data)
test_dataset = Dataset.from_pandas(test_data)

TRAIN Dataset: (4580, 3)
TEST Dataset: (565, 3)


In [57]:
# Prepare the text inputs for the model

tokenized_train = train_dataset.map(preprocess_function)
tokenized_test = test_dataset.map(preprocess_function)

Map: 100%|█████████████████████| 4580/4580 [00:00<00:00, 6460.40 examples/s]
Map: 100%|███████████████████████| 565/565 [00:00<00:00, 6628.15 examples/s]


# TRAINING

In [98]:
# set the training parameters

LEARNING_RATE = 1e-05  # used for both the Adam optimizer and TrainingArguments
MAX_LENGTH = 128  # NOTE(review): not referenced in the cells shown; preprocess_function hard-codes max_length=128
BATCH_SIZE = 4  # per-device train and eval batch size passed to TrainingArguments
EPOCHS = 10  # number of training epochs passed to TrainingArguments
TRAIN_BATCH_SIZE = 4  # NOTE(review): not referenced in the cells shown
VALID_BATCH_SIZE = 4  # NOTE(review): not referenced in the cells shown

In [99]:
# Use data_collector to convert our samples to PyTorch tensors and concatenate them with the correct amount of padding
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [100]:
# Define DistilBERT as our base model:
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=30) #total output features = 19+11 = 30
model.to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

## OPTIMIZER

In [None]:
! pip install torch_optimizer

In [102]:
import torch_optimizer as optim
from transformers import AdamW

In [103]:
# NOTE(review): this Adam optimizer is not handed to the trainer constructed further
# down (no `optimizers=` argument is passed there); TrainingArguments selects
# optim='adamw_torch' instead — confirm whether this object is still needed.
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

# Prepare the Metrics

In [44]:
# Column layout of the 30-wide label / logit vectors:
# columns 0-18 hold the one-hot intent, columns 19-29 hold the 11 tag flags.
INTENT_INDICES = range(0,19)
TAGS_INDICES   = range(19,30)

In [45]:
# Apply two logics for Multiclass Columns and Multilabel Columns
def get_preds_from_logits(logits, intent_indices=None, tags_indices=None):
    """Turn raw logits into 0/1 predictions using two different decision rules.

    Parameters
    ----------
    logits : numpy.ndarray
        2-D array with one row per sample and one column per output.
    intent_indices : sequence of int, optional
        Columns holding the intent scores. Defaults to the notebook-level
        ``INTENT_INDICES``.
    tags_indices : sequence of int, optional
        Columns holding the tag scores. Defaults to the notebook-level
        ``TAGS_INDICES``.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``logits``. Within the intent columns
        exactly one entry per row is 1 (multiclass: the highest-scoring
        column). Within the tag columns every entry whose logit is >= 0 is 1
        (multilabel).
    """
    if intent_indices is None:
        intent_indices = INTENT_INDICES
    if tags_indices is None:
        tags_indices = TAGS_INDICES

    ret = np.zeros(logits.shape)

    # Multiclass part: argmax returns a position *within* the intent slice, so map it
    # back to an absolute column. This stays correct even if the intent columns do not
    # start at column 0.
    intent_columns = np.asarray(intent_indices)
    best_class = np.argmax(logits[:, intent_columns], axis=-1)
    ret[np.arange(len(ret)), intent_columns[best_class]] = 1

    # Multilabel part: every tag whose score reaches the threshold (0) is switched on.
    ret[:, tags_indices] = (logits[:, tags_indices] >= 0).astype(int)

    return ret

In [106]:
# Let’s look at an example by generating a random 30-dimensional vector whose columns are between -2 and 2
example = np.random.uniform(-2, 2, (1, 30))
example

array([[-0.50183952,  1.80285723,  0.92797577,  0.39463394, -1.37592544,
        -1.37602192, -1.76766555,  1.46470458,  0.40446005,  0.83229031,
        -1.91766202,  1.87963941,  1.32977056, -1.15064356, -1.27270013,
        -1.26638196, -0.78303103,  0.09902573, -0.27221993, -0.83508344,
         0.44741158, -1.44202456, -0.83142141, -0.53455263, -0.17572006,
         1.14070385, -1.20130487,  0.05693775,  0.36965828, -1.81419835]])

In [107]:
# compute the predicted vector
get_preds_from_logits(example)[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0.])

In [108]:
def compute_metrics(eval_pred):
    """Compute micro/macro F1 for the intent columns, the tag columns and all columns.

    ``eval_pred`` is unpacked as ``(logits, labels)``. Logits are first turned
    into 0/1 predictions with ``get_preds_from_logits``. A classification
    report for each group is printed as a side effect.

    Returns a dict with the keys ``f1_micro_for_intents``,
    ``f1_macro_for_intents``, ``f1_micro_for_tags``, ``f1_macro_for_tags``,
    ``f1_micro`` and ``f1_macro``.
    """
    raw_logits, gold = eval_pred

    # Deduce 0/1 predictions from the raw scores
    predicted = get_preds_from_logits(raw_logits)

    gold_intents, predicted_intents = gold[:, INTENT_INDICES], predicted[:, INTENT_INDICES]
    gold_tags, predicted_tags = gold[:, TAGS_INDICES], predicted[:, TAGS_INDICES]

    # (key suffix, true labels, predicted labels): intents, then tags, then every column
    scored_groups = (
        ("_for_intents", gold_intents, predicted_intents),
        ("_for_tags", gold_tags, predicted_tags),
        ("", gold, predicted),
    )
    final_metrics = {
        f"f1_{averaging}{suffix}": f1_score(y_true, y_pred, average=averaging)
        for suffix, y_true, y_pred in scored_groups
        for averaging in ("micro", "macro")
    }

    # Per-class breakdown for each group
    print("Classification report for intents: ")
    print(classification_report(gold_intents, predicted_intents, zero_division=0))
    print("Classification report for tags: ")
    print(classification_report(gold_tags, predicted_tags, zero_division=0))

    return final_metrics

# LOSS FUNCTIONS

In [119]:
class MultiTaskClassificationTrainer(Trainer):
    """Trainer whose loss is a weighted sum of an intent loss and a tag loss.

    The intent columns (``INTENT_INDICES``) are scored with cross-entropy
    (multiclass); the tag columns (``TAGS_INDICES``) are scored with binary
    cross-entropy on the logits (multilabel).

    Parameters
    ----------
    group_weights : sequence of two numbers, optional
        ``(intent_weight, tags_weight)`` applied to the two losses. Defaults to
        ``(1.0, 1.0)``, i.e. a plain sum.
    **kwargs
        Forwarded unchanged to ``Trainer.__init__``.
    """

    def __init__(self, group_weights=None, **kwargs):
        super().__init__(**kwargs)
        # Fall back to an unweighted sum so compute_loss never indexes into None.
        self.group_weights = (1.0, 1.0) if group_weights is None else group_weights

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the combined loss (and the model outputs if ``return_outputs``).

        ``num_items_in_batch`` is accepted and ignored, so the override also
        works with transformers releases that pass this keyword to
        ``compute_loss``.
        """
        # Remove the targets so the model does not compute its own built-in loss.
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]

        intent_loss = torch.nn.functional.cross_entropy(logits[:, INTENT_INDICES], labels[:, INTENT_INDICES])
        tags_loss = torch.nn.functional.binary_cross_entropy_with_logits(logits[:, TAGS_INDICES], labels[:, TAGS_INDICES])

        intent_weight, tags_weight = self.group_weights[0], self.group_weights[1]
        loss = intent_weight * intent_loss + tags_weight * tags_loss

        return (loss, outputs) if return_outputs else loss


In [110]:
# announce the epoch number every time an epoch finishes
class PrinterCallback(TrainerCallback):
    """Trainer callback that prints the current epoch when an epoch ends."""

    def on_epoch_end(self, args, state, control, logs=None, **kwargs):
        """Print ``state.epoch``; the remaining arguments are not used."""
        epoch_banner = f"Epoch {state.epoch}: "
        print(epoch_banner)

# TRAINER

### Accelerator Workaround

In [111]:
import accelerate
import transformers

transformers.__version__, accelerate.__version__

('4.31.0', '0.21.0')

In [122]:
# Define a new Trainer with all the objects we constructed so far

# Evaluate and checkpoint once per epoch; the checkpoint with the best "f1_macro"
# (a key returned by compute_metrics) is reloaded when training ends.
training_args = TrainingArguments(
    output_dir="./models/",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    metric_for_best_model="f1_macro",
    load_best_model_at_end=True,
    weight_decay=0.01,
    optim='adamw_torch',
)

# group_weights = (weight of the intent loss, weight of the tags loss), as consumed by
# MultiTaskClassificationTrainer.compute_loss.
# Note that the PrinterCallback class itself (not an instance) is passed in `callbacks`.
trainer = MultiTaskClassificationTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    compute_metrics=compute_metrics,
    callbacks=[PrinterCallback],
    data_collator=data_collator,
    group_weights=(0.8, 2),
)

In [123]:
trainer.train()

Epoch,Training Loss,Validation Loss,F1 Micro For Intents,F1 Macro For Intents,F1 Micro For Tags,F1 Macro For Tags,F1 Micro,F1 Macro
1,0.5453,0.442694,0.99115,0.990726,0.86539,0.528233,0.896796,0.821145
2,0.327,0.274998,0.99292,0.992236,0.919195,0.739265,0.937084,0.89948
3,0.2178,0.200191,0.99292,0.992385,0.952671,0.912326,0.96226,0.96303
4,0.1499,0.15998,0.99292,0.992569,0.961103,0.932756,0.968665,0.970637
5,0.1253,0.159863,0.99469,0.994265,0.96327,0.947895,0.970743,0.977263
6,0.1042,0.14966,0.99646,0.996181,0.963187,0.950154,0.971069,0.979305
7,0.0838,0.132744,0.99646,0.996181,0.96599,0.955751,0.973199,0.981357
8,0.0784,0.141871,0.99646,0.996181,0.965593,0.953306,0.972916,0.98046
9,0.075,0.135596,0.99646,0.996181,0.966484,0.956953,0.973585,0.981798
10,0.0691,0.136554,0.99646,0.996181,0.965706,0.956191,0.972984,0.981518


Epoch 1.0: 
Classification report for intents: 
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        29
           1       1.00      0.95      0.98        21
           2       1.00      1.00      1.00        28
           3       1.00      1.00      1.00        20
           4       0.98      1.00      0.99        40
           5       0.98      1.00      0.99        41
           6       1.00      1.00      1.00        36
           7       1.00      0.96      0.98        27
           8       1.00      1.00      1.00        40
           9       0.97      1.00      0.98        28
          10       1.00      0.92      0.96        26
          11       1.00      1.00      1.00        38
          12       0.96      1.00      0.98        27
          13       1.00      1.00      1.00        20
          14       1.00      1.00      1.00        36
          15       1.00      0.97      0.98        32
          16       1.00      1.00

TrainOutput(global_step=11450, training_loss=0.18457267278146536, metrics={'train_runtime': 969.4901, 'train_samples_per_second': 47.241, 'train_steps_per_second': 11.81, 'total_flos': 1517509089792000.0, 'train_loss': 0.18457267278146536, 'epoch': 10.0})

In [124]:
trainer.evaluate()

Classification report for intents: 
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        29
           1       1.00      0.95      0.98        21
           2       1.00      1.00      1.00        28
           3       1.00      1.00      1.00        20
           4       1.00      1.00      1.00        40
           5       1.00      1.00      1.00        41
           6       1.00      1.00      1.00        36
           7       1.00      0.96      0.98        27
           8       0.98      1.00      0.99        40
           9       1.00      1.00      1.00        28
          10       1.00      1.00      1.00        26
          11       1.00      1.00      1.00        38
          12       1.00      1.00      1.00        27
          13       1.00      1.00      1.00        20
          14       1.00      1.00      1.00        36
          15       1.00      1.00      1.00        32
          16       1.00      1.00      1.00  

{'eval_loss': 0.1355958878993988,
 'eval_f1_micro_for_intents': 0.9964601769911504,
 'eval_f1_macro_for_intents': 0.9961814210533968,
 'eval_f1_micro_for_tags': 0.9664835164835165,
 'eval_f1_macro_for_tags': 0.956952796160882,
 'eval_f1_micro': 0.9735849056603774,
 'eval_f1_macro': 0.9817975919261415,
 'eval_runtime': 2.3879,
 'eval_samples_per_second': 236.612,
 'eval_steps_per_second': 59.467,
 'epoch': 10.0}

In [127]:
# Save the model for future use
model.save_pretrained('./models')

# Prediction

In [31]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

In [36]:
import os

# Load the custom pretrained model
# NOTE(review): the training section saves with model.save_pretrained('./models'), i.e.
# relative to the working directory, while this cell looks for a 'models' folder two
# directory levels above the working directory — confirm the saved files are there.

current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
grandparent_directory = os.path.dirname(parent_directory)

#navigate to model directory
model_path = os.path.join(grandparent_directory, 'models')

model = DistilBertForSequenceClassification.from_pretrained(model_path)

# Load the tokenizer
# (loaded from the base 'distilbert-base-uncased' checkpoint, not from model_path)
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [37]:
model = model.to(device)

In [38]:
input_texts =  "i have bought the same item twice cancel order 00123842"

In [40]:
# Encode the text
encoded_input = tokenizer(input_texts,
                          truncation=True,
                          padding="max_length",
                          max_length=128,
                          return_tensors='pt').to(device)

In [41]:
# Call the model to predict under the format of logits of 30 classes.
# eval() disables dropout, and no_grad() skips building the autograd graph, which is
# not needed for inference and only costs memory.
model.eval()
with torch.no_grad():
    logits = model(**encoded_input).logits.cpu().numpy()

In [46]:
# Decode the result
preds = get_preds_from_logits(logits)

In [47]:
preds

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0.]])

## TRANSLATING OUTPUTS FROM OUTPUT LABELS

In [48]:
# LABEL DICTIONARY

# Maps an intent column index (0-18) to its intent name. The names are listed in
# alphabetical order.
# NOTE(review): presumably this mirrors the integer codes assigned to the intents in
# dataset_preprocess — verify against the training data before relying on it.
intents_map = {
    0: 'cancel_order',
    1: 'change_order',
    2: 'change_shipping_address',
    3: 'check_payment_methods',
    4: 'check_refund_policy',
    5: 'contact_customer_service',
    6: 'contact_human_agent',
    7: 'create_account',
    8: 'delete_account',
    9: 'edit_account',
   10: 'get_refund',
   11: 'payment_issue',
   12: 'place_order',
   13: 'recover_password',
   14: 'registration_problems',
   15: 'set_up_shipping_address',
   16: 'switch_account',
   17: 'track_order',
   18: 'track_refund'
}

# Tag letters in the same order as the 'QPWKBCIMLEZ' string used to build the tag labels.
tags_labels = np.array(['Q', 'P', 'W', 'K', 'B', 'C', 'I', 'M', 'L', 'E', 'Z'])

In [49]:
intents_array = preds[0][:19]
tags_array = preds[0][19:]

In [50]:
intents_array

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0.])

In [51]:
tags_array

array([0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0.])

### Intent Prediction

In [52]:
intent = intents_map[intents_array.argmax()]

In [53]:
intent

'cancel_order'

### Language Prediction

In [54]:
# Build the predicted tag string by concatenating, in column order, the letter of
# every tag position whose prediction is 1.
output_tag = ""

for i in np.where(tags_array == 1)[0]:
  output_tag += tags_labels[i]

In [55]:
output_tag

'BCL'