In [None]:
# Install Transformers (pinned to version 3).
# NOTE(review): the captured output below shows the `tokenizers==0.8.0-rc4`
# wheel failing to build for this pin -- confirm that the install actually
# succeeds in the target environment before relying on it.
!pip install transformers==3
# To get model summary (provides `summary`, used once the model is built).
!pip install torchinfo

Collecting transformers==3
  Using cached transformers-3.0.0-py3-none-any.whl (754 kB)
Collecting tokenizers==0.8.0-rc4 (from transformers==3)
  Using cached tokenizers-0.8.0rc4.tar.gz (96 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting sentencepiece (from transformers==3)
  Using cached sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting sacremoses (from transformers==3)
  Using cached sacremoses-0.1.1-py3-none-any.whl (897 kB)
Building wheels for collected packages: tokenizers
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for tokenizers [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is 

In [None]:
import numpy as np
import pandas as pd
import re
import torch
import random
import torch.nn as nn
import transformers
import matplotlib.pyplot as plt
# Select the compute device: use the GPU when one is available, otherwise
# fall back to the CPU so the notebook also runs on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Read the complaints dataset and preview its first rows.
complaints_path = "/content/complaints_data.csv"
df = pd.read_csv(complaints_path)
df.head()

Unnamed: 0,Customer ID,Date of Complain,Time of Complain,Complaint,Response,Intent
0,1001,2022-01-01,10:30 AM,Received damaged product,We apologize for the inconvenience. Please pro...,Product Issue
1,1002,2022-01-02,02:45 PM,Package not delivered on time,We're sorry for the delay. Please share your o...,Delivery Issue
2,1003,2022-01-03,09:15 AM,Website login not working,Thank you for bringing this to our attention. ...,Product Issue
3,1004,2022-01-04,03:30 PM,Wrong item received in order,We apologize for the mix-up. Please contact ou...,Order Fulfillment Error
4,1005,2022-01-05,11:00 AM,Unable to apply discount code,We're sorry for the inconvenience. Please doub...,Positive Customer Service


In [None]:
# Count how many complaints fall under each intent (shows the class balance).
df['Intent'].value_counts()

Order Fulfillment Error      61
Product Issue                58
Positive Customer Service    56
Positive Feedback            15
Delivery Issue               10
Name: Intent, dtype: int64

In [None]:
# Encode the string intents as integer class ids in a new `label` column.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(df['Intent'])
df['label'] = le.transform(df['Intent'])
# Relative frequency of each encoded class.
df['label'].value_counts(normalize=True)

1    0.305
4    0.290
2    0.280
3    0.075
0    0.050
Name: label, dtype: float64

In [None]:
# Inputs are the raw complaint texts; targets are the encoded intent ids.
train_text = df["Complaint"]
train_labels = df["label"]

In [None]:
# from transformers import AutoModel, BertTokenizerFast
# # Load the BERT tokenizer
# tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
# # Import BERT-base pretrained model
# bert = AutoModel.from_pretrained('bert-base-uncased')

In [None]:
# from transformers import RobertaTokenizer, RobertaModel
# # Load the Roberta tokenizer
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# # Import Roberta pretrained model
# bert = RobertaModel.from_pretrained('roberta-base')

In [None]:
from transformers import DistilBertTokenizer, DistilBertModel

# The tokenizer and the encoder are loaded from the same pretrained checkpoint.
checkpoint_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint_name)
# Bound to the name `bert` because the later cells refer to the encoder that way.
bert = DistilBertModel.from_pretrained(checkpoint_name)

In [None]:
# Tokenize and encode every training complaint to a fixed length:
# sequences longer than `max_seq_len` tokens are truncated, shorter ones padded.
max_seq_len = 8
tokens_train = tokenizer(
    train_text.tolist(),
    max_length=max_seq_len,
    # `padding="max_length"` replaces the deprecated `pad_to_max_length=True`.
    padding="max_length",
    truncation=True,
    return_token_type_ids=False,
)



In [None]:
# Turn the tokenizer output and the labels into tensors.
train_seq, train_mask = (
    torch.tensor(tokens_train[field]) for field in ('input_ids', 'attention_mask')
)
train_y = torch.tensor(train_labels.tolist())

In [None]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Number of samples per training batch.
batch_size = 16

# Bundle ids, attention masks and labels so they are batched together.
train_data = TensorDataset(train_seq, train_mask, train_y)
# Draw the training samples in random order.
train_sampler = RandomSampler(train_data)
# Batched iterator over the training set.
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

In [None]:
class BERT_Arch(nn.Module):
    """Classification head stacked on top of a pretrained transformer encoder.

    The encoder output at the first token position is passed through two
    ReLU + dropout hidden layers and a final linear layer; the result is
    returned as log-probabilities (the form ``nn.NLLLoss`` expects).

    Args:
        bert: Encoder module. It is called as
            ``bert(sent_id, attention_mask=mask)`` and element ``[0]`` of its
            return value is indexed with ``[:, 0]``.
        hidden_size: Width of the encoder output fed to the first dense layer.
        num_classes: Number of output classes.
        dropout: Dropout probability applied after each hidden layer.
    """

    def __init__(self, bert, hidden_size=768, num_classes=5, dropout=0.2):
        super(BERT_Arch, self).__init__()
        self.bert = bert

        # dropout layer
        self.dropout = nn.Dropout(dropout)

        # relu activation function
        self.relu = nn.ReLU()

        # dense layers
        self.fc1 = nn.Linear(hidden_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)

        # log-softmax over the class dimension
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        """Return per-class log-probabilities for a batch.

        Args:
            sent_id: Token-id tensor passed to the encoder as its first argument.
            mask: Attention mask passed to the encoder as ``attention_mask``.

        Returns:
            Tensor of log-probabilities with one column per class.
        """
        # Take the encoder output at the first token position of each sequence.
        cls_hs = self.bert(sent_id, attention_mask=mask)[0][:, 0]

        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)

        # output layer
        x = self.fc3(x)

        # convert the logits to log-probabilities
        x = self.softmax(x)
        return x

In [None]:
# Freeze the pretrained encoder so that only the classification head is trained.
bert.requires_grad_(False)

model = BERT_Arch(bert)
# The model is not moved to `device` here; the training cell does that.
from torchinfo import summary
summary(model)

Layer (type:depth-idx)                                  Param #
BERT_Arch                                               --
├─DistilBertModel: 1-1                                  --
│    └─Embeddings: 2-1                                  --
│    │    └─Embedding: 3-1                              (23,440,896)
│    │    └─Embedding: 3-2                              (393,216)
│    │    └─LayerNorm: 3-3                              (1,536)
│    │    └─Dropout: 3-4                                --
│    └─Transformer: 2-2                                 --
│    │    └─ModuleList: 3-5                             (42,527,232)
├─Dropout: 1-2                                          --
├─ReLU: 1-3                                             --
├─Linear: 1-4                                           393,728
├─Linear: 1-5                                           131,328
├─Linear: 1-6                                           1,285
├─LogSoftmax: 1-7                                       --
Total 

In [None]:
# Use PyTorch's own AdamW; the `transformers.AdamW` class is deprecated.
from torch.optim import AdamW

# `eps` and `weight_decay` are passed explicitly with the values that
# `transformers.AdamW` used by default, so the optimizer settings are unchanged.
optimizer = AdamW(model.parameters(), lr=1e-3, eps=1e-6, weight_decay=0.0)



In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Distinct encoded labels present in the training targets.
class_labels = np.unique(train_labels)

# One 'balanced' weight per label, used later to weight the loss.
class_wts = compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=np.array(train_labels),
)

# Label -> weight lookup.
class_weights = {label: weight for label, weight in zip(class_labels, class_wts)}
# print(class_wts)

In [None]:
# Show the computed class weights (paired with `class_labels` position by position).
print(class_wts)

[4.         0.6557377  0.71428571 2.66666667 0.68965517]


In [None]:
# Class weights as a float32 tensor. It is not moved to `device` in this cell.
weights = torch.tensor(class_wts).float()
# Weighted negative log-likelihood loss; the model outputs log-probabilities.
cross_entropy = nn.NLLLoss(weight=weights)

In [None]:
from torch.optim import lr_scheduler

# Number of passes over the training data.
epochs = 200
# The training loss of each epoch is collected here.
train_losses = []
# Scheduler that multiplies the learning rate by 0.1 every 100 `step()` calls.
lr_sch = lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=100)


In [None]:
def train():
    """Run one pass over ``train_dataloader`` and update the model weights.

    Returns:
        tuple: ``(avg_loss, total_preds)`` -- the mean per-batch loss and a
        NumPy array holding the model outputs of all batches concatenated
        along axis 0.
    """
    model.train()
    running_loss = 0
    batch_outputs = []
    n_batches = len(train_dataloader)

    for step, batch in enumerate(train_dataloader):
        # Progress message every 50 batches (never for the very first batch).
        if step != 0 and step % 50 == 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, n_batches))

        # Move the batch tensors to the active device.
        sent_id, mask, labels = (tensor.to(device) for tensor in batch)

        # Forward pass and loss for the current batch.
        preds = model(sent_id, mask)
        loss = cross_entropy(preds, labels)
        running_loss += loss.item()

        # Backward pass; clip the gradient norm to 1.0 against exploding gradients.
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Apply the update, then reset the gradients for the next batch.
        optimizer.step()
        optimizer.zero_grad()

        # The learning-rate scheduler is not stepped here.
        # Keep the batch outputs as a CPU NumPy array.
        batch_outputs.append(preds.detach().cpu().numpy())

    # Mean loss over the batches of this epoch.
    avg_loss = running_loss / n_batches
    # Stack the per-batch outputs into one (samples, classes) array.
    total_preds = np.concatenate(batch_outputs, axis=0)
    return avg_loss, total_preds

In [None]:
# Pick the GPU when CUDA is available, otherwise the CPU, and report the choice.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cpu


In [None]:
# Reproducibility flags must be in place before the first training step runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Move the model once instead of on every epoch, and move the loss module
# (which holds the class-weight tensor) to the same device so the weighted
# loss does not hit a device mismatch on GPU.
model.to(device)
cross_entropy.to(device)

for epoch in range(epochs):

    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    # train the model for one epoch and record its loss
    train_loss, _ = train()
    train_losses.append(train_loss)

# Report the most recent loss; guarded so that `epochs = 0` does not raise.
if train_losses:
    print(f'\nTraining Loss: {train_losses[-1]:.3f}')


 Epoch 1 / 200

 Epoch 2 / 200

 Epoch 3 / 200

 Epoch 4 / 200

 Epoch 5 / 200

 Epoch 6 / 200

 Epoch 7 / 200

 Epoch 8 / 200

 Epoch 9 / 200

 Epoch 10 / 200

 Epoch 11 / 200

 Epoch 12 / 200

 Epoch 13 / 200

 Epoch 14 / 200

 Epoch 15 / 200

 Epoch 16 / 200

 Epoch 17 / 200

 Epoch 18 / 200

 Epoch 19 / 200

 Epoch 20 / 200

 Epoch 21 / 200

 Epoch 22 / 200

 Epoch 23 / 200

 Epoch 24 / 200

 Epoch 25 / 200

 Epoch 26 / 200

 Epoch 27 / 200

 Epoch 28 / 200

 Epoch 29 / 200

 Epoch 30 / 200

 Epoch 31 / 200

 Epoch 32 / 200

 Epoch 33 / 200

 Epoch 34 / 200

 Epoch 35 / 200

 Epoch 36 / 200

 Epoch 37 / 200

 Epoch 38 / 200

 Epoch 39 / 200

 Epoch 40 / 200

 Epoch 41 / 200

 Epoch 42 / 200

 Epoch 43 / 200

 Epoch 44 / 200

 Epoch 45 / 200

 Epoch 46 / 200

 Epoch 47 / 200

 Epoch 48 / 200

 Epoch 49 / 200

 Epoch 50 / 200

 Epoch 51 / 200

 Epoch 52 / 200

 Epoch 53 / 200

 Epoch 54 / 200

 Epoch 55 / 200

 Epoch 56 / 200

 Epoch 57 / 200

 Epoch 58 / 200

 Epoch 59 / 200

 Epoc

In [None]:
def get_prediction(str):
    """Predict the intent of a single user message.

    Args:
        str: Raw message text. The name shadows the built-in ``str`` but is
            kept so that existing keyword callers keep working.

    Returns:
        The intent name decoded by the label encoder ``le``.
    """
    # Keep only ASCII letters and spaces before tokenizing.
    cleaned = re.sub(r'[^a-zA-Z ]+', '', str)
    model.eval()

    encoded = tokenizer(
        [cleaned],
        max_length=max_seq_len,
        # `padding="max_length"` replaces the deprecated `pad_to_max_length=True`.
        padding="max_length",
        truncation=True,
        return_token_type_ids=False,
    )
    test_seq = torch.tensor(encoded['input_ids'])
    test_mask = torch.tensor(encoded['attention_mask'])

    # Inference only: no gradients are needed.
    with torch.no_grad():
        log_probs = model(test_seq.to(device), test_mask.to(device))
    predicted_ids = np.argmax(log_probs.cpu().numpy(), axis=1)

    # Decode the class id once and reuse it for both the log line and the return.
    intent = le.inverse_transform(predicted_ids)[0]
    print("Intent Identified: ", intent)
    return intent

In [None]:
def get_response(message):
    """Classify ``message`` and pick a random canned response for its intent.

    The intents are read from the notebook-level ``data_dict``, which is
    created in a later cell; that cell must have been run before this
    function is called.

    Args:
        message: Raw user message.

    Returns:
        str: Two lines, ``Intent: <intent>`` and ``Response: <response>``.

    Raises:
        LookupError: If the predicted intent has no entry, or no responses,
            in ``data_dict['intents']``.
    """
    intent = get_prediction(message)
    print(intent)
    # Responses of the first entry whose tag equals the predicted intent.
    candidates = next(
        (entry["responses"] for entry in data_dict['intents'] if entry["tag"] == intent),
        None,
    )
    if not candidates:
        raise LookupError(f"No responses available for intent {intent!r}")
    result = random.choice(candidates)
    print(f"Response : {result}")
    return "Intent: " + intent + '\n' + "Response: " + result

In [None]:
# Try the chatbot on a sample message.
# NOTE(review): the captured output below prints the intent but has no
# "Response : ..." line -- check whether the response lookup failed here.
get_response("why dont you introduce yourself")

Intent Identified:  Product Issue
Product Issue




In [None]:
import json
import pandas as pd

In [None]:
# One entry per intent tag, each starting with its own empty list of responses.
intent_tags = (
    "Product Issue",
    "Delivery Issue",
    "Order Fulfillment Error",
    "Positive Customer Service",
    "Positive Feedback",
)
intent_list = [{"tag": tag, "responses": []} for tag in intent_tags]

In [None]:
# Reload the raw CSV. This rebinds `df`, so the `label` column that was added
# to the previous frame is not present on this one.
df = pd.read_csv('/content/complaints_data.csv')

In [None]:
# List the distinct intent names present in the data.
df['Intent'].unique()

array(['Product Issue', 'Delivery Issue', 'Order Fulfillment Error',
       'Positive Customer Service', 'Positive Feedback'], dtype=object)

In [None]:
# Collect the canned responses per intent: each row's response is added only
# to the entry whose tag equals that row's `Intent`.
responses_by_tag = {item["tag"]: item["responses"] for item in intent_list}

# Start from empty lists so re-running this cell does not duplicate responses.
for responses in responses_by_tag.values():
    responses.clear()

for intent_name, response_text in zip(df["Intent"], df["Response"]):
    # Rows whose intent has no entry in `intent_list` are skipped.
    if intent_name in responses_by_tag:
        responses_by_tag[intent_name].append(response_text)

In [None]:
# Wrap the intent entries under a single "intents" key.
data_dict = dict(intents=intent_list)

In [None]:
def get_response(message):
    """Classify ``message`` and pick a random canned response for its intent.

    Args:
        message: Raw user message.

    Returns:
        str: Two lines, ``Intent: <intent>`` and ``Response: <response>``.

    Raises:
        LookupError: If the predicted intent has no entry, or no responses,
            in ``data_dict['intents']``.
    """
    intent = get_prediction(message)
    print(intent)
    # Responses of the first entry whose tag equals the predicted intent.
    candidates = next(
        (entry["responses"] for entry in data_dict['intents'] if entry["tag"] == intent),
        None,
    )
    # Fail with a clear message instead of an unbound `result` / empty choice.
    if not candidates:
        raise LookupError(f"No responses available for intent {intent!r}")
    result = random.choice(candidates)
    print(f"Response : {result}")
    return "Intent: " + intent + '\n' + "Response: " + result

In [None]:
# Try the chatbot again now that `data_dict` and the redefined `get_response` exist.
get_response("why dont you introduce yourself")

Intent Identified:  Product Issue
Product Issue
Response : Thank you for bringing this to our attention. We'll investigate the issue and ensure accurate order history is displayed on our website.




"Intent: Product Issue\nResponse: Thank you for bringing this to our attention. We'll investigate the issue and ensure accurate order history is displayed on our website."