# **Installing the dependencies**

In [None]:
# Install Hugging Face Transformers and PyTorch (the recorded run resolved transformers 4.34.1).
!pip install transformers torch

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
# Install or upgrade SentencePiece (the recorded run installed 0.1.99).
!pip install -U SentencePiece

Collecting SentencePiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.3 MB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.3/1.3 MB[0m [31m20.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SentencePiece
Successfully installed SentencePiece-0.1.99


In [None]:
# Install or upgrade accelerate (the recorded run installed 0.23.0).
!pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.23.0


In [None]:
import os

# Export the flag through the process environment: a plain Python assignment
# only creates a notebook variable that the CUDA runtime never reads.
# "1" makes CUDA kernel launches synchronous, so an error is reported at the
# call that caused it. This helps debugging but slows execution; drop it for
# normal training runs. It must be set before the first CUDA call.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
CUDA_LAUNCH_BLOCKING = 1  # kept so any existing reference to the old variable still resolves

# **Connecting google colab to drive**

In [None]:
# Mount Google Drive at /content/drive; the dataset and checkpoint paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Importing the dependencies**

In [None]:
import json
import random
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import RobertaTokenizer, RobertaForSequenceClassification, T5Tokenizer, T5ForSequenceClassification,TrainingArguments, Trainer

## Data format

2 json files - `surprise.data` contains utterances, `surprise.solution` contains corresponding intents

Format of `surprise.data`

```json
{"indoml_id": "surprise|11109", "id": "11109", "utt": "Can I make a reservation at Buffalo Wild Wings?"}
{"indoml_id": "surprise|11051", "id": "11051", "utt": "Can I book a table for tonight at Bella Vita?"}
....
```

Format of `surprise.solution` (contains 150 labels)

```json
{"indoml_id": "surprise|11109", "intent": "accept reservations"}
{"indoml_id": "surprise|11051", "intent": "accept reservations"}
....
```


# **Loading the data and solution of the surprise dataset**

In [None]:

# Load the utterances and their intent labels (one JSON object per line).
def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        path: Location of a UTF-8 text file holding one JSON object per line.

    Returns:
        One parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # An explicit encoding keeps parsing independent of the platform default;
    # blank lines are skipped instead of crashing json.loads.
    with open(path, 'r', encoding='utf-8') as jsonl_file:
        return [json.loads(line) for line in jsonl_file if line.strip()]


data = load_jsonl('/content/drive/MyDrive/intent_classification_It_bombay/surprise_data/surprise.data')
solutions = load_jsonl('/content/drive/MyDrive/intent_classification_It_bombay/surprise_data/surprise.solution')

In [None]:
print(data[0]) # first record of the surprise dataset: ids plus the utterance text
print(solutions[0]) # matching label record: indoml_id plus the intent

{'indoml_id': 'surprise|11109', 'id': '11109', 'utt': 'Can I make a reservation at Buffalo Wild Wings?'}
{'indoml_id': 'surprise|11109', 'intent': 'accept reservations'}


In [None]:
# Collect every utterance string in dataset order. A comprehension keeps the
# loop temporaries out of the notebook namespace (the old loop left `utt`
# bound to a single string, a name that is later reused for a list).
utt_list = [record['utt'] for record in data]
utt_list

['Can I make a reservation at Buffalo Wild Wings?',
 'Can I book a table for tonight at Bella Vita?',
 'Does the Cheesecake Factory take reservations?',
 'Do they take reservations at Outback Steakhouse in Las Vegas?',
 "does chili's take reservations?",
 'Can I make a reservation at The French Laundry in Yountville?',
 'Can I check availability and make a reservation online?',
 "Can you tell me if Carrabba's in Houston accepts reservations",
 'Do you have any availability for next weekend at the Brasserie?',
 'Does Buffalo Wild Wings take reservations for sports games',
 'Is it possible to reserve a table in advance at this restaurant?',
 'Does Olive Garden in San Francisco take reservations?',
 "Where can I find reservation information for Applebee's?",
 'Can I reserve a table at Alinea in Chicago?',
 'Does the Pizza Parlor accept reservations for parties of five or more?',
 'Why did my brokerage account suddenly get locked?',
 "I've tried to log into my Chase account, but it's still

In [None]:
# Look-up table from indoml_id to its intent label.
intent_map = dict((entry['indoml_id'], entry['intent']) for entry in solutions)

# Ids, labels and utterances (the features), all aligned with the order of `data`.
indoml_ids = [record['indoml_id'] for record in data]
intents = list(map(intent_map.__getitem__, indoml_ids))
utt = [record['utt'] for record in data]

num_classes = len(set(intents))
print("Number of classes")
print(num_classes)

# Hold out 20% of the samples for testing; stratifying on the intent keeps
# every class represented in both splits.
train_data, test_data, train_labels, test_labels = train_test_split(
    utt,
    intents,
    test_size=0.20,
    random_state=42,
    stratify=intents,
)

Number of classes
150


In [None]:
train_labels[0:5], len(train_labels) # first five training intents and the training-set size (the 80% side of the split)

(['insurance',
  'car rental',
  'international visa',
  'definition',
  'report fraud'],
 1798)

In [None]:
test_labels[0:5], len(test_labels) # first five held-out intents and the test-set size (the 20% side of the split)

(['car rental', 'repeat', 'confirm reservation', 'todo list', 'gas type'], 450)

# **Label encoding - converting labels to numeric values**

In [None]:
# Build the label <-> integer-id mappings used for training.
# Ids follow the order in which each intent first appears in `solutions`.
labels_list = [solution['intent'] for solution in solutions]

# dict.fromkeys de-duplicates in one pass while preserving first-seen order,
# replacing the quadratic `x not in list` scan.
unique_labels_list = list(dict.fromkeys(labels_list))

label2id = {label: idx for idx, label in enumerate(unique_labels_list)}
id2label = {idx: label for label, idx in label2id.items()}

In [None]:
id2label  # display the complete id -> intent mapping

In [None]:
# Show the first five entries of each mapping so the two directions can be compared.
print(list(id2label.items())[:5])
print('\n')
print(list(label2id.items())[:5])


[(0, 'accept reservations'), (1, 'account blocked'), (2, 'alarm'), (3, 'application status'), (4, 'apr')]


[('accept reservations', 0), ('account blocked', 1), ('alarm', 2), ('application status', 3), ('apr', 4)]


# **Loading the pretrained intent classification Alexa XLMRoberta model and the tokenizer**

**First, we trained a model on the NTU ADL intent dataset, which contains the same number of intents.**

**The best checkpoint from that run (selected by validation score after training RoBERTa-large on the NTU ADL dataset) is then used as the starting point for fine-tuning on our surprise dataset.**
Training on NTU ADL is the first fine-tuning stage: we take a pretrained RoBERTa-large sequence-classification model and fine-tune it on the NTU ADL intent dataset.

Below is the link of that ntu adl intent dataset

 [https://huggingface.co/datasets/xjlulu/ntu_adl_intent](https://huggingface.co/datasets/xjlulu/ntu_adl_intent)


*After fine-tuning from that checkpoint, we save the model weights (checkpoints); the best weights, chosen by validation score, are then used to predict the labels of the massive test dataset (this is the second fine-tuning stage).*

In [None]:

# NOTE(review): the XLM-R classes imported here are not used in this cell; the tokenizer below is the plain RoBERTa one. Confirm which model family the checkpoint belongs to.
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
# 2nd stage fine tuning
model_name="/content/drive/MyDrive/roberta_large_2nd_stage_training_epoch_11/epoch_21" # checkpoint directory on Drive used as the starting point
tokenizer=RobertaTokenizer.from_pretrained(model_name) # load the tokenizer from that same directory

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Initialise the sequence classifier from the checkpoint in `model_name`.
# The class count is taken from the label mapping instead of being hard-coded
# (it is 150 for this dataset).
num_classes = len(label2id)

# ignore_mismatched_sizes=True lets loading proceed even when a stored weight's
# shape differs from what num_labels requires.
model = RobertaForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_classes,
    ignore_mismatched_sizes=True,
)

# The label maps belong on the config: that is what save_pretrained() writes to
# config.json, so every saved checkpoint carries the real intent names.
model.config.label2id = label2id
model.config.id2label = id2label

# Kept for backward compatibility with code that reads these off the model itself.
model.label2id = label2id
model.id2label = id2label


In [None]:
# Tokenizer settings shared by both splits: truncate to at most 64 tokens,
# pad, and return PyTorch tensors.
tokenizer_options = dict(
    truncation=True,
    padding=True,
    max_length=64,
    return_tensors='pt',
)

# Encode the training and the held-out utterances with identical settings.
train_encodings = tokenizer(train_data, **tokenizer_options)
test_encodings = tokenizer(test_data, **tokenizer_options)

In [None]:
# Translate the string intents of each split into integer class ids via label2id.
train_int_labels = list(map(label2id.__getitem__, train_labels))
test_int_labels = list(map(label2id.__getitem__, test_labels))
test_int_labels[:5]

[117, 139, 67, 39, 18]

# **making pytorch datasets**

In [None]:
# Create PyTorch datasets
class IntentDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    Args:
        encodings: Mapping from field name to an indexable container of
            per-sample values (anything exposing ``.items()``).
        labels: Sequence of labels, one per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of its encoding fields plus ``'labels'``."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        # The label is wrapped in a tensor and stored under the 'labels' key.
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap each tokenized split together with its integer labels.
train_dataset = IntentDataset(train_encodings, train_int_labels)
test_dataset = IntentDataset(test_encodings, test_int_labels)

In [None]:
from torch.utils.data import DataLoader

batch_size = 64  # samples per batch for both loaders; adjust as needed

# Training batches are shuffled.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation is not shuffled, so outputs stay in the same order as the test labels.
eval_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Training arguments
# NOTE(review): in the visible notebook only `training_args.logging_steps` is read
# (by the manual training loop); no Trainer is constructed, so the remaining values
# appear to have no effect. Confirm before relying on them.
# previous_saved_dir='./intent_classification__'
# `saved_dir` is reassigned to a different directory in the training cell before any checkpoint is written.
saved_dir='/content/drive/MyDrive/intent_classification_It_bombay/trained_model_10_checkpoints/'
training_args = TrainingArguments(
    output_dir=saved_dir,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    seed=42,
    # weight-decay=0.01
    # gradient_accumulation_steps=40,
    evaluation_strategy="steps", # use 'epoch' for evaluating every epoch
    logging_steps=10, # the manual loop refreshes its progress-bar loss every this many steps
    eval_steps=10,
    save_total_limit=5,
    save_steps=60,
    learning_rate=7e-6, # differs from the 1e-5 given to the optimizer in the training cell
    # warmup_steps=400,
    # weight_decay=0.10,
    # adam_epsilon=1e-7,
    # warmup_steps=400,
    num_train_epochs=40,
    logging_dir='./logs',
)

In [None]:
import os
import torch
# torch's AdamW replaces the deprecated transformers.AdamW; binding it to the
# same name keeps any other cell that refers to `AdamW` working.
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from transformers import TrainingArguments
from sklearn.model_selection import KFold
from tqdm import tqdm

# Run on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

learning_rate = 1e-5   # peak learning rate, reached at the end of warm-up
num_train_epochs = 40

# eps and weight_decay are spelled out to match the defaults of the
# transformers.AdamW class this cell used before (torch's defaults differ).
optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-6, weight_decay=0.0)

# One scheduler step per batch: linear warm-up over the first 10% of all
# steps, then linear decay to zero.
total_train_steps = len(train_dataloader) * num_train_epochs
num_warmup_steps = int(0.1 * total_train_steps)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=total_train_steps,
)

# Directory that receives one checkpoint sub-folder per epoch during 2nd-stage fine-tuning.
saved_dir="/content/drive/MyDrive/intent_classification_It_bombay/trained_model_final_2nd_stage_less_lr"


def _train_one_epoch(epoch):
    """Run one optimisation pass over `train_dataloader` and return the mean batch loss."""
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f"Epoch {epoch}")

    for step, batch in progress_bar:
        batch = {k: v.to(device) for k, v in batch.items()}

        # The batch contains 'labels', so the model output carries the loss.
        loss = model(**batch).loss

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        lr_scheduler.step()

        running_loss += loss.item()

        # Refresh the running average on the progress bar every logging_steps steps.
        if step % training_args.logging_steps == 0:
            progress_bar.set_postfix(loss=running_loss / (step + 1))

    return running_loss / max(len(train_dataloader), 1)


def _save_checkpoint(epoch):
    """Write model weights, config and tokenizer to `<saved_dir>/epoch_<epoch>`; return that path."""
    checkpoint_dir = os.path.join(saved_dir, f"epoch_{epoch}")
    os.makedirs(checkpoint_dir, exist_ok=True)
    model.save_pretrained(checkpoint_dir)
    model.config.save_pretrained(checkpoint_dir)
    tokenizer.save_pretrained(checkpoint_dir)
    return checkpoint_dir


def _evaluate():
    """Return the mean batch loss over `eval_dataloader`, computed without gradients."""
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(eval_dataloader, total=len(eval_dataloader), desc="Evaluation"):
            batch = {k: v.to(device) for k, v in batch.items()}
            total_loss += model(**batch).loss.item()
    # max(..., 1) avoids a ZeroDivisionError if the evaluation loader is empty.
    return total_loss / max(len(eval_dataloader), 1)


# Track the epoch with the lowest evaluation loss so the best checkpoint does
# not have to be picked out of the log by eye.
best_eval_loss = float("inf")
best_epoch = None

for epoch in range(num_train_epochs):
    _train_one_epoch(epoch)
    _save_checkpoint(epoch)

    avg_eval_loss = _evaluate()
    print(f"Epoch {epoch}: Evaluation Loss: {avg_eval_loss}")
    # Report the rate the scheduler actually applied, not the configured peak.
    print(f'learning rate is : {lr_scheduler.get_last_lr()[0]}')

    if avg_eval_loss < best_eval_loss:
        best_eval_loss = avg_eval_loss
        best_epoch = epoch

print(f"Best evaluation loss {best_eval_loss} at epoch {best_epoch}")


Epoch 0: 100%|██████████| 29/29 [00:44<00:00,  1.54s/it, loss=8.81]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.91it/s]


Epoch 0: Evaluation Loss: 9.033305525779724
learning rate is : 1e-05


Epoch 1: 100%|██████████| 29/29 [00:45<00:00,  1.59s/it, loss=6.77]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch 1: Evaluation Loss: 4.901621520519257
learning rate is : 1e-05


Epoch 2: 100%|██████████| 29/29 [00:47<00:00,  1.63s/it, loss=4.94]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch 2: Evaluation Loss: 4.464589715003967
learning rate is : 1e-05


Epoch 3: 100%|██████████| 29/29 [00:47<00:00,  1.65s/it, loss=4.12]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch 3: Evaluation Loss: 2.5658906400203705
learning rate is : 1e-05


Epoch 4: 100%|██████████| 29/29 [00:48<00:00,  1.67s/it, loss=2.19]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch 4: Evaluation Loss: 1.0252541229128838
learning rate is : 1e-05


Epoch 5: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.921]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.72it/s]


Epoch 5: Evaluation Loss: 0.5088640823960304
learning rate is : 1e-05


Epoch 6: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.448]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch 6: Evaluation Loss: 0.3711647465825081
learning rate is : 1e-05


Epoch 7: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.264]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.73it/s]


Epoch 7: Evaluation Loss: 0.32673659548163414
learning rate is : 1e-05


Epoch 8: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.185]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.72it/s]


Epoch 8: Evaluation Loss: 0.30154803209006786
learning rate is : 1e-05


Epoch 9: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.143]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.73it/s]


Epoch 9: Evaluation Loss: 0.2779128076508641
learning rate is : 1e-05


Epoch 10: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.116]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.72it/s]


Epoch 10: Evaluation Loss: 0.2766836704686284
learning rate is : 1e-05


Epoch 11: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.0981]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.73it/s]


Epoch 11: Evaluation Loss: 0.2740435767918825
learning rate is : 1e-05


Epoch 12: 100%|██████████| 29/29 [00:48<00:00,  1.69s/it, loss=0.0841]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.72it/s]


Epoch 12: Evaluation Loss: 0.27322951424866915
learning rate is : 1e-05


Epoch 13: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.0743]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.71it/s]


Epoch 13: Evaluation Loss: 0.26970956986770034
learning rate is : 1e-05


Epoch 14: 100%|██████████| 29/29 [00:48<00:00,  1.69s/it, loss=0.0669]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.70it/s]


Epoch 14: Evaluation Loss: 0.26575699215754867
learning rate is : 1e-05


Epoch 15: 100%|██████████| 29/29 [00:48<00:00,  1.69s/it, loss=0.0602]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.71it/s]


Epoch 15: Evaluation Loss: 0.271576379891485
learning rate is : 1e-05


Epoch 16: 100%|██████████| 29/29 [00:48<00:00,  1.68s/it, loss=0.0579]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.73it/s]


Epoch 16: Evaluation Loss: 0.26994121773168445
learning rate is : 1e-05


Epoch 17: 100%|██████████| 29/29 [00:48<00:00,  1.69s/it, loss=0.0544]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.69it/s]


Epoch 17: Evaluation Loss: 0.2690772016067058
learning rate is : 1e-05


Epoch 18: 100%|██████████| 29/29 [00:49<00:00,  1.69s/it, loss=0.051]
Evaluation: 100%|██████████| 8/8 [00:02<00:00,  2.71it/s]


Epoch 18: Evaluation Loss: 0.25928995572030544
learning rate is : 1e-05


Epoch 19: 100%|██████████| 29/29 [00:49<00:00,  1.72s/it, loss=0.0476]
Evaluation: 100%|██████████| 8/8 [00:03<00:00,  2.61it/s]


Epoch 19: Evaluation Loss: 0.28044209629297256
learning rate is : 1e-05


Epoch 20:  48%|████▊     | 14/29 [00:26<00:28,  1.89s/it, loss=0.0434]


KeyboardInterrupt: ignored

# **Prediction on the test set (whose labels are available)**

In [None]:
import numpy as np
import torch

# Checkpoint chosen for evaluation on the held-out split.
# NOTE(review): this path points at ".../trained_model_final_3rd_stage_less_lr",
# while the training cell writes to ".../trained_model_final_2nd_stage_less_lr".
# Confirm this is the intended checkpoint.
model_name="/content/drive/MyDrive/intent_classification_It_bombay/trained_model_final_3rd_stage_less_lr/epoch_18"
model = RobertaForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_classes,
    ignore_mismatched_sizes=True,
)

# Batched, unshuffled iteration so predictions line up with the test labels.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model.to(device)
model.eval()

predicted_labels, raw_predictions = [], []

with torch.no_grad():
    for batch in test_dataloader:
        batch = {name: tensor.to(device) for name, tensor in batch.items()}

        # Bring the logits back to the host as a NumPy array of per-sample rows.
        batch_logits = model(**batch).logits.cpu().numpy()

        # The most likely class for each sample is the arg-max over its logits.
        predicted_labels.extend(np.argmax(batch_logits, axis=1))
        raw_predictions.extend(batch_logits)

# Final results as NumPy arrays: class ids and the raw logits.
predicted_labels = np.array(predicted_labels)
raw_predictions = np.array(raw_predictions)

# Now, you have predicted labels and raw logits.


In [None]:
predicted_labels, raw_predictions  # display the predicted class ids and the raw logits

(array([117, 139,  67,  39,  18, 126,  13,  95,  46,  83,  76, 123, 145,
         32, 112, 136, 106,   2,  36,  38,  55, 149, 106,  45,  92, 108,
         50,  46,  93,  81,  90,  31,  24,  10,  27, 146, 129,  93,  65,
         70,  17, 121,  20,  55,  53,  11,   8, 110, 128, 116,  61,   1,
         88,   7, 141,  62, 137, 105,  68,  64,  27,  66,  47,  76,  54,
         91,  48, 132,  25, 137,  84,  32,  22, 107,  69,  44,  16, 130,
        114, 103,  84,  72,  39, 110, 142,  79,  21, 135, 100,  43, 133,
         51, 107,  25,  74,  95,  87,  55,  58,  21, 106,   6,  42,   9,
         37, 140, 131, 146,  64, 103,  61,  11,  15, 113, 137, 100,  49,
         81,  96,  89, 115, 105, 139,  68, 139,  19,  56,  89,  15, 147,
        132,  17,  38, 147,   5,  13,  12,  65,  86, 129, 146,  49,  80,
         20,  93, 116, 119, 121,  73,  69,  54,  51,  51,  36,  31, 138,
         73, 101,  63,  65, 136, 126,  16, 102,  24,  58,  52,  72,  79,
         52, 134,  75, 147,  91,  56,  83,  44,  82

In [None]:

import random
import numpy as np

# Re-run inference over the held-out split (unshuffled, so index i of the
# predictions corresponds to test_data[i]).
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model.eval()

predicted_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        batch_logits = model(**batch).logits.cpu().numpy()
        predicted_labels.extend(np.argmax(batch_logits, axis=1))

predicted_labels = np.array(predicted_labels)

# Map the class ids back to their intent names.
predicted_intents = [id2label[class_id] for class_id in predicted_labels]

# Show one randomly chosen utterance with its predicted intent.
sample_idx = random.randint(0, len(test_data) - 1)
sample_text, sample_intent = test_data[sample_idx], predicted_intents[sample_idx]

print(f"Sample Text: {sample_text}")
print(f"Predicted Intent: {sample_intent}")


Sample Text: How much will it cost to take an Uber from LAX to downtown LA?
Predicted Intent: uber


# **Metrics (accuracy criteria)**

In [None]:
# Per-class and aggregate precision / recall / F1 on the held-out split.
# `labels` pins the class ids explicitly so that each entry of `target_names`
# is bound to its own id; without it the names are matched positionally to
# whichever classes happen to occur in the data.
class_ids = sorted(id2label)
report = classification_report(
    test_int_labels,
    predicted_labels,
    labels=class_ids,
    target_names=[id2label[class_id] for class_id in class_ids],
    output_dict=True,
)
print(report)
print('\n')

# With an explicit `labels` list, scikit-learn may report the overall score
# under 'micro avg' instead of 'accuracy'; for single-label data the micro
# F1 over all classes is the accuracy.
accuracy = report['accuracy'] if 'accuracy' in report else report['micro avg']['f1-score']
precision = report['macro avg']['precision']
recall = report['macro avg']['recall']
f1_score = report['macro avg']['f1-score']

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")

{'jump start': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'schedule meeting': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'bill due': {'precision': 0.75, 'recall': 1.0, 'f1-score': 0.8571428571428571, 'support': 3}, 'change language': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'pin change': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'whisper mode': {'precision': 1.0, 'recall': 0.6666666666666666, 'f1-score': 0.8, 'support': 3}, 'date': {'precision': 1.0, 'recall': 0.6666666666666666, 'f1-score': 0.8, 'support': 3}, 'what is your name': {'precision': 1.0, 'recall': 0.6666666666666666, 'f1-score': 0.8, 'support': 3}, 'translate': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'sync device': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'insurance change': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'who made you': {'precision': 1.0, 'reca

In [None]:
report  # display the full metrics dictionary

{'jump start': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 3},
 'schedule meeting': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 3},
 'bill due': {'precision': 0.75,
  'recall': 1.0,
  'f1-score': 0.8571428571428571,
  'support': 3},
 'change language': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 3},
 'pin change': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 3},
 'whisper mode': {'precision': 1.0,
  'recall': 0.6666666666666666,
  'f1-score': 0.8,
  'support': 3},
 'date': {'precision': 1.0,
  'recall': 0.6666666666666666,
  'f1-score': 0.8,
  'support': 3},
 'what is your name': {'precision': 1.0,
  'recall': 0.6666666666666666,
  'f1-score': 0.8,
  'support': 3},
 'translate': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3},
 'sync device': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 3},
 'insurance change': {'precision': 1.0,
  'recall': 1.0,
  'f1-

In [None]:
# Remove the scalar 'accuracy' entry so every remaining value is a metrics dict.
# pop() with a default keeps the cell re-runnable (`del` raises KeyError the second time).
report.pop('accuracy', None)

In [None]:
import pandas as pd

# Flatten the report into one row per label. Entries whose value is not a dict
# (the scalar 'accuracy') are skipped: passing a float to dict.update() raises
# a TypeError when that entry is still present.
data_list = [
    {'label': label, **metrics}
    for label, metrics in report.items()
    if isinstance(metrics, dict)
]

# One row per label / average, with the metric names as columns.
df = pd.DataFrame(data_list)

print(df)


{'label': 'reminder'}
{'label': 'book hotel'}
{'label': 'translate'}
{'label': 'international fees'}
{'label': 'who do you work for'}
{'label': 'routing'}
{'label': 'recipe'}
{'label': 'redeem rewards'}
{'label': 'change accent'}
{'label': 'pto request'}
{'label': 'pto used'}
{'label': 'calculator'}
{'label': 'are you a bot'}
{'label': 'pto balance'}
{'label': 'rewards balance'}
{'label': 'improve credit score'}
{'label': 'traffic'}
{'label': 'roll dice'}
{'label': 'replacement card duration'}
{'label': 'time'}
{'label': 'pin change'}
{'label': 'meal suggestion'}
{'label': 'rollover 401k'}
{'label': 'taxes'}
{'label': 'change ai name'}
{'label': 'credit score'}
{'label': 'carry on'}
{'label': 'ingredients list'}
{'label': 'expiration date'}
{'label': 'order checks'}
{'label': 'whisper mode'}
{'label': 'play music'}
{'label': 'restaurant reviews'}
{'label': 'sync device'}
{'label': 'spending history'}
{'label': 'schedule maintenance'}
{'label': 'spelling'}
{'label': 'payday'}
{'label': 

TypeError: ignored