# **Installing the dependencies**

In [None]:
!pip install transformers torch

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m49.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m99.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
!pip install -U SentencePiece

Collecting SentencePiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.3 MB[0m [31m1.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/1.3 MB[0m [31m9.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SentencePiece
Successfully installed SentencePiece-0.1.99


In [None]:
!pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.23.0


In [None]:
import os

# CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous, so a device-side error
# is reported at the call that caused it. It is a debugging aid and slows execution;
# remove this cell once training runs cleanly.
# A plain Python assignment has no effect on CUDA: the value has to be exported through
# the process environment, and this cell must run before the first CUDA call.
CUDA_LAUNCH_BLOCKING = 1
os.environ["CUDA_LAUNCH_BLOCKING"] = str(CUDA_LAUNCH_BLOCKING)

# **Connecting google colab to drive**

In [None]:
# Mount Google Drive into the Colab runtime; the dataset files loaded later in this
# notebook are read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Importing the dependencies**

In [None]:
import json
import random
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import RobertaTokenizer, RobertaForSequenceClassification, T5Tokenizer, T5ForSequenceClassification,TrainingArguments, Trainer

## Data format

Two JSON Lines files (one JSON object per line): `surprise.data` contains the utterances, and `surprise.solution` contains the corresponding intents.

Format of `surprise.data`

```json
{"indoml_id": "surprise|11109", "id": "11109", "utt": "Can I make a reservation at Buffalo Wild Wings?"}
{"indoml_id": "surprise|11051", "id": "11051", "utt": "Can I book a table for tonight at Bella Vita?"}
....
```

Format of `surprise.solution` (contains 150 labels)

```json
{"indoml_id": "surprise|11109", "intent": "accept reservations"}
{"indoml_id": "surprise|11051", "intent": "accept reservations"}
....
```


# **Loading the data and solution of the surprise dataset**

In [None]:
def _load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line is parsed with ``json.loads``. Blank lines (for example a
    trailing newline at the end of the file) are skipped instead of raising a
    ``JSONDecodeError``. The file is decoded as UTF-8 regardless of the platform's
    default encoding.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return [json.loads(line) for line in handle if line.strip()]


# Utterance records: one dict per line with the keys shown in the "Data format" section.
data = _load_jsonl("/content/drive/MyDrive/intent_classification_It_bombay/surprise_data/surprise.data")

# Gold intents, keyed by the same indoml_id as the utterance records.
solutions = _load_jsonl("/content/drive/MyDrive/intent_classification_It_bombay/surprise_data/surprise.solution")

# with open('/content/drive/MyDrive/Intent_classification_IIT_BOMBAY/Dataset/massive_test_phase2_data/massive_test.data', 'r') as test_file:
#     test = [json.loads(line) for line in test_file]



In [None]:
print(data[0]) # first record of the surprise dataset: its ids and the utterance text

{'indoml_id': 'surprise|11109', 'id': '11109', 'utt': 'Can I make a reservation at Buffalo Wild Wings?'}


In [None]:
# Collect the raw utterance text of every record, in file order.
utt_list = [record['utt'] for record in data]
utt_list

['Can I make a reservation at Buffalo Wild Wings?',
 'Can I book a table for tonight at Bella Vita?',
 'Does the Cheesecake Factory take reservations?',
 'Do they take reservations at Outback Steakhouse in Las Vegas?',
 "does chili's take reservations?",
 'Can I make a reservation at The French Laundry in Yountville?',
 'Can I check availability and make a reservation online?',
 "Can you tell me if Carrabba's in Houston accepts reservations",
 'Do you have any availability for next weekend at the Brasserie?',
 'Does Buffalo Wild Wings take reservations for sports games',
 'Is it possible to reserve a table in advance at this restaurant?',
 'Does Olive Garden in San Francisco take reservations?',
 "Where can I find reservation information for Applebee's?",
 'Can I reserve a table at Alinea in Chicago?',
 'Does the Pizza Parlor accept reservations for parties of five or more?',
 'Why did my brokerage account suddenly get locked?',
 "I've tried to log into my Chase account, but it's still

In [None]:

print(solutions[0]) # first solution record: the indoml_id and the intent of the matching utterance

{'indoml_id': 'surprise|11109', 'intent': 'accept reservations'}


In [None]:
# Map each indoml_id to its gold intent.
intent_map = {item['indoml_id']: item['intent'] for item in solutions}

# Look up the intent of every utterance, in data-file order. Fail early and name the
# offending ids if the two files are out of sync, instead of a bare KeyError.
indoml_ids = [item['indoml_id'] for item in data]
missing_ids = [indoml_id for indoml_id in indoml_ids if indoml_id not in intent_map]
if missing_ids:
    raise KeyError(
        f"{len(missing_ids)} utterance(s) have no intent in the solution file, e.g. {missing_ids[:5]}"
    )
intents = [intent_map[indoml_id] for indoml_id in indoml_ids]

# Utterances (the model inputs), aligned index-for-index with `intents`.
utt = [item['utt'] for item in data]

num_classes = len(set(intents))
print("Number of classes")
print(num_classes)

# Hold out 7% of the data for testing (93% train), stratified by intent so the class
# proportions are preserved in both splits; random_state makes the split repeatable.
train_data, test_data, train_labels, test_labels = train_test_split(
    utt, intents, test_size=0.07, random_state=42, stratify=intents
)

Number of classes
150


In [None]:
train_labels[0:5], len(train_labels) # first five intents of the training split and its size (the 93% left after test_size=0.07)

(['w2', 'car rental', 'income', 'change speed', 'rewards balance'], 2090)

In [None]:
test_labels[0:5], len(test_labels) # first five intents of the test split and its size (the 7% held out by test_size=0.07)

(['travel alert',
  'timezone',
  'shopping list update',
  'definition',
  'spelling'],
 158)

# **Label encoding – converting labels to numeric values**

In [None]:
# Every intent in solution-file order, duplicates included.
labels_list = [label['intent'] for label in solutions]

# De-duplicate while keeping first-seen order. dict.fromkeys does this in one linear
# pass; the repeated `x not in list` scan it replaces was quadratic.
unique_labels_list = list(dict.fromkeys(labels_list))
# unique_labels_list, len(unique_labels_list)

# Two-way mapping between intent names and the integer class ids used for training.
label2id = {label: i for i, label in enumerate(unique_labels_list)}
id2label = {i: label for i, label in enumerate(unique_labels_list)}

In [None]:
intents  # no visible effect: a notebook cell only displays its last expression
intents_set=set(intents) # the distinct intents (labels) present in the surprise dataset
print(intents_set)
len(intents_set)

{'nutrition info', 'min payment', 'schedule maintenance', 'meaning of life', 'current location', 'ingredients list', 'payday', 'greeting', 'rewards balance', 'credit score', 'order status', 'how old are you', 'calendar update', 'calories', 'reset settings', 'improve credit score', 'play music', 'translate', 'account blocked', 'tire pressure', 'card declined', 'cancel reservation', 'plug type', 'sync device', 'directions', 'timezone', 'flip coin', 'next holiday', 'lost luggage', 'who made you', 'pto balance', 'tire change', 'calendar', 'food last', 'pto used', 'smart home', 'car rental', 'routing', 'todo list update', 'uber', 'what is your name', 'maybe', 'freeze account', 'book flight', 'tell joke', 'no', 'accept reservations', 'definition', 'meeting schedule', 'w2', 'interest rate', 'oil change how', 'distance', 'book hotel', 'change language', 'redeem rewards', 'expiration date', 'timer', 'user name', 'rollover 401k', 'cancel', 'replacement card duration', 'order checks', 'goodbye', 

150

In [None]:
# Display the full class-id -> intent-name mapping.
id2label

{0: 'accept reservations',
 1: 'account blocked',
 2: 'alarm',
 3: 'application status',
 4: 'apr',
 5: 'are you a bot',
 6: 'balance',
 7: 'bill balance',
 8: 'bill due',
 9: 'book flight',
 10: 'book hotel',
 11: 'calculator',
 12: 'calendar',
 13: 'calendar update',
 14: 'calories',
 15: 'cancel',
 16: 'cancel reservation',
 17: 'car rental',
 18: 'card declined',
 19: 'carry on',
 20: 'change accent',
 21: 'change ai name',
 22: 'change language',
 23: 'change speed',
 24: 'change user name',
 25: 'change volume',
 26: 'confirm reservation',
 27: 'cook time',
 28: 'credit limit',
 29: 'credit limit change',
 30: 'credit score',
 31: 'current location',
 32: 'damaged card',
 33: 'date',
 34: 'definition',
 35: 'direct deposit',
 36: 'directions',
 37: 'distance',
 38: 'do you have pets',
 39: 'exchange rate',
 40: 'expiration date',
 41: 'find phone',
 42: 'flight status',
 43: 'flip coin',
 44: 'food last',
 45: 'freeze account',
 46: 'fun fact',
 47: 'gas',
 48: 'gas type',
 49: '

In [None]:
# Show the first five entries of each mapping to confirm they are inverses of each other.
print(list(id2label.items())[:5])
print('\n')
print(list(label2id.items())[:5])


[(0, 'accept reservations'), (1, 'account blocked'), (2, 'alarm'), (3, 'application status'), (4, 'apr')]


[('accept reservations', 0), ('account blocked', 1), ('alarm', 2), ('application status', 3), ('apr', 4)]


# **Loading the pretrained RoBERTa intent-classification checkpoint (`ibm/roberta-large-vira-intents`) and its tokenizer**

In [None]:
# Choose the pretrained checkpoint and load its tokenizer.
# The checkpoint is loaded with the RoBERTa classes (not BERT); the XLM-RoBERTa classes
# imported below are not used by the active code in this cell.
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
model_name="ibm/roberta-large-vira-intents" # IBM checkpoint; the author notes it was trained on 180 labels (not verified here)
tokenizer=RobertaTokenizer.from_pretrained(model_name)

In [None]:
# Size the classification head from the label mapping rather than a hard-coded count,
# so the head always has exactly one output per known intent.
num_classes = len(label2id)

# ignore_mismatched_sizes=True lets the checkpoint load even though its classification
# head has a different number of outputs; the head is then freshly initialised and
# must be trained before the model is used for prediction.
model = RobertaForSequenceClassification.from_pretrained(
    model_name, num_labels=num_classes, ignore_mismatched_sizes=True
)

# The label maps belong on model.config: that is what save_pretrained() writes to
# config.json. Attributes set directly on the model object are not saved with it.
model.config.label2id = label2id
model.config.id2label = id2label


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Both splits are encoded with identical settings: truncate to at most 64 tokens, pad
# each split to its longest sequence, and return PyTorch tensors.
encode_options = dict(truncation=True, padding=True, max_length=64, return_tensors='pt')

train_encodings = tokenizer(train_data, **encode_options)
test_encodings = tokenizer(test_data, **encode_options)

In [None]:
# Translate intent names into integer class ids via label2id.
train_int_labels = [label2id[intent] for intent in train_labels]
test_int_labels = [label2id[intent] for intent in test_labels]
test_int_labels[:5]

[26, 45, 40, 2, 128]

# **making pytorch datasets**

In [None]:
# Create PyTorch datasets
class IntentDataset(torch.utils.data.Dataset):
    """Pairs tokenizer encodings with integer class labels for use with a DataLoader.

    Args:
        encodings: mapping of field name (e.g. ``input_ids``) to a per-example sequence,
            either a tensor whose first dimension indexes examples or a list of lists.
        labels: sequence of integer class ids, one per example.

    Raises:
        ValueError: if any encoding field has a different number of examples than
            ``labels``.
    """

    def __init__(self, encodings, labels):
        # Catch misaligned inputs up front instead of failing mid-epoch.
        for key, val in encodings.items():
            if len(val) != len(labels):
                raise ValueError(
                    f"encodings[{key!r}] has {len(val)} examples but there are {len(labels)} labels"
                )
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, with the class id under ``labels``."""
        # as_tensor passes tensors through unchanged and converts plain lists, so the
        # dataset also works when the tokenizer was called without return_tensors.
        item = {key: torch.as_tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples in the dataset."""
        return len(self.labels)

# Wrap each split's encodings together with its integer labels.
train_dataset = IntentDataset(train_encodings, train_int_labels)
test_dataset = IntentDataset(test_encodings, test_int_labels)

In [None]:
from torch.utils.data import DataLoader

batch_size = 64  # examples per batch, shared by both loaders

# Training batches are reshuffled every epoch; evaluation keeps dataset order so that
# predictions line up with test_data / test_int_labels.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
eval_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# **initializing training arguments and training the model**

In [None]:
# Training arguments.
# NOTE(review): no Trainer is constructed in the visible notebook. The manual training
# loop reads only `training_args.logging_steps` from this object, so the batch sizes,
# learning rate, epoch count and save/eval settings below do not affect that loop.
# `saved_dir` is also reassigned in the training cell before any checkpoint is written.
# previous_saved_dir='./intent_classification__'
# saved_dir='/home/naive123/nlp/Sumit/massive_git/massive/phase_2_jupter_files/trained_model_11_checkpoints'
saved_dir='/home/naive123/nlp/Sumit/massive_git/massive/phase_2_jupter_files/trained_model_3_again_fine_tuning_checkpoints/'

training_args = TrainingArguments(
    output_dir=saved_dir,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    seed=42,
    # weight-decay=0.01
    # gradient_accumulation_steps=40,
    evaluation_strategy="steps", # use 'epoch' for evaluating every epoch
    logging_steps=10,  # the manual loop refreshes its progress-bar loss every this many steps
    eval_steps=10,
    save_total_limit=5,
    save_steps=60,
    learning_rate=2e-5,
    # warmup_steps=400,
    # weight_decay=0.10,
    # adam_epsilon=1e-7,
    # warmup_steps=400,
    num_train_epochs=40,
    logging_dir='./logs',
)

In [None]:
import os
import torch
from transformers import get_linear_schedule_with_warmup
from transformers import TrainingArguments
from sklearn.model_selection import KFold
from tqdm import tqdm

# Manual fine-tuning loop. For each epoch: train over train_dataloader, save a
# checkpoint (weights, config, tokenizer), then report the mean loss on eval_dataloader.

# Run on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Peak learning rate; the scheduler below warms up to it and then decays linearly.
# learning_rate = 7e-5
learning_rate=9e-5

# Number of passes over the training set.
num_train_epochs = 40

# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and weight_decay
# are given explicitly to match the defaults of the transformers implementation
# (1e-6 and 0.0); torch's own defaults are 1e-8 and 0.01.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, eps=1e-6, weight_decay=0.0)

# One optimizer step per batch, so total steps = batches per epoch * epochs.
total_train_steps = len(train_dataloader) * num_train_epochs

# Warm up over the first 10% of the steps.
num_warmup_steps = int(0.1 * total_train_steps)

# Linear warmup followed by linear decay to zero.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=total_train_steps,
)

# Output directory for the per-epoch checkpoints.
# saved_dir='/home/naive123/nlp/Sumit/massive_git/massive/phase_2_jupter_files/trained_model_3_again_fine_tuning_checkpoints/'
saved_dir='/home/naive123/nlp/Sumit/massive_git/massive/phase_2_jupter_files/trained_model_1_direct_roberta_base_checkpoints/'

for epoch in range(num_train_epochs):
    # ---- training ----
    model.train()
    train_loss = 0.0

    progress_bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f"Epoch {epoch}")

    for step, batch in progress_bar:
        # Move the batch to the same device as the model.
        batch = {k: v.to(device) for k, v in batch.items()}

        # Forward pass; the model computes the loss itself because the batch carries 'labels'.
        outputs = model(**batch)
        loss = outputs.loss

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Advance the learning-rate schedule once per optimizer step.
        lr_scheduler.step()

        # Running sum, used for the mean loss shown on the progress bar.
        train_loss += loss.item()

        if step % training_args.logging_steps == 0:
            avg_loss = train_loss / (step + 1)
            progress_bar.set_postfix(loss=avg_loss)

    # ---- checkpoint ----
    # One sub-directory per epoch. Nothing prunes old checkpoints, so 40 epochs keep
    # 40 full copies of the model on disk.
    checkpoint_dir = os.path.join(saved_dir, f"epoch_{epoch}")
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Save model weights
    model.save_pretrained(checkpoint_dir)

    # Save optimizer state
    # torch.save(optimizer.state_dict(), os.path.join(checkpoint_dir, "optimizer.pt"))

    # # Save learning rate scheduler state
    # torch.save(lr_scheduler.state_dict(), os.path.join(checkpoint_dir, "scheduler.pt"))

    # Save RNG state (if needed)
    # torch.save(torch.get_rng_state(), os.path.join(checkpoint_dir, "rng_state.pth"))

    # Save config.json
    model.config.save_pretrained(checkpoint_dir)

    # Save tokenizer so the checkpoint directory can be loaded on its own
    tokenizer.save_pretrained(checkpoint_dir)

    # ---- evaluation ----
    model.eval()
    eval_loss = 0.0

    progress_bar_eval = tqdm(enumerate(eval_dataloader), total=len(eval_dataloader), desc="Evaluation")

    for step, batch in progress_bar_eval:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
            loss = outputs.loss
            eval_loss += loss.item()

    # Mean of the per-batch losses (batches are not weighted by their size).
    avg_eval_loss = eval_loss / len(eval_dataloader)
    print(f"Epoch {epoch}: Evaluation Loss: {avg_eval_loss}")

    # Report the rate the scheduler is actually applying. Printing `learning_rate`
    # would show the configured peak on every epoch, whatever the schedule is doing.
    current_lr = lr_scheduler.get_last_lr()[0]
    print(f'learning rate is : {current_lr}')

    # if (epoch + 1) % 3 == 0:
    #     learning_rate *= 0.75
    #     for param_group in optimizer.param_groups:
    #         param_group['lr'] = learning_rate


Epoch 0: 100%|██████████| 33/33 [00:06<00:00,  5.32it/s, loss=5.02]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 33.13it/s]


Epoch 0: Evaluation Loss: 5.015932401021321
learning rate is : 9e-05


Epoch 1: 100%|██████████| 33/33 [00:05<00:00,  5.76it/s, loss=4.91]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 32.62it/s]


Epoch 1: Evaluation Loss: 4.354719956715901
learning rate is : 9e-05


Epoch 2: 100%|██████████| 33/33 [00:05<00:00,  5.71it/s, loss=3.87]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 32.82it/s]


Epoch 2: Evaluation Loss: 2.9020397663116455
learning rate is : 9e-05


Epoch 3: 100%|██████████| 33/33 [00:05<00:00,  5.67it/s, loss=2.51]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 32.36it/s]


Epoch 3: Evaluation Loss: 1.6637887159983318
learning rate is : 9e-05


Epoch 4: 100%|██████████| 33/33 [00:05<00:00,  5.68it/s, loss=1.37]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 32.14it/s]


Epoch 4: Evaluation Loss: 0.9011531273523966
learning rate is : 9e-05


Epoch 5: 100%|██████████| 33/33 [00:05<00:00,  5.63it/s, loss=0.692]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.92it/s]


Epoch 5: Evaluation Loss: 0.5209542115529379
learning rate is : 9e-05


Epoch 6: 100%|██████████| 33/33 [00:05<00:00,  5.60it/s, loss=0.382]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.82it/s]


Epoch 6: Evaluation Loss: 0.41068193813165027
learning rate is : 9e-05


Epoch 7: 100%|██████████| 33/33 [00:05<00:00,  5.59it/s, loss=0.253]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.92it/s]


Epoch 7: Evaluation Loss: 0.33546775082747143
learning rate is : 9e-05


Epoch 8: 100%|██████████| 33/33 [00:05<00:00,  5.58it/s, loss=0.172]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.88it/s]


Epoch 8: Evaluation Loss: 0.3348437324166298
learning rate is : 9e-05


Epoch 9: 100%|██████████| 33/33 [00:05<00:00,  5.57it/s, loss=0.128]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.73it/s]


Epoch 9: Evaluation Loss: 0.3698279360930125
learning rate is : 9e-05


Epoch 10: 100%|██████████| 33/33 [00:05<00:00,  5.56it/s, loss=0.101]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.84it/s]


Epoch 10: Evaluation Loss: 0.31436727320154506
learning rate is : 9e-05


Epoch 11: 100%|██████████| 33/33 [00:05<00:00,  5.55it/s, loss=0.0764]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.52it/s]


Epoch 11: Evaluation Loss: 0.29563751071691513
learning rate is : 9e-05


Epoch 12: 100%|██████████| 33/33 [00:05<00:00,  5.55it/s, loss=0.0682]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.64it/s]


Epoch 12: Evaluation Loss: 0.311932689199845
learning rate is : 9e-05


Epoch 13: 100%|██████████| 33/33 [00:05<00:00,  5.54it/s, loss=0.0709]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.37it/s]


Epoch 13: Evaluation Loss: 0.3236170969903469
learning rate is : 9e-05


Epoch 14: 100%|██████████| 33/33 [00:05<00:00,  5.55it/s, loss=0.0613]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.38it/s]


Epoch 14: Evaluation Loss: 0.30221828632056713
learning rate is : 9e-05


Epoch 15: 100%|██████████| 33/33 [00:05<00:00,  5.54it/s, loss=0.0574]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.67it/s]


Epoch 15: Evaluation Loss: 0.3149873713652293
learning rate is : 9e-05


Epoch 16: 100%|██████████| 33/33 [00:05<00:00,  5.54it/s, loss=0.0409]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.57it/s]


Epoch 16: Evaluation Loss: 0.3227964515487353
learning rate is : 9e-05


Epoch 17: 100%|██████████| 33/33 [00:05<00:00,  5.54it/s, loss=0.0372]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.58it/s]


Epoch 17: Evaluation Loss: 0.3069939458121856
learning rate is : 9e-05


Epoch 18: 100%|██████████| 33/33 [00:05<00:00,  5.54it/s, loss=0.0338]
Evaluation: 100%|██████████| 3/3 [00:00<00:00, 31.67it/s]


Epoch 18: Evaluation Loss: 0.3142201378941536
learning rate is : 9e-05


Epoch 19:  79%|███████▉  | 26/33 [00:04<00:01,  5.49it/s, loss=0.0325]

# **Prediction on the test set (whose labels are available)**

In [None]:
# raw_predictions = model.predict(test_dataset)
# predicted_labels = np.argmax(raw_predictions.predictions)
# predicted_labels, raw_predictions
import numpy as np
import torch
# To evaluate a saved checkpoint instead of the in-memory model, reload it first:
# model_name="/home/naive123/nlp/Sumit/massive_git/massive/phase_2_jupter_files/trained_model_3_again_fine_tuning_checkpoints/epoch_15"
# model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_classes, ignore_mismatched_sizes=True)

# Unshuffled loader, so predictions stay aligned with test_data / test_int_labels.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model.to(device)
model.eval()  # inference mode for layers such as dropout

predicted_labels = []
raw_predictions = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs_on_device = {name: tensor.to(device) for name, tensor in batch.items()}

        # Logits for the batch, brought back to the CPU as a NumPy array.
        batch_logits = model(**inputs_on_device).logits.cpu().numpy()

        # Keep the per-example logit rows and the index of the largest logit per row.
        raw_predictions.extend(batch_logits)
        predicted_labels.extend(batch_logits.argmax(axis=1))

# Stack the per-example results into arrays.
predicted_labels = np.array(predicted_labels)
raw_predictions = np.array(raw_predictions)

# Now, you have predicted labels and raw logits.


In [None]:
# Display the predicted class ids together with the raw logits they were derived from.
predicted_labels, raw_predictions

(array([129,  78,  11, 105,  90, 108,  15,  70,   3,  69, 145,  66,  32,
        117,  16, 123, 142,  17, 119, 103,  20,  43, 102,  91,  24, 147,
         56,  57, 123, 120, 119,  56,  34, 106,  14, 127, 143,  35,  44,
         53, 125, 149,   8,  99,  38,  78, 109,  75, 115,  29, 103,  48,
        148, 136,   0,  49, 115,  82, 116,  27, 139, 139,  68,  60,  63,
        100,  68,   6, 123,   4, 122,  24, 131,  40,  50, 131, 118,  47,
        109,  37,  35, 126,  94, 140,  18,   8,   5,  19,  36,  64,  52,
         58,  82, 144,  88, 108,  59, 134,   9,   2,  48,  31,  95,  77,
         12,  61,  44,  92, 146,  12, 128,  22,  38,  23, 118,  67,  54,
         62,  69,  45,  33,  83,  76,  26,  63,  90,  71, 132,  96,   1,
         96,  46, 130,  70, 113,  65,  80,  32,  85, 124,  46,  94, 149,
         81,  41,  39, 122,   6, 121, 112,  23,  98,  42,  25,  47,  34,
         85,  21, 142, 107,  86, 137, 130, 100, 145, 138, 141, 101,  11,
         15,  76,   7,  36, 110, 136,  26, 101,  95

In [None]:
# len(raw_predictions.predictions), raw_predictions.predictions

In [None]:
# raw_predictions = trainer.predict(test_dataset)

In [None]:

# # Predict on the test set
# raw_predictions = trainer.predict(test_dataset)
# predicted_labels = np.argmax(raw_predictions.predictions, axis=1)

# # Convert labels back to original intents
# # predicted_intents = [list(label_map.keys())[list(label_map.values()).index(label)] for label in predicted_labels]
# predicted_intents=[id2label[label] for label in predicted_labels]

# # Print one sample prediction
# sample_idx = random.randint(0, len(test_data) - 1)
# sample_text = test_data[sample_idx]
# sample_intent = predicted_intents[sample_idx]

# print(f"Sample Text: {sample_text}")
# print(f"Predicted Intent: {sample_intent}")
import random
import numpy as np

# Unshuffled loader, so predictions stay aligned with test_data.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model.eval()  # inference mode for layers such as dropout

predicted_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs_on_device = {name: tensor.to(device) for name, tensor in batch.items()}
        batch_logits = model(**inputs_on_device).logits.cpu().numpy()
        # Predicted class = index of the largest logit in each row.
        predicted_labels.extend(batch_logits.argmax(axis=1))

predicted_labels = np.array(predicted_labels)

# Map class ids back to intent names.
predicted_intents = [id2label[class_id] for class_id in predicted_labels]

# Show the prediction for one randomly chosen test utterance.
sample_idx = random.randrange(len(test_data))
sample_text = test_data[sample_idx]
sample_intent = predicted_intents[sample_idx]

print(f"Sample Text: {sample_text}")
print(f"Predicted Intent: {sample_intent}")


Sample Text: Can you please tell me how much I need to pay for my water bill this month?
Predicted Intent: bill balance


# **Metrics (accuracy criteria)**

In [None]:
# Per-class and aggregate precision / recall / F1 on the held-out split.
# `labels` fixes which class id each name in `target_names` describes. Without it
# scikit-learn infers the classes from the data and raises if one is absent.
# zero_division=0 gives classes with no predictions a score of 0 without a warning.
label_ids = sorted(id2label)
report = classification_report(
    test_int_labels,
    predicted_labels,
    labels=label_ids,
    target_names=[id2label[class_id] for class_id in label_ids],
    output_dict=True,
    zero_division=0,
)
print(report)
print('\n')

# NOTE(review): some scikit-learn versions appear to report 'micro avg' instead of
# 'accuracy' when `labels` lists classes missing from the data. Confirm against the
# installed version. The direct computation below covers that case.
accuracy = report.get('accuracy')
if accuracy is None:
    accuracy = float(np.mean(np.asarray(test_int_labels) == np.asarray(predicted_labels)))

# Macro averages weight every class equally, regardless of its support.
precision = report['macro avg']['precision']
recall = report['macro avg']['recall']
f1_score = report['macro avg']['f1-score']

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")

{'interest rate': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 2}, 'roll dice': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 1}, 'redeem rewards': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 1}, 'translate': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 2}, 'who do you work for': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 2}, 'improve credit score': {'precision': 1.0, 'recall': 0.5, 'f1-score': 0.6666666666666666, 'support': 2}, 'shopping list update': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 2}, 'exchange rate': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 1}, 'pto balance': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 2}, 'change speed': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 1}, 'are you a bot': {'precision': 1.0, 'recall': 0.5, 'f1-score': 0.6666666666666666, 'support': 2}, 'how old are you': {'precision': 1.0, 'recall'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Display the report dict. This only works after the metrics cell has been run in the
# current kernel; the recorded NameError comes from running it before that cell.
report

NameError: name 'report' is not defined

In [None]:
# 'accuracy' is a bare float, while every other entry is a dict of metrics. Drop it so
# the report can be tabulated row by row. pop() with a default makes the cell safe to
# re-run, whereas `del` raises KeyError on the second run.
report.pop('accuracy', None)

In [None]:
import pandas as pd

# One row per report entry: the entry's name under 'label' plus its metric columns.
# Entries that are not dicts (the scalar 'accuracy', if still present) have no metric
# columns and are skipped rather than crashing the conversion.
data_list = [
    {'label': label, **metrics}
    for label, metrics in report.items()
    if isinstance(metrics, dict)
]

# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(data_list)

# Print the resulting DataFrame
print(df)


{'label': 'timezone'}
{'label': 'spending history'}
{'label': 'make call'}
{'label': 'play music'}
{'label': 'yes'}
{'label': 'smart home'}
{'label': 'taxes'}
{'label': 'income'}
{'label': 'account blocked'}
{'label': 'travel alert'}
{'label': 'credit score'}
{'label': 'distance'}
{'label': 'cancel reservation'}
{'label': 'vaccines'}
{'label': 'next holiday'}
{'label': 'min payment'}
{'label': 'meeting schedule'}
{'label': 'report fraud'}
{'label': 'pto request status'}
{'label': 'car rental'}
{'label': 'tire change'}
{'label': 'flip coin'}
{'label': 'what are your hobbies'}
{'label': 'credit limit change'}
{'label': 'order status'}
{'label': 'apr'}
{'label': 'uber'}
{'label': 'fun fact'}
{'label': 'directions'}
{'label': 'calories'}
{'label': 'greeting'}
{'label': 'meal suggestion'}
{'label': 'transactions'}
{'label': 'balance'}
{'label': 'are you a bot'}
{'label': 'replacement card duration'}
{'label': 'time'}
{'label': 'change speed'}
{'label': 'payday'}
{'label': 'sync device'}
{'l

In [None]:
# Write the per-label metrics table to label_list.csv in the current working directory
# (the DataFrame index is written as the first column).
df.to_csv('label_list.csv')

In [None]:
# Number of entries left in the report: the per-class entries plus the aggregate
# entries such as 'macro avg'.
len(report.keys())

152