In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from transformers import XLNetTokenizer, XLNetForSequenceClassification
from transformers import AutoTokenizer, OpenAIGPTForSequenceClassification
from transformers import AutoTokenizer, DebertaForSequenceClassification
from transformers import AutoTokenizer, RobertaPreLayerNormForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from transformers import BertConfig
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch

# Report which CUDA devices PyTorch can see, or say so when there are none.
if not torch.cuda.is_available():
    print("No GPU devices are available.")
else:
    # How many CUDA devices this process can use
    num_gpus = torch.cuda.device_count()

    # One line per device: its index followed by its name
    for i, gpu_name in enumerate(map(torch.cuda.get_device_name, range(num_gpus))):
        print(f"GPU {i}: {gpu_name}")


GPU 0: NVIDIA GeForce RTX 3060
GPU 1: NVIDIA GeForce RTX 3060


In [3]:
# Restrict this process to GPU devices 0 and 1.
# NOTE(review): CUDA normally reads this variable when it is first initialised, and
# an earlier cell already queries torch.cuda — so this assignment may come too late
# to have any effect. Confirm, and set it before the first CUDA call if it matters.
os.environ.update(CUDA_VISIBLE_DEVICES="0,1")

In [4]:
# Load the training set: utterance text is in `dialog`, intent name in `intent`.
train_df = pd.read_csv("train.csv")
queries = train_df['dialog'].tolist()
labels = train_df['intent'].tolist()

# Map each intent name to an integer class id, numbered by order of first
# appearance in train.csv. Inference turns predicted ids back into names using
# this same ordering, so it must stay stable between training and inference.
unique_intents = train_df['intent'].unique()
intent_to_label = {intent: label for label, intent in enumerate(unique_intents)}

# Load the inference queries: every line of the file becomes one query.
# NOTE(review): the file is read as raw lines rather than parsed as CSV, so a
# header row or quoted fields (if present) would reach the model verbatim — confirm.
test_file_path = "test.csv"

# Explicit encoding so the text does not depend on the platform default
# (pd.read_csv above already decodes as UTF-8 by default).
with open(test_file_path, "r", encoding="utf-8") as file:
    inference_queries = [line.strip() for line in file]

# Number of intent classes = number of distinct intents in the training data
num_classes = len(unique_intents)
print(num_classes)
print(intent_to_label)

150
{'translate': 0, 'transfer': 1, 'timer': 2, 'definition': 3, 'meaning_of_life': 4, 'insurance_change': 5, 'find_phone': 6, 'travel_alert': 7, 'pto_request': 8, 'improve_credit_score': 9, 'fun_fact': 10, 'change_language': 11, 'payday': 12, 'replacement_card_duration': 13, 'time': 14, 'application_status': 15, 'flight_status': 16, 'flip_coin': 17, 'change_user_name': 18, 'where_are_you_from': 19, 'shopping_list_update': 20, 'what_can_i_ask_you': 21, 'maybe': 22, 'oil_change_how': 23, 'restaurant_reservation': 24, 'balance': 25, 'confirm_reservation': 26, 'freeze_account': 27, 'rollover_401k': 28, 'who_made_you': 29, 'distance': 30, 'user_name': 31, 'timezone': 32, 'next_song': 33, 'transactions': 34, 'restaurant_suggestion': 35, 'rewards_balance': 36, 'pay_bill': 37, 'spending_history': 38, 'pto_request_status': 39, 'credit_score': 40, 'new_card': 41, 'lost_luggage': 42, 'repeat': 43, 'mpg': 44, 'oil_change_when': 45, 'yes': 46, 'travel_suggestion': 47, 'insurance': 48, 'todo_list_u

In [5]:
# Checkpoint to fine-tune. Alternatives the author left commented out:
# "roberta-large", and (with BertTokenizer / BertForSequenceClassification)
# "bert-base-uncased" and "bert-large-uncased-whole-word-masking".
model_name = "roberta-base"

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = RobertaTokenizer.from_pretrained(model_name)

# Load the pre-trained encoder with a classification head sized to the number
# of intents, then move the whole model to `device`.
model = RobertaForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_classes,
).to(device)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Tokenize every training query in one call. `padding=True` pads each sequence to
# the longest one in the set and `return_tensors="pt"` yields PyTorch tensors.
encoded_inputs = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
input_ids = encoded_inputs["input_ids"]
attention_mask = encoded_inputs["attention_mask"]

# Build the class ids from the DataFrame instead of from `labels`: this cell rebinds
# `labels` to a tensor (and later cells rebind it to per-batch tensors), so reading
# the old value here would raise KeyError whenever the cell is re-run.
labels = torch.tensor(
    [intent_to_label[intent] for intent in train_df['intent'].tolist()]
).to(device)
print(labels)

tensor([  0,   0,   0,  ..., 149, 149, 149], device='cuda:0')


In [7]:
# Hold out a single example for validation and train on everything else.
# The previous float test_size (1e-11) was rounded up by scikit-learn to exactly
# one sample; an int test_size states that count directly.
VAL_SIZE = 1
SPLIT_SEED = 21

# Split ids, masks and labels in ONE call so all three are partitioned with the
# same indices by construction (rather than by two calls sharing a seed).
(train_inputs, val_inputs,
 train_masks, val_masks,
 train_labels, val_labels) = train_test_split(
    input_ids, attention_mask, labels,
    test_size=VAL_SIZE, random_state=SPLIT_SEED,
)

In [8]:
# Number of examples per optimisation step (shared by both loaders)
batch_size = 760


def _build_loader(sampler_cls, *tensors):
    """Bundle `tensors` into a TensorDataset and return (dataset, sampler, loader)."""
    dataset = TensorDataset(*tensors)
    sampler = sampler_cls(dataset)
    loader = DataLoader(dataset, sampler=sampler, batch_size=batch_size)
    return dataset, sampler, loader


# Training batches are drawn in random order
train_data, train_sampler, train_dataloader = _build_loader(
    RandomSampler, train_inputs, train_masks, train_labels
)

# Validation batches are read in their stored order
val_data, val_sampler, val_dataloader = _build_loader(
    SequentialSampler, val_inputs, val_masks, val_labels
)


In [9]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


In [10]:
# Fine-tune the sequence-classification model on the training split.
# NOTE(review): `AdamW` is the class imported from transformers, which appears to be
# deprecated there in favour of torch.optim.AdamW. It is kept so the optimisation
# behaviour is unchanged — confirm before upgrading transformers.
optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)
num_epochs = 64

# Wrap the model with DataParallel to use multiple GPUs. The guard keeps a re-run
# of this cell from nesting wrappers; later cells reach the underlying model
# through `model.module`, which would break with a double wrap.
if not isinstance(model, torch.nn.DataParallel):
    model = torch.nn.DataParallel(model)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}"):
        # Batch-local names, so the notebook-level `labels` tensor is not overwritten
        batch_inputs, batch_masks, batch_labels = batch
        optimizer.zero_grad()

        outputs = model(batch_inputs, attention_mask=batch_masks, labels=batch_labels)

        # DataParallel can return one loss per replica; average them into a
        # scalar (a no-op when the loss is already a scalar).
        loss = outputs.loss.mean()

        total_loss += loss.item()
        loss.backward()
        # Cap the global gradient norm at 1.0 before the update
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f"Average training loss for epoch {epoch + 1}: {avg_train_loss}")

Epoch 1: 100%|██████████| 15/15 [00:19<00:00,  1.32s/it]


Average training loss for epoch 1: 5.005773671468099


Epoch 2: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 2: 4.786820793151856


Epoch 3: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 3: 4.36770699818929


Epoch 4: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 4: 3.9195431391398112


Epoch 5: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 5: 3.4927379926045736


Epoch 6: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 6: 3.112372318903605


Epoch 7: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 7: 2.7689308961232504


Epoch 8: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 8: 2.451499032974243


Epoch 9: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 9: 2.171376434961955


Epoch 10: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 10: 1.9170511643091837


Epoch 11: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 11: 1.6889657020568847


Epoch 12: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 12: 1.4788484255472818


Epoch 13: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 13: 1.2840496063232423


Epoch 14: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 14: 1.1143947998682657


Epoch 15: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 15: 0.9630171378453573


Epoch 16: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 16: 0.8308391571044922


Epoch 17: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 17: 0.7142441789309184


Epoch 18: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 18: 0.613876211643219


Epoch 19: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 19: 0.5254504680633545


Epoch 20: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 20: 0.44997575084368385


Epoch 21: 100%|██████████| 15/15 [00:17<00:00,  1.19s/it]


Average training loss for epoch 21: 0.3882920523484548


Epoch 22: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 22: 0.3355198323726654


Epoch 23: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 23: 0.2917487343152364


Epoch 24: 100%|██████████| 15/15 [00:17<00:00,  1.14s/it]


Average training loss for epoch 24: 0.2573720574378967


Epoch 25: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 25: 0.2284652014573415


Epoch 26: 100%|██████████| 15/15 [00:17<00:00,  1.19s/it]


Average training loss for epoch 26: 0.20422271986802418


Epoch 27: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 27: 0.18311154047648112


Epoch 28: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 28: 0.1665387213230133


Epoch 29: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 29: 0.15150078137715658


Epoch 30: 100%|██████████| 15/15 [00:17<00:00,  1.19s/it]


Average training loss for epoch 30: 0.13829084634780883


Epoch 31: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 31: 0.1267171988884608


Epoch 32: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 32: 0.11764918814102808


Epoch 33: 100%|██████████| 15/15 [00:18<00:00,  1.21s/it]


Average training loss for epoch 33: 0.10862083385388056


Epoch 34: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 34: 0.10057244847218196


Epoch 35: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 35: 0.09339784185091654


Epoch 36: 100%|██████████| 15/15 [00:17<00:00,  1.14s/it]


Average training loss for epoch 36: 0.08764501909414928


Epoch 37: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 37: 0.08337118277947107


Epoch 38: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 38: 0.0779101217786471


Epoch 39: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 39: 0.0730539932847023


Epoch 40: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 40: 0.06921570052703221


Epoch 41: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 41: 0.06542663872241974


Epoch 42: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 42: 0.06113864630460739


Epoch 43: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 43: 0.05847327634692192


Epoch 44: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 44: 0.05527334536115328


Epoch 45: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 45: 0.05268031135201454


Epoch 46: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]


Average training loss for epoch 46: 0.050323533018430074


Epoch 47: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 47: 0.048782489448785785


Epoch 48: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 48: 0.046229083091020584


Epoch 49: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 49: 0.04417743409673373


Epoch 50: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 50: 0.04212869107723236


Epoch 51: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 51: 0.04075548872351646


Epoch 52: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 52: 0.03939661433299382


Epoch 53: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 53: 0.037868618965148926


Epoch 54: 100%|██████████| 15/15 [00:17<00:00,  1.19s/it]


Average training loss for epoch 54: 0.03609771877527237


Epoch 55: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 55: 0.03511850635210673


Epoch 56: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 56: 0.03355687260627747


Epoch 57: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 57: 0.03172341547906399


Epoch 58: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 58: 0.03099258281290531


Epoch 59: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 59: 0.029849824557701746


Epoch 60: 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


Average training loss for epoch 60: 0.028909991309046746


Epoch 61: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 61: 0.02810343454281489


Epoch 62: 100%|██████████| 15/15 [00:17<00:00,  1.18s/it]


Average training loss for epoch 62: 0.027244438603520393


Epoch 63: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]


Average training loss for epoch 63: 0.026291109745701154


Epoch 64: 100%|██████████| 15/15 [00:17<00:00,  1.16s/it]

Average training loss for epoch 64: 0.025195595994591712





In [11]:
# Save the fine-tuned model for later inference. `model.module` is the model
# held inside the DataParallel wrapper.
save_dir = "saved_model_acc_97.02_epoch64_batch_760_roberta_random21_lr-1e5"
model.module.save_pretrained(save_dir)

In [12]:
# Evaluate the model on the validation set.
# NOTE(review): the split cell holds out only a minimal validation set, so these
# figures are a smoke test of the pipeline rather than a quality estimate.

# Switch to eval mode through the outer object: this works whether or not the
# model has been wrapped in DataParallel, and propagates to the inner module.
model.eval()
val_predictions = []
val_true_labels = []

# No gradients are needed anywhere in evaluation
with torch.no_grad():
    for batch in tqdm(val_dataloader, desc="Evaluating"):
        # Batch-local names, so the notebook-level `labels` tensor is not overwritten
        batch_inputs, batch_masks, batch_labels = batch
        outputs = model(batch_inputs, attention_mask=batch_masks)

        logits = outputs.logits.to("cpu")  # Move the logits to CPU
        predictions = torch.argmax(logits, dim=1)  # Highest-scoring class per example
        val_predictions.extend(predictions.tolist())
        val_true_labels.extend(batch_labels.to("cpu").tolist())  # Move labels to CPU

accuracy = accuracy_score(val_true_labels, val_predictions)
classification_rep = classification_report(val_true_labels, val_predictions)

print(f"Validation accuracy: {accuracy}")
print("Classification report:\n", classification_rep)


Evaluating: 100%|██████████| 1/1 [00:00<00:00, 95.16it/s]

Validation accuracy: 1.0
Classification report:
               precision    recall  f1-score   support

          91       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1






In [13]:
# Use the fine-tuned model for inference: write one predicted intent name per
# input query, in input order.

# Make sure dropout is disabled even if the evaluation cell was skipped
model.eval()

# Class id -> intent name. Computed once here instead of once per query; the
# ordering matches the ids assigned from train_df['intent'].unique() at load time.
id_to_intent = train_df['intent'].unique()

# NOTE(review): "asnwer.txt" looks like a typo for "answer.txt"; left unchanged in
# case something downstream expects this exact file name.
with open("asnwer.txt", "w", encoding="utf-8") as file:
    for query in inference_queries:
        encoded_input = tokenizer(query, padding=True, truncation=True, return_tensors="pt")
        # Query-local names, so the training tensors `input_ids` and
        # `attention_mask` are not overwritten
        query_ids = encoded_input["input_ids"]
        query_mask = encoded_input["attention_mask"]

        with torch.no_grad():
            outputs = model(query_ids, attention_mask=query_mask)

        logits = outputs.logits
        predicted_intent_id = torch.argmax(logits, dim=1).item()
        predicted_intent = id_to_intent[predicted_intent_id]

        # Write the predicted intent label to the text file
        file.write(f"{predicted_intent}\n")