# Fine-Tune Intent Recognition Model in LoRA way 

In [1]:
import os
import torch
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from peft import get_peft_model, LoraConfig, TaskType
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import f1_score, accuracy_score, classification_report, confusion_matrix

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
pd.set_option("display.max_columns", 101)
pd.set_option('display.max_colwidth', 100)

### 1. Load dataset

In [3]:
data = pd.read_csv(os.path.join("datasets","generated_inquiries.csv"))
data

Unnamed: 0,instruction,category
0,i dont have a user account can ya help me open it,Bank
1,"I'd like to close a user account, where to do it?",Bank
2,I'm trying to find informayion about the current balance of my account,Bank
3,i dotn wanna keep my fucking account help me close it,Bank
4,i got to close a fucking user account how to do it,Bank
...,...,...
1315,How do I register to vote?,Nonbank
1316,What are the requirements for a name change?,Nonbank
1317,Where can I get a copy of my birth certificate?,Nonbank
1318,How do I apply for social security benefits?,Nonbank


In [4]:
# change label to binary
data.loc[data['category'] == 'Nonbank', 'category'] = 0
data.loc[data['category'] == 'Bank', 'category'] = 1
data['category'] = data['category'].astype(int)

In [5]:
data.head()

Unnamed: 0,instruction,category
0,i dont have a user account can ya help me open it,1
1,"I'd like to close a user account, where to do it?",1
2,I'm trying to find informayion about the current balance of my account,1
3,i dotn wanna keep my fucking account help me close it,1
4,i got to close a fucking user account how to do it,1


In [6]:
# samples are balanced
data['category'].value_counts()

category
1    720
0    600
Name: count, dtype: int64

### 2. Tokenization with Bert

In [7]:
PreTrained_Model = 'bert-base-uncased'

In [8]:
tokenizer = BertTokenizer.from_pretrained(PreTrained_Model)

### 3. Prepare Training/test dataset

In [9]:
# build dataset
class NewsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        label = self.labels[item]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

In [10]:
MAX_LEN = 128
BATCH_SIZE = 8

In [11]:
train_texts, val_texts, train_labels, val_labels = train_test_split(data['instruction'].to_numpy(), data['category'].to_numpy(), test_size=0.2)

train_dataset = NewsDataset(train_texts, train_labels, tokenizer, MAX_LEN)
val_dataset = NewsDataset(val_texts, val_labels, tokenizer, MAX_LEN)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

In [12]:
train_labels

array([1, 1, 0, ..., 0, 0, 0])

### 4. Load Pre-Trained Model

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [14]:
from transformers import BertTokenizer, BertForSequenceClassification

# Load the pre-trained BERT model
model = BertForSequenceClassification.from_pretrained(PreTrained_Model, num_labels=2)

model = model.to(device)

# Freeze BERT parameters
for param in model.base_model.parameters():
    param.requires_grad = False

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### 5. Model Training

In [15]:
# Set up LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,  # Define the task type
    r=8,  # Rank of low-rank matrices (you can adjust based on your needs)
    lora_alpha=16,  # Scaling factor (can be adjusted)
    lora_dropout=0.1,  # Dropout for LoRA layers
    bias="none"  # Specify whether to include bias terms in the low-rank matrices
)

# Get the LoRA-enhanced model
model_with_lora = get_peft_model(model, lora_config)

# Set up training arguments and Trainer
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=1e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    warmup_steps=100,
    logging_steps=10,
    save_strategy="epoch",
    max_grad_norm=1.0,  # Added gradient clipping
    load_best_model_at_end=True,  # Load best model at end of training
    metric_for_best_model="eval_loss",  # Use validation loss to select best model
)



In [16]:
# Define Trainer for evaluation
trainer = Trainer(
    model=model_with_lora,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=lambda eval_pred: {
        "accuracy": accuracy_score(eval_pred.label_ids, eval_pred.predictions.argmax(-1)),
        "f1": f1_score(eval_pred.label_ids, eval_pred.predictions.argmax(-1), average='binary')
    }
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [17]:
# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7081,0.679211,0.560606,0.718447
2,0.6468,0.654971,0.606061,0.734694
3,0.6414,0.621394,0.916667,0.924138
4,0.6249,0.586556,0.897727,0.899628
5,0.613,0.57287,0.886364,0.887218


TrainOutput(global_step=660, training_loss=0.6430802677616928, metrics={'train_runtime': 27.2259, 'train_samples_per_second': 193.933, 'train_steps_per_second': 24.242, 'total_flos': 348508709683200.0, 'train_loss': 0.6430802677616928, 'epoch': 5.0})

### 5. Model Evaluation

In [18]:
model_with_lora.eval()
all_predictions = []
all_labels = []

In [19]:
with torch.no_grad():
    for batch in val_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model_with_lora(input_ids=input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

In [20]:
accuracy = accuracy_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions, average='binary')
conf_matrix = confusion_matrix(all_labels, all_predictions)
class_report = classification_report(all_labels, all_predictions)

In [21]:
print(f"Validation accuracy: {accuracy * 100:.2f}%")
print(f"Validation F1 Score: {f1:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

Validation accuracy: 88.64%
Validation F1 Score: 0.89

Confusion Matrix:
[[116   0]
 [ 30 118]]

Classification Report:
              precision    recall  f1-score   support

           0       0.79      1.00      0.89       116
           1       1.00      0.80      0.89       148

    accuracy                           0.89       264
   macro avg       0.90      0.90      0.89       264
weighted avg       0.91      0.89      0.89       264



### 6. Model Prediction

In [22]:
def predict(model, texts, tokenizer):
    model.eval()
    encodings = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    input_ids = encodings['input_ids'].to(device)
    attention_mask = encodings['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)

    return predictions

#### 6.1 Predict on Training dataset

In [28]:
train_pred_list = []

for text in train_texts:
    pred = predict(model_with_lora, text, tokenizer)
    train_pred_list.append(pred.item())

In [29]:
accuracy = accuracy_score(train_labels, train_pred_list)
print(f'Accuracy of training dataset: {accuracy:.4f}')

Accuracy of training dataset: 0.9223


In [30]:
f1 = f1_score(train_labels, train_pred_list, average='binary')
print(f"F1 Score of training dataset: {f1:.2f}")

F1 Score of training dataset: 0.92


#### 6.2 Predict on Validation dataset

In [32]:
val_pred_list = []

for text in val_texts:
    pred = predict(model_with_lora, text, tokenizer)
    val_pred_list.append(pred.item())

In [33]:
accuracy = accuracy_score(val_labels, val_pred_list)
print(f'Accuracy of training dataset: {accuracy:.4f}')

Accuracy of training dataset: 0.8864


In [34]:
f1 = f1_score(val_labels, val_pred_list, average='binary')
print(f"F1 Score of training dataset: {f1:.2f}")

F1 Score of training dataset: 0.89


#### 6.3 Test dataset

In [42]:
predict(model_with_lora, "may i book a hotel number?", tokenizer)

tensor([0], device='cuda:0')

In [64]:
predict(model_with_lora, "i dotn wanna keep my fucking account help me close it", tokenizer)

tensor([1], device='cuda:0')

In [66]:
predict(model_with_lora, "I'd like information transaction details", tokenizer)

tensor([1], device='cuda:0')