# deep learning based intent classifier

In [15]:
import sys
sys.path.append("G:/Cdac/ML_Final_Project/Multi-Modal-multi-Purpose-AI-agent/intent_classifier")
import os
import glob
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from myutils.data_preprocessor import load_and_prepare_data
from transformers import BertTokenizer
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split


In [16]:
data_path = "../../data/raw"
df = load_and_prepare_data(data_path)
X = df['text']
y = df['intent']



In [17]:
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(y)

In [18]:
intent_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Intent to Label Mapping:", intent_mapping)

Intent to Label Mapping: {'analyse_product_sentiment': np.int64(0), 'convert_to_audio': np.int64(1), 'get_weather': np.int64(2), 'gmail_operations': np.int64(3), 'make_notes': np.int64(4), 'stock_sentiment': np.int64(5), 'voice_summary': np.int64(6)}


In [19]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

max_len = 64 
inputs = tokenizer(texts, 
                   padding='max_length',
                   truncation=True,
                   max_length=max_len,
                   return_tensors='pt') 

input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

labels_tensor = torch.tensor(encoded_labels)


In [21]:
class IntentDataset(Dataset):
    def __init__(self, input_ids, attention_mask, labels):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx],
            'labels': self.labels[idx]
        }


dataset = IntentDataset(input_ids, attention_mask, labels_tensor)

In [22]:
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

In [23]:
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)

In [25]:
import torch.nn as nn
from transformers import BertModel

class BERTIntentClassifier(nn.Module):
    def __init__(self, num_classes):
        super(BERTIntentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        # Get pooled output from BERT
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output

        # Apply dropout and final linear layer
        x = self.dropout(pooled_output)
        x = self.fc(x)
        return x


In [26]:
num_classes = len(label_encoder.classes_)
model = BERTIntentClassifier(num_classes)


In [27]:
from transformers import AdamW, get_scheduler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Optimizer and loss
optimizer = AdamW(model.parameters(), lr=2e-5)
loss_fn = nn.CrossEntropyLoss()

# Learning rate scheduler (optional but useful)
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear", optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)


ImportError: cannot import name 'AdamW' from 'transformers' (C:\Users\91966\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\LocalCache\local-packages\Python313\site-packages\transformers\__init__.py)