In [1]:
from transformers import BertModel, BertTokenizer

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
bert_model = BertModel.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)


In [2]:
import torch
import torch.nn as nn

class IntentSlotModel(nn.Module):
    def __init__(self, bert_model, num_intent_labels, num_slot_labels):
        super(IntentSlotModel, self).__init__()
        self.bert = bert_model
        self.intent_classifier = nn.Linear(bert_model.config.hidden_size, num_intent_labels)
        self.slot_classifier = nn.Linear(bert_model.config.hidden_size, num_slot_labels)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state
        pooled_output = outputs.pooler_output
        
        intent_logits = self.intent_classifier(pooled_output)
        slot_logits = self.slot_classifier(sequence_output)
        
        return intent_logits, slot_logits

# Example: Assuming we have 10 intents and 20 slot labels
num_intent_labels = 10
num_slot_labels = 20
model = IntentSlotModel(bert_model, num_intent_labels, num_slot_labels)


In [5]:
inputs = ["book a flight to New York", "schedule a meeting at 3 PM"]
intent_labels = [0, 1]  # Example intent labels
slot_labels = [[0, 0, 0, 0, 1, 2, 3], [0, 0, 0, 0, 4, 5, 6]]  # Example slot labels

# Tokenize inputs
encoded_inputs = tokenizer(inputs, padding=True, truncation=True, return_tensors="pt")
input_ids = encoded_inputs['input_ids']
attention_mask = encoded_inputs['attention_mask']

# Pad slot labels to match the length of input_ids
max_len = input_ids.shape[1]
padded_slot_labels = []
for label in slot_labels:
    padded_label = label + [0] * (max_len - len(label))  # Padding with 0 (or any other pad token you use)
    padded_slot_labels.append(padded_label)
slot_labels = torch.tensor(padded_slot_labels)

# Convert intent labels to tensors
intent_labels = torch.tensor(intent_labels)


In [6]:
from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW

# Create a DataLoader
dataset = TensorDataset(input_ids, attention_mask, intent_labels, slot_labels)
dataloader = DataLoader(dataset, batch_size=2)

# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Loss functions
intent_loss_fn = nn.CrossEntropyLoss()
slot_loss_fn = nn.CrossEntropyLoss(ignore_index=0)  # Ignore pad token in slot loss

# Training loop
model.train()
for epoch in range(3):  # Example: 3 epochs
    for batch in dataloader:
        input_ids, attention_mask, intent_labels, slot_labels = batch
        
        # Forward pass
        intent_logits, slot_logits = model(input_ids, attention_mask)
        
        # Compute losses
        intent_loss = intent_loss_fn(intent_logits, intent_labels)
        slot_loss = slot_loss_fn(slot_logits.view(-1, num_slot_labels), slot_labels.view(-1))
        loss = intent_loss + slot_loss
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print(f"Epoch: {epoch}, Loss: {loss.item()}")




Epoch: 0, Loss: 5.883545875549316
Epoch: 1, Loss: 4.518344879150391
Epoch: 2, Loss: 3.9565749168395996
