In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from sklearn.model_selection import train_test_split
import random

In [28]:
dataset = [
    {"text": "Hello, how are you?", "intent": "greeting"},
    {"text": "What's your name?", "intent": "question"},
    {"text": "I'm feeling sad today.", "intent": "emotion"},
    {"text": "Can you tell me a joke?", "intent": "entertainment"},
    {"text": "Goodbye, see you later!", "intent": "greeting"},
    {"text": "How old are you?", "intent": "question"},
    {"text": "I'm feeling happy today.", "intent": "emotion"},
    {"text": "Can you recommend a movie?", "intent": "entertainment"},
    {"text": "I want to book a flight from New York to Los Angeles.", "intent": "flight_booking"}
]

intent_to_label = {
    "greeting": 0,
    "question": 1,
    "emotion": 2,
    "entertainment": 3,
    "flight_booking": 4
}

labels = []
for example in dataset:
    labels.append(intent_to_label[example["intent"]])

train_texts, val_texts, train_labels, val_labels = train_test_split([example["text"] for example in dataset], labels, test_size=0.2, random_state=42)

In [33]:
model_name = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5)
tokenizer = AutoTokenizer.from_pretrained(model_name)

train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)

train_input_ids = torch.tensor(train_encodings['input_ids'])
train_attention_mask = torch.tensor(train_encodings['attention_mask'])
train_labels = torch.tensor(train_labels)

val_input_ids = torch.tensor(val_encodings['input_ids'])
val_attention_mask = torch.tensor(val_encodings['attention_mask'])
val_labels = torch.tensor(val_labels)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  train_labels = torch.tensor(train_labels)
  val_labels = torch.tensor(val_labels)


In [34]:
class ConversationalDataset(torch.utils.data.Dataset):
    def __init__(self, input_ids, attention_mask, labels):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.labels = labels

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx],
            'labels': self.labels[idx]
        }

    def __len__(self):
        return len(self.labels)

In [35]:
train_dataset = ConversationalDataset(train_input_ids, train_attention_mask, train_labels)
val_dataset = ConversationalDataset(val_input_ids, val_attention_mask, val_labels)

batch_size = 16
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss()

In [38]:
for epoch in range(50):
    model.train()
    total_loss = 0
    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        outputs = model(input_ids, attention_mask=attention_mask)
        loss = criterion(outputs.logits, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"epoch {epoch+1}, loss: {total_loss / len(train_loader)}")

    model.eval()
    with torch.no_grad():
        total_correct = 0
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted = torch.max(outputs.logits, dim=1)
            total_correct += (predicted == labels).sum().item()

ner_model = pipeline("ner", model="bert-base-uncased")

epoch 1, loss: 1.2405967712402344
epoch 2, loss: 1.1747289896011353
epoch 3, loss: 1.2043672800064087
epoch 4, loss: 1.1451902389526367
epoch 5, loss: 1.0798234939575195
epoch 6, loss: 1.1614047288894653
epoch 7, loss: 1.1084938049316406
epoch 8, loss: 0.9849572777748108
epoch 9, loss: 1.0009880065917969
epoch 10, loss: 1.0408557653427124
epoch 11, loss: 0.9276723265647888
epoch 12, loss: 0.8964638113975525
epoch 13, loss: 0.9480992555618286
epoch 14, loss: 0.8420810103416443
epoch 15, loss: 0.8750584721565247
epoch 16, loss: 0.8209642171859741
epoch 17, loss: 0.8008236885070801
epoch 18, loss: 0.7374863028526306
epoch 19, loss: 0.8163710832595825
epoch 20, loss: 0.8957806825637817
epoch 21, loss: 0.8373875021934509
epoch 22, loss: 0.7354558706283569
epoch 23, loss: 0.8144745826721191
epoch 24, loss: 0.6359387636184692
epoch 25, loss: 0.6128525733947754
epoch 26, loss: 0.6850585341453552
epoch 27, loss: 0.6907614469528198
epoch 28, loss: 0.6368293762207031
epoch 29, loss: 0.70759254693

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [39]:
def extract_entities(text):
    entities = ner_model(text)
    entity_list = [(entity["word"], entity["score"], entity["entity"]) for entity in entities]
    return entity_list

def recognize_intent(text):
    encoding = tokenizer.encode_plus(
        text,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt"
    )
    input_ids = encoding["input_ids"].flatten()
    attention_mask = encoding["attention_mask"].flatten()

    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)

    outputs = model(input_ids.unsqueeze(0), attention_mask=attention_mask.unsqueeze(0))
    _, predicted = torch.max(outputs.logits, dim=1)
    intent = list(intent_to_label.keys())[list(intent_to_label.values()).index(predicted.item())]
    return intent

In [40]:
text = input("Say something: ")
entities = extract_entities(text.lower())
print("Entities:", entities)

intent = recognize_intent(text)
print("Intent:", intent)

Say something: I want to book a flight from New York to Vancouver.
Entities: [('i', 0.6763418, 'LABEL_0'), ('want', 0.6037237, 'LABEL_0'), ('to', 0.63456213, 'LABEL_0'), ('book', 0.5630288, 'LABEL_0'), ('a', 0.54782164, 'LABEL_0'), ('flight', 0.66510147, 'LABEL_0'), ('from', 0.5150968, 'LABEL_0'), ('new', 0.72696835, 'LABEL_0'), ('york', 0.64996743, 'LABEL_0'), ('to', 0.5500471, 'LABEL_0'), ('vancouver', 0.60047096, 'LABEL_0'), ('.', 0.57571155, 'LABEL_0')]
Intent: flight_booking
