In [21]:
import json
from transformers import BertTokenizer, BertModel
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import f1_score, classification_report
from scripts.extraction import load_data_from_json
from models.bert_model import BERTClassifier, DialogueActDataset

In [22]:
# Training and evaluation function
def train_and_evaluate_model(model, train_loader, val_loader, test_loader, optimizer, criterion, num_epochs=20, device='cpu'):
    """Train ``model``, print per-epoch losses, then score it on the test set.

    Every loader must yield ``(input_ids, attention_mask, labels)`` batches.
    The model is called as ``model(input_ids, attention_mask)`` and its output
    is passed to ``criterion`` together with ``labels``. Test-time outputs go
    through a sigmoid and a fixed 0.5 threshold, i.e. the evaluation treats the
    task as multi-label classification with 2-D (batch, num_labels) targets.

    Args:
        model: torch module to train; moved to ``device`` in place.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches (loss is only reported).
        test_loader: iterable of test batches used for the final metrics.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss callable taking ``(logits, labels)``.
        num_epochs: number of passes over ``train_loader``.
        device: device on which the forward/backward passes run.

    Returns:
        ``None`` when ``test_loader`` yields no batches, otherwise a dict with
        ``"f1_macro"`` (float), ``"accuracy"`` (float, element-wise over every
        sample/label cell) and ``"report"`` (the classification report text).
    """

    def _mean_loss(total, batches):
        # An empty loader has no meaningful average; report NaN rather than
        # crashing with ZeroDivisionError.
        return total / batches if batches else float('nan')

    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        train_batches = 0  # counted while iterating so loaders without __len__ also work
        for input_ids, attention_mask, labels in train_loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

            optimizer.zero_grad()
            logits = model(input_ids, attention_mask)  # the model needs both input_ids and attention_mask
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            train_batches += 1

        print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {_mean_loss(total_loss, train_batches):.4f}")

        # Validation: same forward pass, but without gradients or weight updates.
        model.eval()
        val_loss = 0.0
        val_batches = 0
        with torch.no_grad():
            for input_ids, attention_mask, labels in val_loader:
                input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
                logits = model(input_ids, attention_mask)
                val_loss += criterion(logits, labels).item()
                val_batches += 1

        print(f"Epoch {epoch + 1}/{num_epochs}, Validation Loss: {_mean_loss(val_loss, val_batches):.4f}")

    # Evaluation on the test data.
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for input_ids, attention_mask, labels in test_loader:
            input_ids, attention_mask = input_ids.to(device), attention_mask.to(device)
            probs = torch.sigmoid(model(input_ids, attention_mask))
            # Collect on the CPU so the whole test set never has to sit in
            # accelerator memory; the metrics below are computed on the CPU anyway.
            all_preds.append(probs.cpu())
            all_labels.append(labels.cpu())

    if not all_preds:
        # torch.cat() rejects an empty list, and there is nothing to score.
        print("Test loader is empty; skipping test evaluation.")
        return None

    # Stack the per-batch tensors into (num_samples, num_labels).
    all_labels = torch.cat(all_labels, dim=0)
    # Threshold the probabilities: a sigmoid output above 0.5 counts as a positive label.
    all_preds = (torch.cat(all_preds, dim=0) > 0.5).float()

    y_true = all_labels.numpy()
    y_pred = all_preds.numpy()

    # Macro-averaged F1: unweighted mean of the per-class F1 scores.
    f1 = f1_score(y_true, y_pred, average='macro')
    print(f"F1 Score (Macro): {f1:.4f}")

    # Per-class precision, recall and F1.
    report = classification_report(y_true, y_pred, target_names=[f"Class {i}" for i in range(all_labels.size(1))])
    print("Classification Report:")
    print(report)

    # Element-wise accuracy: fraction of (sample, label) cells predicted correctly.
    accuracy = (all_preds == all_labels).float().mean().item()
    print(f"Accuracy on test set: {accuracy:.4f}")

    return {"f1_macro": float(f1), "accuracy": accuracy, "report": report}

In [23]:
# Predict function to test with any utterance
def predict(model, utterance, tokenizer, label_list, max_len=50, device='cpu', threshold=0.5):
    """Return the labels the model assigns to a single utterance.

    The utterance is tokenized (truncated/padded to ``max_len``), run through
    ``model(input_ids, attention_mask)`` without gradients, and every label
    whose sigmoid probability is strictly greater than ``threshold`` is kept.

    Args:
        model: torch module called as ``model(input_ids, attention_mask)``;
            switched to eval mode. It is not moved to ``device`` here, so the
            caller must have placed it there already.
        utterance: text to classify.
        tokenizer: callable accepting ``truncation``, ``padding``,
            ``max_length`` and ``return_tensors`` and returning a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors that already carry
            a leading batch dimension (as ``return_tensors='pt'`` produces).
        label_list: label names indexed by model output position.
        max_len: padded/truncated sequence length.
        device: device the input tensors are moved to.
        threshold: probability cut-off for a positive label (default 0.5,
            the value that used to be hard-coded).

    Returns:
        List of entries from ``label_list`` predicted for the utterance, in
        output-index order (possibly empty).
    """
    model.eval()  # evaluation mode
    tokens = tokenizer(utterance, truncation=True, padding='max_length', max_length=max_len, return_tensors='pt')
    # The tokenizer output is already batched, so it is fed to the model as is.
    input_ids = tokens['input_ids'].to(device)
    attention_mask = tokens['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids, attention_mask)

    # Probabilities for the first (only) item in the batch.
    probs = torch.sigmoid(logits)[0].cpu().tolist()
    return [label_list[i] for i, prob in enumerate(probs) if prob > threshold]


In [27]:
# Main function
def main(data_dir='C:/Users/thang/Desktop/NLP/assignment/dialogue_systems/data/processed_data'):
    """Fine-tune the BERT dialogue-act classifier, save it, and run one sample prediction.

    Steps: load the train/val/test JSON splits from ``data_dir``, derive the
    label vocabulary from the training split, build datasets and loaders,
    train and evaluate for 10 epochs, save the weights to ``bert_model.pth``
    in the current working directory, and print the predicted dialogue acts
    for one example utterance.

    Args:
        data_dir: directory containing ``train.json``, ``val.json`` and
            ``test.json``. Defaults to the location used originally; pass a
            different path to run on another machine.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the train, validation, and test data from JSON files.
    train_data = load_data_from_json(f'{data_dir}/train.json')
    val_data = load_data_from_json(f'{data_dir}/val.json')
    test_data = load_data_from_json(f'{data_dir}/test.json')

    # Sort the label vocabulary: plain set iteration order for strings changes
    # between interpreter runs (hash randomization), which would make the
    # output indices of the saved checkpoint impossible to map back to labels.
    # NOTE(review): assumes labels are mutually comparable (presumably strings).
    label_list = sorted({label for labels in train_data.values() for label in labels})

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    max_len = 40

    train_dataset = DialogueActDataset(list(train_data.items()), tokenizer, max_len, label_list)
    val_dataset = DialogueActDataset(list(val_data.items()), tokenizer, max_len, label_list)
    test_dataset = DialogueActDataset(list(test_data.items()), tokenizer, max_len, label_list)

    # Only the training split is shuffled; evaluation order does not matter.
    train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=100)
    test_loader = DataLoader(test_dataset, batch_size=100)

    bert_model = BERTClassifier(num_labels=len(label_list)).to(device)
    optimizer = optim.Adam(bert_model.parameters(), lr=1e-5)
    criterion = nn.BCEWithLogitsLoss()

    print("Training BERT Model")
    train_and_evaluate_model(bert_model, train_loader, val_loader, test_loader, optimizer, criterion, num_epochs=10, device=device)

    # Save the model weights.
    torch.save(bert_model.state_dict(), 'bert_model.pth')

    # Test with a new utterance.
    utterance = "What type of food would you like?"
    predicted_labels = predict(bert_model, utterance, tokenizer, label_list, max_len, device)
    print(f"Predicted Dialogue Acts for the utterance: {predicted_labels}")

In [28]:
# Run the whole pipeline defined in main(): training, test evaluation,
# saving the weights, and a sample prediction.
main()

Training BERT Model


KeyboardInterrupt: 