In [109]:
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score
from tqdm import tqdm
import os
import re
import gensim.downloader as api

import nltk
from nltk.tokenize import word_tokenize

# Importing conlleval for evaluation
from conlleval import evaluate

In [110]:
def preprocess_data(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    processed_data = []
    
    for item in data:
        sentence = item['sentence']
        aspect_terms = item['aspect_terms']
        
        # Use NLTK's tokenizer for better accuracy
        tokens = word_tokenize(sentence)
        
        # Initialize all labels as 'O'
        labels = ['O'] * len(tokens)
        
        # Extract aspect terms
        terms = []
        
        for aspect in aspect_terms:
            term = aspect['term']
            terms.append(term)
            
            # Get the start and end positions
            start = int(aspect['from'])
            end = int(aspect['to'])
            
            # Find the tokens that correspond to this aspect term
            term_indices = []
            char_index = 0
            for i, token in enumerate(tokens):
                token_start = char_index
                token_end = token_start + len(token)
                
                # Check if this token overlaps with the aspect term
                if token_end > start and token_start < end:
                    term_indices.append(i)
                
                char_index = token_end + 1  # +1 for the space
            
            # Apply BIO tagging
            if term_indices:
                labels[term_indices[0]] = 'B'  # Beginning of aspect term
                for idx in term_indices[1:]:
                    labels[idx] = 'I'  # Inside of aspect term
        
        processed_item = {
            'sentence': sentence,
            'tokens': tokens,
            'labels': labels,
            'aspect_terms': terms
        }
        
        processed_data.append(processed_item)
    
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(processed_data, f, indent=2)
    
    return processed_data

In [111]:
class AspectTermDataset(Dataset):
    def __init__(self, data, embedding_model, label_to_idx):
        self.data = data
        self.embedding_model = embedding_model
        self.embedding_dim = embedding_model.vector_size
        self.label_to_idx = label_to_idx
        self.unk_vector = np.zeros(self.embedding_dim)  # Zero vector for unknown words
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        tokens = item['tokens']
        labels = item['labels']
        
        # Get word embeddings directly from the model
        token_embeddings = []
        for token in tokens:
            token_lower = token.lower()
            if token_lower in self.embedding_model:
                token_embeddings.append(self.embedding_model[token_lower])
            else:
                token_embeddings.append(self.unk_vector)
        
        token_embeddings = np.array(token_embeddings)
        label_indices = [self.label_to_idx[label] for label in labels]
        
        return {
            'embeddings': torch.tensor(token_embeddings, dtype=torch.float),
            'labels': torch.tensor(label_indices, dtype=torch.long),
            'lengths': len(tokens)
        }

In [112]:
# Collate function for batching
def collate_fn(batch):
    # Sort the batch by length in descending order
    batch = sorted(batch, key=lambda x: x['lengths'], reverse=True)
    
    # Get the length of each sequence
    lengths = [item['lengths'] for item in batch]
    
    # Get the maximum length in the batch
    max_length = max(lengths)
    embedding_dim = batch[0]['embeddings'].shape[1]
    
    # Pad the sequences
    embeddings = torch.zeros(len(batch), max_length, embedding_dim, dtype=torch.float)
    labels = torch.zeros(len(batch), max_length, dtype=torch.long)
    
    for i, item in enumerate(batch):
        embeddings[i, :item['lengths']] = item['embeddings']
        labels[i, :item['lengths']] = item['labels']
    
    return {
        'embeddings': embeddings,
        'labels': labels,
        'lengths': torch.tensor(lengths, dtype=torch.long)
    }

In [113]:
# Modified RNN Model (non-bidirectional)
class RNNModel(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim):
        super(RNNModel, self).__init__()
        
        # No embedding layer as we're using embeddings directly
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True, bidirectional=False)
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, embeddings, lengths):
        # Pack the sequences
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embeddings, lengths.cpu(), batch_first=True, enforce_sorted=False)
        
        packed_output, hidden = self.rnn(packed_embedded)
        
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        
        output = self.dropout(output)
        logits = self.fc(output)
        
        return logits

# Modified GRU Model (non-bidirectional)
class GRUModel(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim):
        super(GRUModel, self).__init__()
        
        # No embedding layer as we're using embeddings directly
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True, bidirectional=False)
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, embeddings, lengths):
        # Pack the sequences
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embeddings, lengths.cpu(), batch_first=True, enforce_sorted=False)
        
        packed_output, hidden = self.gru(packed_embedded)
        
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        
        output = self.dropout(output)
        logits = self.fc(output)
        
        return logits

In [114]:
# # Load GloVe embeddings
# def load_glove_embeddings(path, word_to_idx, embedding_dim=300):
#     embeddings = np.zeros((len(word_to_idx), embedding_dim))
    
#     with open(path, 'r', encoding='utf-8') as f:
#         for line in f:
#             values = line.split()
#             word = values[0]
#             if word in word_to_idx:
#                 vector = np.asarray(values[1:], dtype='float32')
#                 embeddings[word_to_idx[word]] = vector
    
#     return torch.FloatTensor(embeddings)

In [115]:
# # Load FastText embeddings
# def load_fasttext_embeddings(path, word_to_idx, embedding_dim=300):
#     model = KeyedVectors.load_word2vec_format(path, binary=False)
#     embeddings = np.zeros((len(word_to_idx), embedding_dim))
    
#     for word, idx in word_to_idx.items():
#         if word in model:
#             embeddings[idx] = model[word]
    
#     return torch.FloatTensor(embeddings)

In [116]:
# Evaluation function
def evaluate_model(model, data_loader, criterion, device):
    model.eval()
    total_loss = 0
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for batch in data_loader:
            embeddings = batch['embeddings'].to(device)
            labels = batch['labels'].to(device)
            lengths = batch['lengths']
            
            outputs = model(embeddings, lengths)
            
            # Reshape outputs and labels for loss calculation
            outputs_flat = outputs.view(-1, outputs.shape[-1])
            labels_flat = labels.view(-1)
            
            # Calculate loss (ignore padding)
            mask = labels_flat != 0  # Assuming 0 is the padding index
            loss = criterion(outputs_flat[mask], labels_flat[mask])
            
            total_loss += loss.item()
            
            # Get predictions
            _, predictions = torch.max(outputs, dim=2)
            
            # Collect predictions and labels (ignoring padding)
            for i in range(len(lengths)):
                length = lengths[i].item()
                pred = predictions[i, :length].cpu().numpy()
                lab = labels[i, :length].cpu().numpy()
                
                all_predictions.extend(pred)
                all_labels.extend(lab)
    
    # Calculate F1 score
    f1 = f1_score(all_labels, all_predictions, average='weighted')
    
    return total_loss / len(data_loader), f1

In [117]:
# Training function
def train(model, train_loader, val_loader, optimizer, criterion, device, epochs, model_save_path):
    best_f1 = 0.0
    train_losses = []
    val_losses = []
    
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        
        for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
            embeddings = batch['embeddings'].to(device)
            labels = batch['labels'].to(device)
            lengths = batch['lengths']
            
            optimizer.zero_grad()
            
            outputs = model(embeddings, lengths)
            
            # Reshape outputs and labels for loss calculation
            outputs = outputs.view(-1, outputs.shape[-1])
            labels = labels.view(-1)
            
            # Calculate loss (ignore padding)
            mask = labels != 0  # Assuming 0 is the padding index
            loss = criterion(outputs[mask], labels[mask])
            
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
        
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)
        
        # Validation
        val_loss, val_f1 = evaluate_model(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}')
        
        # Save the best model
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), model_save_path)
            print(f'Model saved to {model_save_path}')
    
    return train_losses, val_losses

In [118]:
# Convert predictions to BIO format
def convert_to_bio(idx_to_label, predictions, lengths):
    bio_predictions = []
    
    for i, length in enumerate(lengths):
        # Convert tensor values to Python integers before dictionary lookup
        bio_predictions.append([idx_to_label[pred.item()] for pred in predictions[i, :length]])
    
    return bio_predictions

In [119]:
def calculate_f1_conlleval(tokens, true_labels, pred_labels):
    results = []
    
    for sample_tokens, sample_true, sample_pred in zip(tokens, true_labels, pred_labels):
        for token, true, pred in zip(sample_tokens, sample_true, sample_pred):
            results.append(f"{token} {true} {pred}")
        results.append("")  # Empty line between sentences
    
    # Remove debug print
    eval_output = evaluate(results)
    
    # Extract chunk-level metrics
    chunk_metrics = eval_output['overall']['chunks']['evals']
    # Extract tag-level metrics
    tag_metrics = eval_output['overall']['tags']['evals']
    
    return chunk_metrics, tag_metrics

In [120]:
def plot_losses(train_losses, val_losses, title, save_path):
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(save_path)
    plt.close()

In [121]:
def test_model(model, test_data, embedding_model, label_to_idx, idx_to_label, device):
    model.eval()
    
    all_predictions = []
    all_tokens = []
    all_true_labels = []
    
    embedding_dim = embedding_model.vector_size
    unk_vector = np.zeros(embedding_dim)  # Zero vector for unknown words
    
    # Process each item separately to maintain direct mapping to test_data
    for item in test_data:
        tokens = item['tokens']
        labels = item['labels']
        
        # Get word embeddings directly
        token_embeddings = []
        for token in tokens:
            token_lower = token.lower()
            if token_lower in embedding_model:
                token_embeddings.append(embedding_model[token_lower])
            else:
                token_embeddings.append(unk_vector)
        
        # Convert to tensor
        token_embeddings = np.array(token_embeddings)
        token_tensor = torch.tensor([token_embeddings], dtype=torch.float).to(device)
        length = torch.tensor([len(tokens)], dtype=torch.long)
        
        # Get predictions
        outputs = model(token_tensor, length)
        _, predictions = torch.max(outputs, dim=2)
        
        # Convert predictions to BIO format
        label_indices = [label_to_idx[label] for label in labels]
        bio_predictions = [idx_to_label[pred.item()] for pred in predictions[0, :len(tokens)]]
        bio_true = labels  # Already in BIO format
        
        all_predictions.append(bio_predictions)
        all_tokens.append(tokens)
        all_true_labels.append(bio_true)
    
    # Calculate both chunk-level and tag-level metrics using conlleval
    chunk_metrics, tag_metrics = calculate_f1_conlleval(all_tokens, all_true_labels, all_predictions)
    
    print("Chunk-level evaluation:")
    print(f"  Precision: {chunk_metrics['prec']:.4f}")
    print(f"  Recall:    {chunk_metrics['rec']:.4f}")
    print(f"  F1 Score:  {chunk_metrics['f1']:.4f}")
    
    print("\nTag-level evaluation:")
    print(f"  Precision: {tag_metrics['prec']:.4f}")
    print(f"  Recall:    {tag_metrics['rec']:.4f}")
    print(f"  F1 Score:  {tag_metrics['f1']:.4f}")
    
    return chunk_metrics, tag_metrics

In [122]:
def main():
    # Set device
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    print(f'Using device: {device}')
    
    # Preprocess data
    print('Preprocessing data...')
    train_data = preprocess_data('train.json', 'train_task_1.json')
    val_data = preprocess_data('val.json', 'val_task_1.json')
    
    # Define label mapping
    label_to_idx = {'<PAD>': 0, 'O': 1, 'B': 2, 'I': 3}
    idx_to_label = {0: '<PAD>', 1: 'O', 2: 'B', 3: 'I'}
    
    # Save label mapping
    with open("label_mapping.json", "w") as f:
        json.dump({
            "label_to_idx": label_to_idx,
            "idx_to_label": idx_to_label
        }, f)
    
    # Load pretrained embeddings    
    try:
        print('Loading pretrained embeddings...')
        
        # Load models
        print('Loading models...')
        glove_model = api.load("glove-wiki-gigaword-300")
        print('GloVe model loaded')
        fasttext_model = api.load("fasttext-wiki-news-subwords-300")
        print('FastText model loaded')
        
        embedding_dim = 300
        
    except Exception as e:
        print(f"Error loading from gensim: {e}")
        print("Exiting as we need pre-trained embeddings.")
        return
    
    # Get the embedding dimension
    embedding_dim = glove_model.vector_size
    hidden_dim = 512
    output_dim = len(label_to_idx)
    
    # Create datasets and dataloaders
    print('Creating datasets and dataloaders...')
    train_dataset_glove = AspectTermDataset(train_data, glove_model, label_to_idx)
    val_dataset_glove = AspectTermDataset(val_data, glove_model, label_to_idx)
    train_dataset_fasttext = AspectTermDataset(train_data, fasttext_model, label_to_idx)
    val_dataset_fasttext = AspectTermDataset(val_data, fasttext_model, label_to_idx)
    
    batch_size = 16
    train_loader_glove = DataLoader(train_dataset_glove, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader_glove = DataLoader(val_dataset_glove, batch_size=batch_size, collate_fn=collate_fn)
    train_loader_fasttext = DataLoader(train_dataset_fasttext, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader_fasttext = DataLoader(val_dataset_fasttext, batch_size=batch_size, collate_fn=collate_fn)
    
    # Create the directory for saving models if it doesn't exist
    os.makedirs('models', exist_ok=True)
    os.makedirs('plots', exist_ok=True)
    
    # Define models
    models = {
        'RNN_GloVe': {
            'model': RNNModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_glove,
            'val_loader': val_loader_glove,
            'embedding_model': glove_model
        },
        'RNN_FastText': {
            'model': RNNModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_fasttext,
            'val_loader': val_loader_fasttext,
            'embedding_model': fasttext_model
        },
        'GRU_GloVe': {
            'model': GRUModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_glove,
            'val_loader': val_loader_glove,
            'embedding_model': glove_model
        },
        'GRU_FastText': {
            'model': GRUModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_fasttext,
            'val_loader': val_loader_fasttext,
            'embedding_model': fasttext_model
        }
    }
    
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore padding index
    epochs = 15
    
    results = {}
    
    # Train and evaluate each model
    for name, model_config in models.items():
        print(f'\nTraining {name}...')
        model = model_config['model']
        train_loader = model_config['train_loader']
        val_loader = model_config['val_loader']
        
        optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
        model_save_path = f'models/{name}_best.pt'
        
        train_losses, val_losses = train(model, train_loader, val_loader, optimizer, criterion, device, epochs, model_save_path)
        
        # Plot losses
        plot_losses(train_losses, val_losses, f'{name} Training and Validation Loss', f'plots/{name}_loss.png')
        
        # Load the best model
        model.load_state_dict(torch.load(model_save_path))
        
        # Evaluate on validation set
        chunk_metrics, tag_metrics = test_model(model, val_data, model_config['embedding_model'], label_to_idx, idx_to_label, device)
        
        results[name] = {
            'chunk_f1': chunk_metrics['f1'],
            'chunk_prec': chunk_metrics['prec'],
            'chunk_rec': chunk_metrics['rec'],
            'tag_f1': tag_metrics['f1'],
            'tag_prec': tag_metrics['prec'],
            'tag_rec': tag_metrics['rec']
        }
        
        print(f'{name} validation results:')
        print(f"  Chunk-level -> Precision: {chunk_metrics['prec']:.4f}, Recall: {chunk_metrics['rec']:.4f}, F1: {chunk_metrics['f1']:.4f}")
        print(f"  Tag-level   -> Precision: {tag_metrics['prec']:.4f}, Recall: {tag_metrics['rec']:.4f}, F1: {tag_metrics['f1']:.4f}")
    
    # Print results summary
    print('\nResults Summary:')
    for name, result in results.items():
        print(f"{name}: Chunk F1 = {result['chunk_f1']:.4f}, Tag F1 = {result['tag_f1']:.4f}")
    
    # Find the best model based on chunk-level F1
    best_model_name = max(results, key=lambda x: results[x]['chunk_f1'])
    print(f'\nBest model: {best_model_name} with Chunk F1 = {results[best_model_name]["chunk_f1"]:.4f} and Tag F1 = {results[best_model_name]["tag_f1"]:.4f}')
    
    # Save best model info
    with open('best_model_info.json', 'w') as f:
        json.dump({
            'model_name': best_model_name,
            'chunk_f1': results[best_model_name]['chunk_f1'],
            'tag_f1': results[best_model_name]['tag_f1']
        }, f)
    
    # Save embedding model information
    with open('embedding_model_info.json', 'w') as f:
        json.dump({
            'best_model': best_model_name,
            'glove_model_name': "glove-wiki-gigaword-300",
            'fasttext_model_name': "fasttext-wiki-news-subwords-300"
        }, f)

In [123]:
def load_and_test(test_file, model_path, model_type, embedding_model_name):
    # Load label mapping
    with open("label_mapping.json", "r") as f:
        label_data = json.load(f)
    label_to_idx = label_data["label_to_idx"]
    # Convert idx_to_label keys to integers
    idx_to_label = {int(k): v for k, v in label_data["idx_to_label"].items()}
    
    # Process the test file
    test_data = preprocess_data(test_file, "test_task_1.json")
    
    # Set device
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    
    # Load embedding model
    try:
        print(f'Loading {embedding_model_name} embeddings...')
        embedding_model = api.load(embedding_model_name)
        print(f'{embedding_model_name} loaded')
    except Exception as e:
        print(f"Error loading embedding model: {e}")
        return None, None
    
    # Get the embedding dimension
    embedding_dim = embedding_dim = embedding_model.vector_size
    hidden_dim = 512
    output_dim = len(label_to_idx)
    
    # Initialize model
    if model_type.startswith('RNN'):
        model = RNNModel(embedding_dim, hidden_dim, output_dim).to(device)
    elif model_type.startswith('GRU'):
        model = GRUModel(embedding_dim, hidden_dim, output_dim).to(device)
    else:
        raise ValueError(f"Unknown model type: {model_type}")
    
    # Load model weights
    model.load_state_dict(torch.load(model_path, map_location=device))
    
    # Test the model
    chunk_metrics, tag_metrics = test_model(model, test_data, embedding_model, label_to_idx, idx_to_label, device)
    
    print("Test Results:")
    print("Chunk-level:")
    print(f"  Precision: {chunk_metrics['prec']:.4f}")
    print(f"  Recall:    {chunk_metrics['rec']:.4f}")
    print(f"  F1 Score:  {chunk_metrics['f1']:.4f}")
    print("Tag-level:")
    print(f"  Precision: {tag_metrics['prec']:.4f}")
    print(f"  Recall:    {tag_metrics['rec']:.4f}")
    print(f"  F1 Score:  {tag_metrics['f1']:.4f}")
    
    return chunk_metrics, tag_metrics

def test_with_best_model(test_file):
    # Load best model info
    with open('best_model_info.json', 'r') as f:
        best_model_info = json.load(f)
    
    # Load embedding model info
    with open('embedding_model_info.json', 'r') as f:
        embedding_info = json.load(f)
    
    best_model_name = best_model_info['model_name']
    model_path = f'models/{best_model_name}_best.pt'
    
    # Determine which embedding model to use
    if 'GloVe' in best_model_name:
        embedding_model_name = embedding_info['glove_model_name']
    else:
        embedding_model_name = embedding_info['fasttext_model_name']
    
    # Extract model type (RNN or GRU)
    model_type = best_model_name.split('_')[0]
    
    print(f"Testing with best model: {best_model_name}")
    print(f"Using embedding model: {embedding_model_name}")
    print(f"Model type: {model_type}")
    
    # Test the model
    chunk_metrics, tag_metrics = load_and_test(test_file, model_path, model_type, embedding_model_name)
    
    return chunk_metrics, tag_metrics

In [124]:
if __name__ == "__main__":
    # Create directory for plots
    os.makedirs('plots', exist_ok=True)
    
    # Run main function
    main()

Using device: cuda:1
Preprocessing data...


Loading pretrained embeddings...
Loading models...
GloVe model loaded
FastText model loaded
Creating datasets and dataloaders...

Training RNN_GloVe...


Epoch 1/15: 100%|██████████| 153/153 [00:00<00:00, 211.59it/s]


Epoch 1/15, Train Loss: 0.2821, Val Loss: 0.2307, Val F1: 0.9222
Model saved to models/RNN_GloVe_best.pt


Epoch 2/15: 100%|██████████| 153/153 [00:00<00:00, 217.87it/s]


Epoch 2/15, Train Loss: 0.2303, Val Loss: 0.1999, Val F1: 0.9276
Model saved to models/RNN_GloVe_best.pt


Epoch 3/15: 100%|██████████| 153/153 [00:00<00:00, 214.80it/s]


Epoch 3/15, Train Loss: 0.2152, Val Loss: 0.2073, Val F1: 0.9175


Epoch 4/15: 100%|██████████| 153/153 [00:00<00:00, 212.52it/s]


Epoch 4/15, Train Loss: 0.2117, Val Loss: 0.1981, Val F1: 0.9285
Model saved to models/RNN_GloVe_best.pt


Epoch 5/15: 100%|██████████| 153/153 [00:00<00:00, 209.71it/s]


Epoch 5/15, Train Loss: 0.2096, Val Loss: 0.2058, Val F1: 0.9267


Epoch 6/15: 100%|██████████| 153/153 [00:00<00:00, 216.38it/s]


Epoch 6/15, Train Loss: 0.1994, Val Loss: 0.2021, Val F1: 0.9294
Model saved to models/RNN_GloVe_best.pt


Epoch 7/15: 100%|██████████| 153/153 [00:00<00:00, 223.21it/s]


Epoch 7/15, Train Loss: 0.1931, Val Loss: 0.1934, Val F1: 0.9284


Epoch 8/15: 100%|██████████| 153/153 [00:00<00:00, 218.24it/s]


Epoch 8/15, Train Loss: 0.1899, Val Loss: 0.1919, Val F1: 0.9273


Epoch 9/15: 100%|██████████| 153/153 [00:00<00:00, 219.46it/s]


Epoch 9/15, Train Loss: 0.1754, Val Loss: 0.1958, Val F1: 0.9324
Model saved to models/RNN_GloVe_best.pt


Epoch 10/15: 100%|██████████| 153/153 [00:00<00:00, 218.97it/s]


Epoch 10/15, Train Loss: 0.1656, Val Loss: 0.1999, Val F1: 0.9279


Epoch 11/15: 100%|██████████| 153/153 [00:00<00:00, 219.52it/s]


Epoch 11/15, Train Loss: 0.1560, Val Loss: 0.1891, Val F1: 0.9281


Epoch 12/15: 100%|██████████| 153/153 [00:00<00:00, 219.60it/s]


Epoch 12/15, Train Loss: 0.1501, Val Loss: 0.1946, Val F1: 0.9314


Epoch 13/15: 100%|██████████| 153/153 [00:00<00:00, 219.54it/s]


Epoch 13/15, Train Loss: 0.1795, Val Loss: 0.2086, Val F1: 0.9273


Epoch 14/15: 100%|██████████| 153/153 [00:00<00:00, 219.16it/s]


Epoch 14/15, Train Loss: 0.1395, Val Loss: 0.1817, Val F1: 0.9319


Epoch 15/15: 100%|██████████| 153/153 [00:00<00:00, 216.85it/s]


Epoch 15/15, Train Loss: 0.1255, Val Loss: 0.2109, Val F1: 0.9286
Chunk-level evaluation:
  Precision: 0.6124
  Recall:    0.5892
  F1 Score:  0.6006

Tag-level evaluation:
  Precision: 0.9378
  Recall:    0.9378
  F1 Score:  0.9378
RNN_GloVe validation results:
  Chunk-level -> Precision: 0.6124, Recall: 0.5892, F1: 0.6006
  Tag-level   -> Precision: 0.9378, Recall: 0.9378, F1: 0.9378

Training RNN_FastText...


Epoch 1/15: 100%|██████████| 153/153 [00:00<00:00, 218.12it/s]


Epoch 1/15, Train Loss: 0.3327, Val Loss: 0.2379, Val F1: 0.9182
Model saved to models/RNN_FastText_best.pt


Epoch 2/15: 100%|██████████| 153/153 [00:00<00:00, 218.26it/s]


Epoch 2/15, Train Loss: 0.2348, Val Loss: 0.2027, Val F1: 0.9273
Model saved to models/RNN_FastText_best.pt


Epoch 3/15: 100%|██████████| 153/153 [00:00<00:00, 215.19it/s]


Epoch 3/15, Train Loss: 0.2183, Val Loss: 0.2233, Val F1: 0.9240


Epoch 4/15: 100%|██████████| 153/153 [00:00<00:00, 217.25it/s]


Epoch 4/15, Train Loss: 0.2157, Val Loss: 0.2115, Val F1: 0.9269


Epoch 5/15: 100%|██████████| 153/153 [00:00<00:00, 217.68it/s]


Epoch 5/15, Train Loss: 0.2045, Val Loss: 0.1857, Val F1: 0.9315
Model saved to models/RNN_FastText_best.pt


Epoch 6/15: 100%|██████████| 153/153 [00:00<00:00, 217.33it/s]


Epoch 6/15, Train Loss: 0.2021, Val Loss: 0.1907, Val F1: 0.9300


Epoch 7/15: 100%|██████████| 153/153 [00:00<00:00, 217.78it/s]


Epoch 7/15, Train Loss: 0.1999, Val Loss: 0.1958, Val F1: 0.9333
Model saved to models/RNN_FastText_best.pt


Epoch 8/15: 100%|██████████| 153/153 [00:00<00:00, 226.15it/s]


Epoch 8/15, Train Loss: 0.1991, Val Loss: 0.1921, Val F1: 0.9311


Epoch 9/15: 100%|██████████| 153/153 [00:00<00:00, 235.54it/s]


Epoch 9/15, Train Loss: 0.1960, Val Loss: 0.1833, Val F1: 0.9316


Epoch 10/15: 100%|██████████| 153/153 [00:00<00:00, 231.15it/s]


Epoch 10/15, Train Loss: 0.1900, Val Loss: 0.1826, Val F1: 0.9340
Model saved to models/RNN_FastText_best.pt


Epoch 11/15: 100%|██████████| 153/153 [00:00<00:00, 232.54it/s]


Epoch 11/15, Train Loss: 0.2029, Val Loss: 0.2152, Val F1: 0.9239


Epoch 12/15: 100%|██████████| 153/153 [00:00<00:00, 232.86it/s]


Epoch 12/15, Train Loss: 0.3646, Val Loss: 0.2332, Val F1: 0.9201


Epoch 13/15: 100%|██████████| 153/153 [00:00<00:00, 233.83it/s]


Epoch 13/15, Train Loss: 0.2707, Val Loss: 0.2271, Val F1: 0.9134


Epoch 14/15: 100%|██████████| 153/153 [00:00<00:00, 235.34it/s]


Epoch 14/15, Train Loss: 0.2435, Val Loss: 0.1934, Val F1: 0.9314


Epoch 15/15: 100%|██████████| 153/153 [00:00<00:00, 232.15it/s]


Epoch 15/15, Train Loss: 0.2395, Val Loss: 0.1949, Val F1: 0.9268
Chunk-level evaluation:
  Precision: 0.6229
  Recall:    0.5892
  F1 Score:  0.6056

Tag-level evaluation:
  Precision: 0.9384
  Recall:    0.9384
  F1 Score:  0.9384
RNN_FastText validation results:
  Chunk-level -> Precision: 0.6229, Recall: 0.5892, F1: 0.6056
  Tag-level   -> Precision: 0.9384, Recall: 0.9384, F1: 0.9384

Training GRU_GloVe...


Epoch 1/15: 100%|██████████| 153/153 [00:00<00:00, 231.12it/s]


Epoch 1/15, Train Loss: 0.2771, Val Loss: 0.1954, Val F1: 0.9290
Model saved to models/GRU_GloVe_best.pt


Epoch 2/15: 100%|██████████| 153/153 [00:00<00:00, 223.94it/s]


Epoch 2/15, Train Loss: 0.2079, Val Loss: 0.1749, Val F1: 0.9322
Model saved to models/GRU_GloVe_best.pt


Epoch 3/15: 100%|██████████| 153/153 [00:00<00:00, 227.66it/s]


Epoch 3/15, Train Loss: 0.1847, Val Loss: 0.1723, Val F1: 0.9363
Model saved to models/GRU_GloVe_best.pt


Epoch 4/15: 100%|██████████| 153/153 [00:00<00:00, 228.44it/s]


Epoch 4/15, Train Loss: 0.1627, Val Loss: 0.1630, Val F1: 0.9408
Model saved to models/GRU_GloVe_best.pt


Epoch 5/15: 100%|██████████| 153/153 [00:00<00:00, 230.23it/s]


Epoch 5/15, Train Loss: 0.1447, Val Loss: 0.1590, Val F1: 0.9411
Model saved to models/GRU_GloVe_best.pt


Epoch 6/15: 100%|██████████| 153/153 [00:00<00:00, 227.27it/s]


Epoch 6/15, Train Loss: 0.1227, Val Loss: 0.1655, Val F1: 0.9366


Epoch 7/15: 100%|██████████| 153/153 [00:00<00:00, 229.66it/s]


Epoch 7/15, Train Loss: 0.1064, Val Loss: 0.1753, Val F1: 0.9399


Epoch 8/15: 100%|██████████| 153/153 [00:00<00:00, 229.89it/s]


Epoch 8/15, Train Loss: 0.0874, Val Loss: 0.1753, Val F1: 0.9384


Epoch 9/15: 100%|██████████| 153/153 [00:00<00:00, 227.93it/s]


Epoch 9/15, Train Loss: 0.0679, Val Loss: 0.2101, Val F1: 0.9375


Epoch 10/15: 100%|██████████| 153/153 [00:00<00:00, 223.75it/s]


Epoch 10/15, Train Loss: 0.0544, Val Loss: 0.2067, Val F1: 0.9353


Epoch 11/15: 100%|██████████| 153/153 [00:00<00:00, 224.89it/s]


Epoch 11/15, Train Loss: 0.0412, Val Loss: 0.2322, Val F1: 0.9378


Epoch 12/15: 100%|██████████| 153/153 [00:00<00:00, 226.47it/s]


Epoch 12/15, Train Loss: 0.0316, Val Loss: 0.2416, Val F1: 0.9359


Epoch 13/15: 100%|██████████| 153/153 [00:00<00:00, 231.49it/s]


Epoch 13/15, Train Loss: 0.0254, Val Loss: 0.2695, Val F1: 0.9265


Epoch 14/15: 100%|██████████| 153/153 [00:00<00:00, 229.76it/s]


Epoch 14/15, Train Loss: 0.0274, Val Loss: 0.2840, Val F1: 0.9326


Epoch 15/15: 100%|██████████| 153/153 [00:00<00:00, 231.17it/s]


Epoch 15/15, Train Loss: 0.0162, Val Loss: 0.3037, Val F1: 0.9339
Chunk-level evaluation:
  Precision: 0.6488
  Recall:    0.5892
  F1 Score:  0.6176

Tag-level evaluation:
  Precision: 0.9452
  Recall:    0.9452
  F1 Score:  0.9452
GRU_GloVe validation results:
  Chunk-level -> Precision: 0.6488, Recall: 0.5892, F1: 0.6176
  Tag-level   -> Precision: 0.9452, Recall: 0.9452, F1: 0.9452

Training GRU_FastText...


Epoch 1/15: 100%|██████████| 153/153 [00:00<00:00, 209.86it/s]


Epoch 1/15, Train Loss: 0.3516, Val Loss: 0.2099, Val F1: 0.9248
Model saved to models/GRU_FastText_best.pt


Epoch 2/15: 100%|██████████| 153/153 [00:00<00:00, 207.45it/s]


Epoch 2/15, Train Loss: 0.2185, Val Loss: 0.1866, Val F1: 0.9316
Model saved to models/GRU_FastText_best.pt


Epoch 3/15: 100%|██████████| 153/153 [00:00<00:00, 224.44it/s]


Epoch 3/15, Train Loss: 0.2036, Val Loss: 0.1756, Val F1: 0.9323
Model saved to models/GRU_FastText_best.pt


Epoch 4/15: 100%|██████████| 153/153 [00:00<00:00, 224.75it/s]


Epoch 4/15, Train Loss: 0.1883, Val Loss: 0.1642, Val F1: 0.9401
Model saved to models/GRU_FastText_best.pt


Epoch 5/15: 100%|██████████| 153/153 [00:00<00:00, 227.25it/s]


Epoch 5/15, Train Loss: 0.1765, Val Loss: 0.1670, Val F1: 0.9390


Epoch 6/15: 100%|██████████| 153/153 [00:00<00:00, 221.97it/s]


Epoch 6/15, Train Loss: 0.1648, Val Loss: 0.1694, Val F1: 0.9377


Epoch 7/15: 100%|██████████| 153/153 [00:00<00:00, 214.54it/s]


Epoch 7/15, Train Loss: 0.1584, Val Loss: 0.1569, Val F1: 0.9418
Model saved to models/GRU_FastText_best.pt


Epoch 8/15: 100%|██████████| 153/153 [00:00<00:00, 216.36it/s]


Epoch 8/15, Train Loss: 0.1489, Val Loss: 0.1615, Val F1: 0.9408


Epoch 9/15: 100%|██████████| 153/153 [00:00<00:00, 221.39it/s]


Epoch 9/15, Train Loss: 0.1416, Val Loss: 0.1602, Val F1: 0.9399


Epoch 10/15: 100%|██████████| 153/153 [00:00<00:00, 216.37it/s]


Epoch 10/15, Train Loss: 0.1337, Val Loss: 0.1691, Val F1: 0.9418
Model saved to models/GRU_FastText_best.pt


Epoch 11/15: 100%|██████████| 153/153 [00:00<00:00, 217.15it/s]


Epoch 11/15, Train Loss: 0.1265, Val Loss: 0.1586, Val F1: 0.9417


Epoch 12/15: 100%|██████████| 153/153 [00:00<00:00, 226.51it/s]


Epoch 12/15, Train Loss: 0.1179, Val Loss: 0.1639, Val F1: 0.9437
Model saved to models/GRU_FastText_best.pt


Epoch 13/15: 100%|██████████| 153/153 [00:00<00:00, 217.85it/s]


Epoch 13/15, Train Loss: 0.1074, Val Loss: 0.1665, Val F1: 0.9405


Epoch 14/15: 100%|██████████| 153/153 [00:00<00:00, 211.19it/s]


Epoch 14/15, Train Loss: 0.0986, Val Loss: 0.1713, Val F1: 0.9403


Epoch 15/15: 100%|██████████| 153/153 [00:00<00:00, 186.14it/s]


Epoch 15/15, Train Loss: 0.0906, Val Loss: 0.1775, Val F1: 0.9424
Chunk-level evaluation:
  Precision: 0.6410
  Recall:    0.6514
  F1 Score:  0.6461

Tag-level evaluation:
  Precision: 0.9458
  Recall:    0.9458
  F1 Score:  0.9458
GRU_FastText validation results:
  Chunk-level -> Precision: 0.6410, Recall: 0.6514, F1: 0.6461
  Tag-level   -> Precision: 0.9458, Recall: 0.9458, F1: 0.9458

Results Summary:
RNN_GloVe: Chunk F1 = 0.6006, Tag F1 = 0.9378
RNN_FastText: Chunk F1 = 0.6056, Tag F1 = 0.9384
GRU_GloVe: Chunk F1 = 0.6176, Tag F1 = 0.9452
GRU_FastText: Chunk F1 = 0.6461, Tag F1 = 0.9458

Best model: GRU_FastText with Chunk F1 = 0.6461 and Tag F1 = 0.9458


In [125]:
# test function:
chunk_metrics, tag_metrics = test_with_best_model('val.json')

Testing with best model: GRU_FastText
Using embedding model: fasttext-wiki-news-subwords-300
Model type: GRU
Loading fasttext-wiki-news-subwords-300 embeddings...
fasttext-wiki-news-subwords-300 loaded
Chunk-level evaluation:
  Precision: 0.6410
  Recall:    0.6514
  F1 Score:  0.6461

Tag-level evaluation:
  Precision: 0.9458
  Recall:    0.9458
  F1 Score:  0.9458
Test Results:
Chunk-level:
  Precision: 0.6410
  Recall:    0.6514
  F1 Score:  0.6461
Tag-level:
  Precision: 0.9458
  Recall:    0.9458
  F1 Score:  0.9458
