In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score
from tqdm import tqdm
import os
import re
import gensim.downloader as api

import nltk
from nltk.tokenize import word_tokenize

# Importing conlleval for evaluation
from conlleval import evaluate

In [2]:
def preprocess_data(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    processed_data = []
    
    for item in data:
        sentence = item['sentence']
        aspect_terms = item['aspect_terms']
        
        # Use NLTK's tokenizer for better accuracy
        tokens = word_tokenize(sentence)
        
        # Initialize all labels as 'O'
        labels = ['O'] * len(tokens)
        
        # Extract aspect terms
        terms = []
        
        for aspect in aspect_terms:
            term = aspect['term']
            terms.append(term)
            
            # Get the start and end positions
            start = int(aspect['from'])
            end = int(aspect['to'])
            
            # Find the tokens that correspond to this aspect term
            term_indices = []
            char_index = 0
            for i, token in enumerate(tokens):
                token_start = char_index
                token_end = token_start + len(token)
                
                # Check if this token overlaps with the aspect term
                if token_end > start and token_start < end:
                    term_indices.append(i)
                
                char_index = token_end + 1  # +1 for the space
            
            # Apply BIO tagging
            if term_indices:
                labels[term_indices[0]] = 'B'  # Beginning of aspect term
                for idx in term_indices[1:]:
                    labels[idx] = 'I'  # Inside of aspect term
        
        processed_item = {
            'sentence': sentence,
            'tokens': tokens,
            'labels': labels,
            'aspect_terms': terms
        }
        
        processed_data.append(processed_item)
    
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(processed_data, f, indent=2)
    
    return processed_data

In [3]:
class AspectTermDataset(Dataset):
    def __init__(self, data, embedding_model, label_to_idx):
        self.data = data
        self.embedding_model = embedding_model
        self.embedding_dim = embedding_model.vector_size
        self.label_to_idx = label_to_idx
        self.unk_vector = np.zeros(self.embedding_dim)  # Zero vector for unknown words
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        tokens = item['tokens']
        labels = item['labels']
        
        # Get word embeddings directly from the model
        token_embeddings = []
        for token in tokens:
            token_lower = token.lower()
            if token_lower in self.embedding_model:
                token_embeddings.append(self.embedding_model[token_lower])
            else:
                token_embeddings.append(self.unk_vector)
        
        token_embeddings = np.array(token_embeddings)
        label_indices = [self.label_to_idx[label] for label in labels]
        
        return {
            'embeddings': torch.tensor(token_embeddings, dtype=torch.float),
            'labels': torch.tensor(label_indices, dtype=torch.long),
            'lengths': len(tokens)
        }

In [4]:
# Collate function for batching
def collate_fn(batch):
    # Sort the batch by length in descending order
    batch = sorted(batch, key=lambda x: x['lengths'], reverse=True)
    
    # Get the length of each sequence
    lengths = [item['lengths'] for item in batch]
    
    # Get the maximum length in the batch
    max_length = max(lengths)
    embedding_dim = batch[0]['embeddings'].shape[1]
    
    # Pad the sequences
    embeddings = torch.zeros(len(batch), max_length, embedding_dim, dtype=torch.float)
    labels = torch.zeros(len(batch), max_length, dtype=torch.long)
    
    for i, item in enumerate(batch):
        embeddings[i, :item['lengths']] = item['embeddings']
        labels[i, :item['lengths']] = item['labels']
    
    return {
        'embeddings': embeddings,
        'labels': labels,
        'lengths': torch.tensor(lengths, dtype=torch.long)
    }

In [5]:
# Modified RNN Model (non-bidirectional)
class RNNModel(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim):
        super(RNNModel, self).__init__()
        
        # No embedding layer as we're using embeddings directly
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True, bidirectional=False)
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, embeddings, lengths):
        # Pack the sequences
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embeddings, lengths.cpu(), batch_first=True, enforce_sorted=False)
        
        packed_output, hidden = self.rnn(packed_embedded)
        
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        
        output = self.dropout(output)
        logits = self.fc(output)
        
        return logits

# Modified GRU Model (non-bidirectional)
class GRUModel(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim):
        super(GRUModel, self).__init__()
        
        # No embedding layer as we're using embeddings directly
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True, bidirectional=False)
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, embeddings, lengths):
        # Pack the sequences
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embeddings, lengths.cpu(), batch_first=True, enforce_sorted=False)
        
        packed_output, hidden = self.gru(packed_embedded)
        
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        
        output = self.dropout(output)
        logits = self.fc(output)
        
        return logits

In [6]:
# # Load GloVe embeddings
# def load_glove_embeddings(path, word_to_idx, embedding_dim=300):
#     embeddings = np.zeros((len(word_to_idx), embedding_dim))
    
#     with open(path, 'r', encoding='utf-8') as f:
#         for line in f:
#             values = line.split()
#             word = values[0]
#             if word in word_to_idx:
#                 vector = np.asarray(values[1:], dtype='float32')
#                 embeddings[word_to_idx[word]] = vector
    
#     return torch.FloatTensor(embeddings)

In [7]:
# # Load FastText embeddings
# def load_fasttext_embeddings(path, word_to_idx, embedding_dim=300):
#     model = KeyedVectors.load_word2vec_format(path, binary=False)
#     embeddings = np.zeros((len(word_to_idx), embedding_dim))
    
#     for word, idx in word_to_idx.items():
#         if word in model:
#             embeddings[idx] = model[word]
    
#     return torch.FloatTensor(embeddings)

In [8]:
# Evaluation function
def evaluate_model(model, data_loader, criterion, device):
    model.eval()
    total_loss = 0
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for batch in data_loader:
            embeddings = batch['embeddings'].to(device)
            labels = batch['labels'].to(device)
            lengths = batch['lengths']
            
            outputs = model(embeddings, lengths)
            
            # Reshape outputs and labels for loss calculation
            outputs_flat = outputs.view(-1, outputs.shape[-1])
            labels_flat = labels.view(-1)
            
            # Calculate loss (ignore padding)
            mask = labels_flat != 0  # Assuming 0 is the padding index
            loss = criterion(outputs_flat[mask], labels_flat[mask])
            
            total_loss += loss.item()
            
            # Get predictions
            _, predictions = torch.max(outputs, dim=2)
            
            # Collect predictions and labels (ignoring padding)
            for i in range(len(lengths)):
                length = lengths[i].item()
                pred = predictions[i, :length].cpu().numpy()
                lab = labels[i, :length].cpu().numpy()
                
                all_predictions.extend(pred)
                all_labels.extend(lab)
    
    # Calculate F1 score
    f1 = f1_score(all_labels, all_predictions, average='weighted')
    
    return total_loss / len(data_loader), f1

In [9]:
# Training function
def train(model, train_loader, val_loader, optimizer, criterion, device, epochs, model_save_path):
    best_f1 = 0.0
    train_losses = []
    val_losses = []
    
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        
        for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
            embeddings = batch['embeddings'].to(device)
            labels = batch['labels'].to(device)
            lengths = batch['lengths']
            
            optimizer.zero_grad()
            
            outputs = model(embeddings, lengths)
            
            # Reshape outputs and labels for loss calculation
            outputs = outputs.view(-1, outputs.shape[-1])
            labels = labels.view(-1)
            
            # Calculate loss (ignore padding)
            mask = labels != 0  # Assuming 0 is the padding index
            loss = criterion(outputs[mask], labels[mask])
            
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
        
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)
        
        # Validation
        val_loss, val_f1 = evaluate_model(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}')
        
        # Save the best model
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), model_save_path)
            print(f'Model saved to {model_save_path}')
    
    return train_losses, val_losses

In [10]:
# Convert predictions to BIO format
def convert_to_bio(idx_to_label, predictions, lengths):
    bio_predictions = []
    
    for i, length in enumerate(lengths):
        # Convert tensor values to Python integers before dictionary lookup
        bio_predictions.append([idx_to_label[pred.item()] for pred in predictions[i, :length]])
    
    return bio_predictions

In [11]:
def calculate_f1_conlleval(tokens, true_labels, pred_labels):
    results = []
    
    for sample_tokens, sample_true, sample_pred in zip(tokens, true_labels, pred_labels):
        for token, true, pred in zip(sample_tokens, sample_true, sample_pred):
            results.append(f"{token} {true} {pred}")
        results.append("")  # Empty line between sentences
    
    # Remove debug print
    eval_output = evaluate(results)
    
    # Extract chunk-level metrics
    chunk_metrics = eval_output['overall']['chunks']['evals']
    # Extract tag-level metrics
    tag_metrics = eval_output['overall']['tags']['evals']
    
    return chunk_metrics, tag_metrics

In [12]:
def plot_losses(train_losses, val_losses, title, save_path):
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(save_path)
    plt.close()

In [13]:
def test_model(model, test_data, embedding_model, label_to_idx, idx_to_label, device):
    model.eval()
    
    all_predictions = []
    all_tokens = []
    all_true_labels = []
    
    embedding_dim = embedding_model.vector_size
    unk_vector = np.zeros(embedding_dim)  # Zero vector for unknown words
    
    # Process each item separately to maintain direct mapping to test_data
    for item in test_data:
        tokens = item['tokens']
        labels = item['labels']
        
        # Get word embeddings directly
        token_embeddings = []
        for token in tokens:
            token_lower = token.lower()
            if token_lower in embedding_model:
                token_embeddings.append(embedding_model[token_lower])
            else:
                token_embeddings.append(unk_vector)
        
        # Convert to tensor
        token_embeddings = np.array(token_embeddings)
        token_tensor = torch.tensor([token_embeddings], dtype=torch.float).to(device)
        length = torch.tensor([len(tokens)], dtype=torch.long)
        
        # Get predictions
        outputs = model(token_tensor, length)
        _, predictions = torch.max(outputs, dim=2)
        
        # Convert predictions to BIO format
        label_indices = [label_to_idx[label] for label in labels]
        bio_predictions = [idx_to_label[pred.item()] for pred in predictions[0, :len(tokens)]]
        bio_true = labels  # Already in BIO format
        
        all_predictions.append(bio_predictions)
        all_tokens.append(tokens)
        all_true_labels.append(bio_true)
    
    # Calculate both chunk-level and tag-level metrics using conlleval
    chunk_metrics, tag_metrics = calculate_f1_conlleval(all_tokens, all_true_labels, all_predictions)
    
    print("Chunk-level evaluation:")
    print(f"  Precision: {chunk_metrics['prec']:.4f}")
    print(f"  Recall:    {chunk_metrics['rec']:.4f}")
    print(f"  F1 Score:  {chunk_metrics['f1']:.4f}")
    
    print("\nTag-level evaluation:")
    print(f"  Precision: {tag_metrics['prec']:.4f}")
    print(f"  Recall:    {tag_metrics['rec']:.4f}")
    print(f"  F1 Score:  {tag_metrics['f1']:.4f}")
    
    return chunk_metrics, tag_metrics

In [14]:
def main():
    # Set device
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    print(f'Using device: {device}')
    
    # Preprocess data
    print('Preprocessing data...')
    train_data = preprocess_data('train.json', 'train_task_1.json')
    val_data = preprocess_data('val.json', 'val_task_1.json')
    
    # Define label mapping
    label_to_idx = {'<PAD>': 0, 'O': 1, 'B': 2, 'I': 3}
    idx_to_label = {0: '<PAD>', 1: 'O', 2: 'B', 3: 'I'}
    
    # Save label mapping
    with open("label_mapping.json", "w") as f:
        json.dump({
            "label_to_idx": label_to_idx,
            "idx_to_label": idx_to_label
        }, f)
    
    # Load pretrained embeddings    
    try:
        print('Loading pretrained embeddings...')
        
        # Load models
        print('Loading models...')
        glove_model = api.load("glove-wiki-gigaword-300")
        print('GloVe model loaded')
        fasttext_model = api.load("fasttext-wiki-news-subwords-300")
        print('FastText model loaded')
        
        embedding_dim = 300
        
    except Exception as e:
        print(f"Error loading from gensim: {e}")
        print("Exiting as we need pre-trained embeddings.")
        return
    
    # Get the embedding dimension
    embedding_dim = glove_model.vector_size
    hidden_dim = 256
    output_dim = len(label_to_idx)
    
    # Create datasets and dataloaders
    print('Creating datasets and dataloaders...')
    train_dataset_glove = AspectTermDataset(train_data, glove_model, label_to_idx)
    val_dataset_glove = AspectTermDataset(val_data, glove_model, label_to_idx)
    train_dataset_fasttext = AspectTermDataset(train_data, fasttext_model, label_to_idx)
    val_dataset_fasttext = AspectTermDataset(val_data, fasttext_model, label_to_idx)
    
    batch_size = 32
    train_loader_glove = DataLoader(train_dataset_glove, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader_glove = DataLoader(val_dataset_glove, batch_size=batch_size, collate_fn=collate_fn)
    train_loader_fasttext = DataLoader(train_dataset_fasttext, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader_fasttext = DataLoader(val_dataset_fasttext, batch_size=batch_size, collate_fn=collate_fn)
    
    # Create the directory for saving models if it doesn't exist
    os.makedirs('models', exist_ok=True)
    os.makedirs('plots', exist_ok=True)
    
    # Define models
    models = {
        'RNN_GloVe': {
            'model': RNNModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_glove,
            'val_loader': val_loader_glove,
            'embedding_model': glove_model
        },
        'RNN_FastText': {
            'model': RNNModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_fasttext,
            'val_loader': val_loader_fasttext,
            'embedding_model': fasttext_model
        },
        'GRU_GloVe': {
            'model': GRUModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_glove,
            'val_loader': val_loader_glove,
            'embedding_model': glove_model
        },
        'GRU_FastText': {
            'model': GRUModel(embedding_dim, hidden_dim, output_dim).to(device),
            'train_loader': train_loader_fasttext,
            'val_loader': val_loader_fasttext,
            'embedding_model': fasttext_model
        }
    }
    
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore padding index
    epochs = 30
    
    results = {}
    
    # Train and evaluate each model
    for name, model_config in models.items():
        print(f'\nTraining {name}...')
        model = model_config['model']
        train_loader = model_config['train_loader']
        val_loader = model_config['val_loader']
        
        optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
        model_save_path = f'models/{name}_best.pt'
        
        train_losses, val_losses = train(model, train_loader, val_loader, optimizer, criterion, device, epochs, model_save_path)
        
        # Plot losses
        plot_losses(train_losses, val_losses, f'{name} Training and Validation Loss', f'plots/{name}_loss.png')
        
        # Load the best model
        model.load_state_dict(torch.load(model_save_path))
        
        # Evaluate on validation set
        chunk_metrics, tag_metrics = test_model(model, val_data, model_config['embedding_model'], label_to_idx, idx_to_label, device)
        
        results[name] = {
            'chunk_f1': chunk_metrics['f1'],
            'chunk_prec': chunk_metrics['prec'],
            'chunk_rec': chunk_metrics['rec'],
            'tag_f1': tag_metrics['f1'],
            'tag_prec': tag_metrics['prec'],
            'tag_rec': tag_metrics['rec']
        }
        
        print(f'{name} validation results:')
        print(f"  Chunk-level -> Precision: {chunk_metrics['prec']:.4f}, Recall: {chunk_metrics['rec']:.4f}, F1: {chunk_metrics['f1']:.4f}")
        print(f"  Tag-level   -> Precision: {tag_metrics['prec']:.4f}, Recall: {tag_metrics['rec']:.4f}, F1: {tag_metrics['f1']:.4f}")
    
    # Print results summary
    print('\nResults Summary:')
    for name, result in results.items():
        print(f"{name}: Chunk F1 = {result['chunk_f1']:.4f}, Tag F1 = {result['tag_f1']:.4f}")
    
    # Find the best model based on chunk-level F1
    best_model_name = max(results, key=lambda x: results[x]['chunk_f1'])
    print(f'\nBest model: {best_model_name} with Chunk F1 = {results[best_model_name]["chunk_f1"]:.4f} and Tag F1 = {results[best_model_name]["tag_f1"]:.4f}')
    
    # Save best model info
    with open('best_model_info.json', 'w') as f:
        json.dump({
            'model_name': best_model_name,
            'chunk_f1': results[best_model_name]['chunk_f1'],
            'tag_f1': results[best_model_name]['tag_f1']
        }, f)
    
    # Save embedding model information
    with open('embedding_model_info.json', 'w') as f:
        json.dump({
            'best_model': best_model_name,
            'glove_model_name': "glove-wiki-gigaword-300",
            'fasttext_model_name': "fasttext-wiki-news-subwords-300"
        }, f)

In [15]:
def load_and_test(test_file, model_path, model_type, embedding_model_name):
    # Load label mapping
    with open("label_mapping.json", "r") as f:
        label_data = json.load(f)
    label_to_idx = label_data["label_to_idx"]
    # Convert idx_to_label keys to integers
    idx_to_label = {int(k): v for k, v in label_data["idx_to_label"].items()}
    
    # Process the test file
    test_data = preprocess_data(test_file, "test_task_1.json")
    
    # Set device
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    
    # Load embedding model
    try:
        print(f'Loading {embedding_model_name} embeddings...')
        embedding_model = api.load(embedding_model_name)
        print(f'{embedding_model_name} loaded')
    except Exception as e:
        print(f"Error loading embedding model: {e}")
        return None, None
    
    # Get the embedding dimension
    embedding_dim = embedding_dim = embedding_model.vector_size
    hidden_dim = 256
    output_dim = len(label_to_idx)
    
    # Initialize model
    if model_type.startswith('RNN'):
        model = RNNModel(embedding_dim, hidden_dim, output_dim).to(device)
    elif model_type.startswith('GRU'):
        model = GRUModel(embedding_dim, hidden_dim, output_dim).to(device)
    else:
        raise ValueError(f"Unknown model type: {model_type}")
    
    # Load model weights
    model.load_state_dict(torch.load(model_path, map_location=device))
    
    # Test the model
    chunk_metrics, tag_metrics = test_model(model, test_data, embedding_model, label_to_idx, idx_to_label, device)
    
    print("Test Results:")
    print("Chunk-level:")
    print(f"  Precision: {chunk_metrics['prec']:.4f}")
    print(f"  Recall:    {chunk_metrics['rec']:.4f}")
    print(f"  F1 Score:  {chunk_metrics['f1']:.4f}")
    print("Tag-level:")
    print(f"  Precision: {tag_metrics['prec']:.4f}")
    print(f"  Recall:    {tag_metrics['rec']:.4f}")
    print(f"  F1 Score:  {tag_metrics['f1']:.4f}")
    
    return chunk_metrics, tag_metrics

def test_with_best_model(test_file):
    # Load best model info
    with open('best_model_info.json', 'r') as f:
        best_model_info = json.load(f)
    
    # Load embedding model info
    with open('embedding_model_info.json', 'r') as f:
        embedding_info = json.load(f)
    
    best_model_name = best_model_info['model_name']
    model_path = f'models/{best_model_name}_best.pt'
    
    # Determine which embedding model to use
    if 'GloVe' in best_model_name:
        embedding_model_name = embedding_info['glove_model_name']
    else:
        embedding_model_name = embedding_info['fasttext_model_name']
    
    # Extract model type (RNN or GRU)
    model_type = best_model_name.split('_')[0]
    
    print(f"Testing with best model: {best_model_name}")
    print(f"Using embedding model: {embedding_model_name}")
    print(f"Model type: {model_type}")
    
    # Test the model
    chunk_metrics, tag_metrics = load_and_test(test_file, model_path, model_type, embedding_model_name)
    
    return chunk_metrics, tag_metrics

In [16]:
if __name__ == "__main__":
    # Create directory for plots
    os.makedirs('plots', exist_ok=True)
    
    # Run main function
    main()

Using device: cuda:1
Preprocessing data...
Loading pretrained embeddings...
Loading models...
GloVe model loaded
FastText model loaded
Creating datasets and dataloaders...

Training RNN_GloVe...


Epoch 1/30: 100%|██████████| 77/77 [00:01<00:00, 74.71it/s] 


Epoch 1/30, Train Loss: 0.3361, Val Loss: 0.2208, Val F1: 0.9199
Model saved to models/RNN_GloVe_best.pt


Epoch 2/30: 100%|██████████| 77/77 [00:00<00:00, 160.32it/s]


Epoch 2/30, Train Loss: 0.2348, Val Loss: 0.2142, Val F1: 0.9231
Model saved to models/RNN_GloVe_best.pt


Epoch 3/30: 100%|██████████| 77/77 [00:00<00:00, 164.91it/s]


Epoch 3/30, Train Loss: 0.2259, Val Loss: 0.2239, Val F1: 0.9244
Model saved to models/RNN_GloVe_best.pt


Epoch 4/30: 100%|██████████| 77/77 [00:00<00:00, 156.69it/s]


Epoch 4/30, Train Loss: 0.2112, Val Loss: 0.2002, Val F1: 0.9275
Model saved to models/RNN_GloVe_best.pt


Epoch 5/30: 100%|██████████| 77/77 [00:00<00:00, 152.71it/s]


Epoch 5/30, Train Loss: 0.2037, Val Loss: 0.1977, Val F1: 0.9278
Model saved to models/RNN_GloVe_best.pt


Epoch 6/30: 100%|██████████| 77/77 [00:00<00:00, 150.74it/s]


Epoch 6/30, Train Loss: 0.1958, Val Loss: 0.1909, Val F1: 0.9310
Model saved to models/RNN_GloVe_best.pt


Epoch 7/30: 100%|██████████| 77/77 [00:00<00:00, 151.12it/s]


Epoch 7/30, Train Loss: 0.1936, Val Loss: 0.1933, Val F1: 0.9279


Epoch 8/30: 100%|██████████| 77/77 [00:00<00:00, 153.58it/s]


Epoch 8/30, Train Loss: 0.1849, Val Loss: 0.1942, Val F1: 0.9265


Epoch 9/30: 100%|██████████| 77/77 [00:00<00:00, 160.28it/s]


Epoch 9/30, Train Loss: 0.1751, Val Loss: 0.1819, Val F1: 0.9297


Epoch 10/30: 100%|██████████| 77/77 [00:00<00:00, 152.10it/s]


Epoch 10/30, Train Loss: 0.1682, Val Loss: 0.1828, Val F1: 0.9318
Model saved to models/RNN_GloVe_best.pt


Epoch 11/30: 100%|██████████| 77/77 [00:00<00:00, 143.55it/s]


Epoch 11/30, Train Loss: 0.1652, Val Loss: 0.1800, Val F1: 0.9332
Model saved to models/RNN_GloVe_best.pt


Epoch 12/30: 100%|██████████| 77/77 [00:00<00:00, 149.48it/s]


Epoch 12/30, Train Loss: 0.1565, Val Loss: 0.1878, Val F1: 0.9326


Epoch 13/30: 100%|██████████| 77/77 [00:00<00:00, 153.92it/s]


Epoch 13/30, Train Loss: 0.1490, Val Loss: 0.1904, Val F1: 0.9310


Epoch 14/30: 100%|██████████| 77/77 [00:00<00:00, 157.35it/s]


Epoch 14/30, Train Loss: 0.1376, Val Loss: 0.1854, Val F1: 0.9341
Model saved to models/RNN_GloVe_best.pt


Epoch 15/30: 100%|██████████| 77/77 [00:00<00:00, 169.10it/s]


Epoch 15/30, Train Loss: 0.1278, Val Loss: 0.2069, Val F1: 0.9311


Epoch 16/30: 100%|██████████| 77/77 [00:00<00:00, 164.07it/s]


Epoch 16/30, Train Loss: 0.1210, Val Loss: 0.2116, Val F1: 0.9288


Epoch 17/30: 100%|██████████| 77/77 [00:00<00:00, 165.65it/s]


Epoch 17/30, Train Loss: 0.1089, Val Loss: 0.2213, Val F1: 0.9296


Epoch 18/30: 100%|██████████| 77/77 [00:00<00:00, 172.28it/s]


Epoch 18/30, Train Loss: 0.0952, Val Loss: 0.2451, Val F1: 0.9189


Epoch 19/30: 100%|██████████| 77/77 [00:00<00:00, 168.51it/s]


Epoch 19/30, Train Loss: 0.0885, Val Loss: 0.2456, Val F1: 0.9197


Epoch 20/30: 100%|██████████| 77/77 [00:00<00:00, 166.89it/s]


Epoch 20/30, Train Loss: 0.0840, Val Loss: 0.2439, Val F1: 0.9306


Epoch 21/30: 100%|██████████| 77/77 [00:00<00:00, 160.61it/s]


Epoch 21/30, Train Loss: 0.0748, Val Loss: 0.2618, Val F1: 0.9263


Epoch 22/30: 100%|██████████| 77/77 [00:00<00:00, 161.78it/s]


Epoch 22/30, Train Loss: 0.0655, Val Loss: 0.2675, Val F1: 0.9242


Epoch 23/30: 100%|██████████| 77/77 [00:00<00:00, 160.95it/s]


Epoch 23/30, Train Loss: 0.0489, Val Loss: 0.2995, Val F1: 0.9210


Epoch 24/30: 100%|██████████| 77/77 [00:00<00:00, 163.66it/s]


Epoch 24/30, Train Loss: 0.0614, Val Loss: 0.3052, Val F1: 0.9278


Epoch 25/30: 100%|██████████| 77/77 [00:00<00:00, 151.91it/s]


Epoch 25/30, Train Loss: 0.0473, Val Loss: 0.3047, Val F1: 0.9271


Epoch 26/30: 100%|██████████| 77/77 [00:00<00:00, 174.20it/s]


Epoch 26/30, Train Loss: 0.0463, Val Loss: 0.3231, Val F1: 0.9214


Epoch 27/30: 100%|██████████| 77/77 [00:00<00:00, 165.18it/s]


Epoch 27/30, Train Loss: 0.0364, Val Loss: 0.3323, Val F1: 0.9269


Epoch 28/30: 100%|██████████| 77/77 [00:00<00:00, 164.58it/s]


Epoch 28/30, Train Loss: 0.0309, Val Loss: 0.3422, Val F1: 0.9249


Epoch 29/30: 100%|██████████| 77/77 [00:00<00:00, 168.75it/s]


Epoch 29/30, Train Loss: 0.0300, Val Loss: 0.3565, Val F1: 0.9251


Epoch 30/30: 100%|██████████| 77/77 [00:00<00:00, 162.57it/s]
  token_tensor = torch.tensor([token_embeddings], dtype=torch.float).to(device)


Epoch 30/30, Train Loss: 0.0250, Val Loss: 0.3697, Val F1: 0.9250
Chunk-level evaluation:
  Precision: 0.6185
  Recall:    0.5784
  F1 Score:  0.5978

Tag-level evaluation:
  Precision: 0.9386
  Recall:    0.9386
  F1 Score:  0.9386
RNN_GloVe validation results:
  Chunk-level -> Precision: 0.6185, Recall: 0.5784, F1: 0.5978
  Tag-level   -> Precision: 0.9386, Recall: 0.9386, F1: 0.9386

Training RNN_FastText...


Epoch 1/30: 100%|██████████| 77/77 [00:00<00:00, 163.89it/s]


Epoch 1/30, Train Loss: 0.4345, Val Loss: 0.2556, Val F1: 0.9082
Model saved to models/RNN_FastText_best.pt


Epoch 2/30: 100%|██████████| 77/77 [00:00<00:00, 167.40it/s]


Epoch 2/30, Train Loss: 0.2575, Val Loss: 0.2141, Val F1: 0.9195
Model saved to models/RNN_FastText_best.pt


Epoch 3/30: 100%|██████████| 77/77 [00:00<00:00, 159.73it/s]


Epoch 3/30, Train Loss: 0.2354, Val Loss: 0.2284, Val F1: 0.9230
Model saved to models/RNN_FastText_best.pt


Epoch 4/30: 100%|██████████| 77/77 [00:00<00:00, 148.03it/s]


Epoch 4/30, Train Loss: 0.2183, Val Loss: 0.1918, Val F1: 0.9313
Model saved to models/RNN_FastText_best.pt


Epoch 5/30: 100%|██████████| 77/77 [00:00<00:00, 149.81it/s]


Epoch 5/30, Train Loss: 0.2132, Val Loss: 0.1912, Val F1: 0.9327
Model saved to models/RNN_FastText_best.pt


Epoch 6/30: 100%|██████████| 77/77 [00:00<00:00, 158.10it/s]


Epoch 6/30, Train Loss: 0.2064, Val Loss: 0.1964, Val F1: 0.9314


Epoch 7/30: 100%|██████████| 77/77 [00:00<00:00, 159.90it/s]


Epoch 7/30, Train Loss: 0.2024, Val Loss: 0.1985, Val F1: 0.9293


Epoch 8/30: 100%|██████████| 77/77 [00:00<00:00, 161.41it/s]


Epoch 8/30, Train Loss: 0.2004, Val Loss: 0.1853, Val F1: 0.9333
Model saved to models/RNN_FastText_best.pt


Epoch 9/30: 100%|██████████| 77/77 [00:00<00:00, 161.22it/s]


Epoch 9/30, Train Loss: 0.2035, Val Loss: 0.1925, Val F1: 0.9331


Epoch 10/30: 100%|██████████| 77/77 [00:00<00:00, 156.89it/s]


Epoch 10/30, Train Loss: 0.1897, Val Loss: 0.1949, Val F1: 0.9299


Epoch 11/30: 100%|██████████| 77/77 [00:00<00:00, 159.89it/s]


Epoch 11/30, Train Loss: 0.1846, Val Loss: 0.1867, Val F1: 0.9318


Epoch 12/30: 100%|██████████| 77/77 [00:00<00:00, 164.85it/s]


Epoch 12/30, Train Loss: 0.1818, Val Loss: 0.1888, Val F1: 0.9335
Model saved to models/RNN_FastText_best.pt


Epoch 13/30: 100%|██████████| 77/77 [00:00<00:00, 165.71it/s]


Epoch 13/30, Train Loss: 0.1752, Val Loss: 0.1913, Val F1: 0.9307


Epoch 14/30: 100%|██████████| 77/77 [00:00<00:00, 166.03it/s]


Epoch 14/30, Train Loss: 0.1747, Val Loss: 0.1944, Val F1: 0.9294


Epoch 15/30: 100%|██████████| 77/77 [00:00<00:00, 164.13it/s]


Epoch 15/30, Train Loss: 0.1727, Val Loss: 0.2150, Val F1: 0.9218


Epoch 16/30: 100%|██████████| 77/77 [00:00<00:00, 164.66it/s]


Epoch 16/30, Train Loss: 0.1733, Val Loss: 0.1876, Val F1: 0.9345
Model saved to models/RNN_FastText_best.pt


Epoch 17/30: 100%|██████████| 77/77 [00:00<00:00, 165.33it/s]


Epoch 17/30, Train Loss: 0.1652, Val Loss: 0.1975, Val F1: 0.9269


Epoch 18/30: 100%|██████████| 77/77 [00:00<00:00, 165.08it/s]


Epoch 18/30, Train Loss: 0.1603, Val Loss: 0.1984, Val F1: 0.9315


Epoch 19/30: 100%|██████████| 77/77 [00:00<00:00, 164.37it/s]


Epoch 19/30, Train Loss: 0.1602, Val Loss: 0.2055, Val F1: 0.9283


Epoch 20/30: 100%|██████████| 77/77 [00:00<00:00, 163.64it/s]


Epoch 20/30, Train Loss: 0.1647, Val Loss: 0.1970, Val F1: 0.9329


Epoch 21/30: 100%|██████████| 77/77 [00:00<00:00, 164.06it/s]


Epoch 21/30, Train Loss: 0.1530, Val Loss: 0.2019, Val F1: 0.9309


Epoch 22/30: 100%|██████████| 77/77 [00:00<00:00, 164.04it/s]


Epoch 22/30, Train Loss: 0.1538, Val Loss: 0.2083, Val F1: 0.9310


Epoch 23/30: 100%|██████████| 77/77 [00:00<00:00, 164.49it/s]


Epoch 23/30, Train Loss: 0.1529, Val Loss: 0.2118, Val F1: 0.9308


Epoch 24/30: 100%|██████████| 77/77 [00:00<00:00, 165.54it/s]


Epoch 24/30, Train Loss: 0.1487, Val Loss: 0.2127, Val F1: 0.9259


Epoch 25/30: 100%|██████████| 77/77 [00:00<00:00, 163.64it/s]


Epoch 25/30, Train Loss: 0.1437, Val Loss: 0.2173, Val F1: 0.9253


Epoch 26/30: 100%|██████████| 77/77 [00:00<00:00, 166.73it/s]


Epoch 26/30, Train Loss: 0.1427, Val Loss: 0.2050, Val F1: 0.9258


Epoch 27/30: 100%|██████████| 77/77 [00:00<00:00, 157.79it/s]


Epoch 27/30, Train Loss: 0.1400, Val Loss: 0.2294, Val F1: 0.9228


Epoch 28/30: 100%|██████████| 77/77 [00:00<00:00, 160.26it/s]


Epoch 28/30, Train Loss: 0.1374, Val Loss: 0.2476, Val F1: 0.9166


Epoch 29/30: 100%|██████████| 77/77 [00:00<00:00, 164.28it/s]


Epoch 29/30, Train Loss: 0.1401, Val Loss: 0.2180, Val F1: 0.9192


Epoch 30/30: 100%|██████████| 77/77 [00:00<00:00, 160.65it/s]


Epoch 30/30, Train Loss: 0.1379, Val Loss: 0.2289, Val F1: 0.9284
Chunk-level evaluation:
  Precision: 0.6122
  Recall:    0.5676
  F1 Score:  0.5891

Tag-level evaluation:
  Precision: 0.9395
  Recall:    0.9395
  F1 Score:  0.9395
RNN_FastText validation results:
  Chunk-level -> Precision: 0.6122, Recall: 0.5676, F1: 0.5891
  Tag-level   -> Precision: 0.9395, Recall: 0.9395, F1: 0.9395

Training GRU_GloVe...


Epoch 1/30: 100%|██████████| 77/77 [00:00<00:00, 154.01it/s]


Epoch 1/30, Train Loss: 0.3430, Val Loss: 0.2188, Val F1: 0.9240
Model saved to models/GRU_GloVe_best.pt


Epoch 2/30: 100%|██████████| 77/77 [00:00<00:00, 152.95it/s]


Epoch 2/30, Train Loss: 0.2255, Val Loss: 0.1908, Val F1: 0.9260
Model saved to models/GRU_GloVe_best.pt


Epoch 3/30: 100%|██████████| 77/77 [00:00<00:00, 157.24it/s]


Epoch 3/30, Train Loss: 0.2018, Val Loss: 0.1776, Val F1: 0.9311
Model saved to models/GRU_GloVe_best.pt


Epoch 4/30: 100%|██████████| 77/77 [00:00<00:00, 163.44it/s]


Epoch 4/30, Train Loss: 0.1899, Val Loss: 0.1834, Val F1: 0.9322
Model saved to models/GRU_GloVe_best.pt


Epoch 5/30: 100%|██████████| 77/77 [00:00<00:00, 161.14it/s]


Epoch 5/30, Train Loss: 0.1744, Val Loss: 0.1655, Val F1: 0.9394
Model saved to models/GRU_GloVe_best.pt


Epoch 6/30: 100%|██████████| 77/77 [00:00<00:00, 162.23it/s]


Epoch 6/30, Train Loss: 0.1583, Val Loss: 0.1633, Val F1: 0.9409
Model saved to models/GRU_GloVe_best.pt


Epoch 7/30: 100%|██████████| 77/77 [00:00<00:00, 162.73it/s]


Epoch 7/30, Train Loss: 0.1506, Val Loss: 0.1710, Val F1: 0.9390


Epoch 8/30: 100%|██████████| 77/77 [00:00<00:00, 161.50it/s]


Epoch 8/30, Train Loss: 0.1368, Val Loss: 0.1634, Val F1: 0.9420
Model saved to models/GRU_GloVe_best.pt


Epoch 9/30: 100%|██████████| 77/77 [00:00<00:00, 161.55it/s]


Epoch 9/30, Train Loss: 0.1192, Val Loss: 0.1661, Val F1: 0.9402


Epoch 10/30: 100%|██████████| 77/77 [00:00<00:00, 163.32it/s]


Epoch 10/30, Train Loss: 0.1081, Val Loss: 0.1905, Val F1: 0.9355


Epoch 11/30: 100%|██████████| 77/77 [00:00<00:00, 163.90it/s]


Epoch 11/30, Train Loss: 0.0965, Val Loss: 0.1736, Val F1: 0.9393


Epoch 12/30: 100%|██████████| 77/77 [00:00<00:00, 162.83it/s]


Epoch 12/30, Train Loss: 0.0867, Val Loss: 0.1855, Val F1: 0.9403


Epoch 13/30: 100%|██████████| 77/77 [00:00<00:00, 159.14it/s]


Epoch 13/30, Train Loss: 0.0733, Val Loss: 0.1902, Val F1: 0.9364


Epoch 14/30: 100%|██████████| 77/77 [00:00<00:00, 155.90it/s]


Epoch 14/30, Train Loss: 0.0605, Val Loss: 0.2314, Val F1: 0.9317


Epoch 15/30: 100%|██████████| 77/77 [00:00<00:00, 163.22it/s]


Epoch 15/30, Train Loss: 0.0532, Val Loss: 0.2434, Val F1: 0.9349


Epoch 16/30: 100%|██████████| 77/77 [00:00<00:00, 157.06it/s]


Epoch 16/30, Train Loss: 0.0455, Val Loss: 0.2260, Val F1: 0.9379


Epoch 17/30: 100%|██████████| 77/77 [00:00<00:00, 156.06it/s]


Epoch 17/30, Train Loss: 0.0403, Val Loss: 0.2401, Val F1: 0.9308


Epoch 18/30: 100%|██████████| 77/77 [00:00<00:00, 158.03it/s]


Epoch 18/30, Train Loss: 0.0320, Val Loss: 0.2641, Val F1: 0.9339


Epoch 19/30: 100%|██████████| 77/77 [00:00<00:00, 152.48it/s]


Epoch 19/30, Train Loss: 0.0247, Val Loss: 0.2828, Val F1: 0.9359


Epoch 20/30: 100%|██████████| 77/77 [00:00<00:00, 154.11it/s]


Epoch 20/30, Train Loss: 0.0218, Val Loss: 0.2993, Val F1: 0.9339


Epoch 21/30: 100%|██████████| 77/77 [00:00<00:00, 158.83it/s]


Epoch 21/30, Train Loss: 0.0197, Val Loss: 0.3289, Val F1: 0.9343


Epoch 22/30: 100%|██████████| 77/77 [00:00<00:00, 162.41it/s]


Epoch 22/30, Train Loss: 0.0167, Val Loss: 0.3211, Val F1: 0.9310


Epoch 23/30: 100%|██████████| 77/77 [00:00<00:00, 157.27it/s]


Epoch 23/30, Train Loss: 0.0166, Val Loss: 0.3350, Val F1: 0.9326


Epoch 24/30: 100%|██████████| 77/77 [00:00<00:00, 163.52it/s]


Epoch 24/30, Train Loss: 0.0167, Val Loss: 0.3039, Val F1: 0.9331


Epoch 25/30: 100%|██████████| 77/77 [00:00<00:00, 163.19it/s]


Epoch 25/30, Train Loss: 0.0138, Val Loss: 0.3279, Val F1: 0.9263


Epoch 26/30: 100%|██████████| 77/77 [00:00<00:00, 163.95it/s]


Epoch 26/30, Train Loss: 0.0146, Val Loss: 0.3298, Val F1: 0.9284


Epoch 27/30: 100%|██████████| 77/77 [00:00<00:00, 153.81it/s]


Epoch 27/30, Train Loss: 0.0130, Val Loss: 0.3383, Val F1: 0.9292


Epoch 28/30: 100%|██████████| 77/77 [00:00<00:00, 162.39it/s]


Epoch 28/30, Train Loss: 0.0094, Val Loss: 0.3341, Val F1: 0.9350


Epoch 29/30: 100%|██████████| 77/77 [00:00<00:00, 151.74it/s]


Epoch 29/30, Train Loss: 0.0079, Val Loss: 0.3645, Val F1: 0.9298


Epoch 30/30: 100%|██████████| 77/77 [00:00<00:00, 157.76it/s]


Epoch 30/30, Train Loss: 0.0067, Val Loss: 0.3824, Val F1: 0.9288
Chunk-level evaluation:
  Precision: 0.6277
  Recall:    0.6243
  F1 Score:  0.6260

Tag-level evaluation:
  Precision: 0.9450
  Recall:    0.9450
  F1 Score:  0.9450
GRU_GloVe validation results:
  Chunk-level -> Precision: 0.6277, Recall: 0.6243, F1: 0.6260
  Tag-level   -> Precision: 0.9450, Recall: 0.9450, F1: 0.9450

Training GRU_FastText...


Epoch 1/30: 100%|██████████| 77/77 [00:00<00:00, 153.25it/s]


Epoch 1/30, Train Loss: 0.4794, Val Loss: 0.2636, Val F1: 0.8999
Model saved to models/GRU_FastText_best.pt


Epoch 2/30: 100%|██████████| 77/77 [00:00<00:00, 161.65it/s]


Epoch 2/30, Train Loss: 0.2652, Val Loss: 0.2138, Val F1: 0.9252
Model saved to models/GRU_FastText_best.pt


Epoch 3/30: 100%|██████████| 77/77 [00:00<00:00, 161.08it/s]


Epoch 3/30, Train Loss: 0.2297, Val Loss: 0.1944, Val F1: 0.9313
Model saved to models/GRU_FastText_best.pt


Epoch 4/30: 100%|██████████| 77/77 [00:00<00:00, 162.17it/s]


Epoch 4/30, Train Loss: 0.2164, Val Loss: 0.1972, Val F1: 0.9302


Epoch 5/30: 100%|██████████| 77/77 [00:00<00:00, 158.53it/s]


Epoch 5/30, Train Loss: 0.2061, Val Loss: 0.1806, Val F1: 0.9357
Model saved to models/GRU_FastText_best.pt


Epoch 6/30: 100%|██████████| 77/77 [00:00<00:00, 159.17it/s]


Epoch 6/30, Train Loss: 0.1953, Val Loss: 0.1792, Val F1: 0.9354


Epoch 7/30: 100%|██████████| 77/77 [00:00<00:00, 160.22it/s]


Epoch 7/30, Train Loss: 0.1873, Val Loss: 0.1757, Val F1: 0.9375
Model saved to models/GRU_FastText_best.pt


Epoch 8/30: 100%|██████████| 77/77 [00:00<00:00, 149.10it/s]


Epoch 8/30, Train Loss: 0.1834, Val Loss: 0.1750, Val F1: 0.9374


Epoch 9/30: 100%|██████████| 77/77 [00:00<00:00, 148.04it/s]


Epoch 9/30, Train Loss: 0.1747, Val Loss: 0.1734, Val F1: 0.9359


Epoch 10/30: 100%|██████████| 77/77 [00:00<00:00, 162.65it/s]


Epoch 10/30, Train Loss: 0.1672, Val Loss: 0.1712, Val F1: 0.9399
Model saved to models/GRU_FastText_best.pt


Epoch 11/30: 100%|██████████| 77/77 [00:00<00:00, 162.53it/s]


Epoch 11/30, Train Loss: 0.1665, Val Loss: 0.1703, Val F1: 0.9391


Epoch 12/30: 100%|██████████| 77/77 [00:00<00:00, 154.47it/s]


Epoch 12/30, Train Loss: 0.1561, Val Loss: 0.1690, Val F1: 0.9395


Epoch 13/30: 100%|██████████| 77/77 [00:00<00:00, 161.81it/s]


Epoch 13/30, Train Loss: 0.1514, Val Loss: 0.1774, Val F1: 0.9370


Epoch 14/30: 100%|██████████| 77/77 [00:00<00:00, 163.33it/s]


Epoch 14/30, Train Loss: 0.1472, Val Loss: 0.1705, Val F1: 0.9382


Epoch 15/30: 100%|██████████| 77/77 [00:00<00:00, 162.00it/s]


Epoch 15/30, Train Loss: 0.1407, Val Loss: 0.1829, Val F1: 0.9359


Epoch 16/30: 100%|██████████| 77/77 [00:00<00:00, 163.25it/s]


Epoch 16/30, Train Loss: 0.1378, Val Loss: 0.1745, Val F1: 0.9367


Epoch 17/30: 100%|██████████| 77/77 [00:00<00:00, 162.39it/s]


Epoch 17/30, Train Loss: 0.1331, Val Loss: 0.1744, Val F1: 0.9367


Epoch 18/30: 100%|██████████| 77/77 [00:00<00:00, 160.71it/s]


Epoch 18/30, Train Loss: 0.1287, Val Loss: 0.1762, Val F1: 0.9392


Epoch 19/30: 100%|██████████| 77/77 [00:00<00:00, 161.61it/s]


Epoch 19/30, Train Loss: 0.1214, Val Loss: 0.1725, Val F1: 0.9374


Epoch 20/30: 100%|██████████| 77/77 [00:00<00:00, 162.04it/s]


Epoch 20/30, Train Loss: 0.1167, Val Loss: 0.1729, Val F1: 0.9390


Epoch 21/30: 100%|██████████| 77/77 [00:00<00:00, 161.04it/s]


Epoch 21/30, Train Loss: 0.1129, Val Loss: 0.1872, Val F1: 0.9359


Epoch 22/30: 100%|██████████| 77/77 [00:00<00:00, 162.90it/s]


Epoch 22/30, Train Loss: 0.1069, Val Loss: 0.1918, Val F1: 0.9379


Epoch 23/30: 100%|██████████| 77/77 [00:00<00:00, 162.63it/s]


Epoch 23/30, Train Loss: 0.1063, Val Loss: 0.1910, Val F1: 0.9358


Epoch 24/30: 100%|██████████| 77/77 [00:00<00:00, 163.89it/s]


Epoch 24/30, Train Loss: 0.1030, Val Loss: 0.2004, Val F1: 0.9283


Epoch 25/30: 100%|██████████| 77/77 [00:00<00:00, 143.54it/s]


Epoch 25/30, Train Loss: 0.0951, Val Loss: 0.2053, Val F1: 0.9337


Epoch 26/30: 100%|██████████| 77/77 [00:00<00:00, 142.79it/s]


Epoch 26/30, Train Loss: 0.0847, Val Loss: 0.2026, Val F1: 0.9338


Epoch 27/30: 100%|██████████| 77/77 [00:00<00:00, 131.07it/s]


Epoch 27/30, Train Loss: 0.0805, Val Loss: 0.2129, Val F1: 0.9359


Epoch 28/30: 100%|██████████| 77/77 [00:00<00:00, 160.33it/s]


Epoch 28/30, Train Loss: 0.0723, Val Loss: 0.2271, Val F1: 0.9352


Epoch 29/30: 100%|██████████| 77/77 [00:00<00:00, 161.46it/s]


Epoch 29/30, Train Loss: 0.0694, Val Loss: 0.2287, Val F1: 0.9274


Epoch 30/30: 100%|██████████| 77/77 [00:00<00:00, 157.93it/s]


Epoch 30/30, Train Loss: 0.0692, Val Loss: 0.2372, Val F1: 0.9312
Chunk-level evaluation:
  Precision: 0.6606
  Recall:    0.5838
  F1 Score:  0.6198

Tag-level evaluation:
  Precision: 0.9448
  Recall:    0.9448
  F1 Score:  0.9448
GRU_FastText validation results:
  Chunk-level -> Precision: 0.6606, Recall: 0.5838, F1: 0.6198
  Tag-level   -> Precision: 0.9448, Recall: 0.9448, F1: 0.9448

Results Summary:
RNN_GloVe: Chunk F1 = 0.5978, Tag F1 = 0.9386
RNN_FastText: Chunk F1 = 0.5891, Tag F1 = 0.9395
GRU_GloVe: Chunk F1 = 0.6260, Tag F1 = 0.9450
GRU_FastText: Chunk F1 = 0.6198, Tag F1 = 0.9448

Best model: GRU_GloVe with Chunk F1 = 0.6260 and Tag F1 = 0.9450


In [17]:
# # Example usage of test function:
chunk_metrics, tag_metrics = test_with_best_model('val.json')

Testing with best model: GRU_GloVe
Using embedding model: glove-wiki-gigaword-300
Model type: GRU
Loading glove-wiki-gigaword-300 embeddings...


glove-wiki-gigaword-300 loaded
Chunk-level evaluation:
  Precision: 0.6277
  Recall:    0.6243
  F1 Score:  0.6260

Tag-level evaluation:
  Precision: 0.9450
  Recall:    0.9450
  F1 Score:  0.9450
Test Results:
Chunk-level:
  Precision: 0.6277
  Recall:    0.6243
  F1 Score:  0.6260
Tag-level:
  Precision: 0.9450
  Recall:    0.9450
  F1 Score:  0.9450
