In [1]:
import gensim.downloader
import torch
import pandas as pd
import numpy as np
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import random
import torch.optim as optim
from seqeval.metrics import f1_score

In [2]:
# Download the "word2vec-google-news-300" embeddings
glove_vectors = gensim.downloader.load('word2vec-google-news-300')

## Question 1.1

In [3]:
words = ["student", "Apple", "apple"]
for word in words:
    similar_word, similarity_score = glove_vectors.most_similar(word, topn=1)[0]
    print(f"Word: {word}")
    print(f"Most similar word: {similar_word}")
    print(f"Cosine similarity: {similarity_score}")
    print("--------")

Word: student
Most similar word: students
Cosine similarity: 0.7294867038726807
--------
Word: Apple
Most similar word: Apple_AAPL
Cosine similarity: 0.7456986308097839
--------
Word: apple
Most similar word: apples
Cosine similarity: 0.720359742641449
--------


## Question 1.2(a)

In [4]:
def read_conll_file(file_path):
    sentences = []  # This will store lists of word-label pairs, one list for each sentence
    sentence = []   # Temporary list to store word-label pairs for the current sentence

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()  # Remove any leading/trailing whitespace
            if line:  # If the line isn't empty (i.e., we're within a sentence)
                word, _, _, label = line.split()  # Split the line to get the word and its associated label
                sentence.append((word, label))
            else:  # An empty line means the end of the current sentence
                sentences.append(sentence)
                sentence = []  # Reset the temporary list for the next sentence

    return sentences

In [5]:
train_data = read_conll_file("eng.train")
dev_data = read_conll_file("eng.testa")
test_data = read_conll_file("eng.testb")

print(f"Number of sentences in training data: {len(train_data)}")
print(f"Number of sentences in development data: {len(dev_data)}")
print(f"Number of sentences in test data: {len(test_data)}")

# Extract all unique labels from training data
labels = set()
for sentence in train_data:
    for _, label in sentence:
        labels.add(label)

print(f"All possible labels: {labels}")

Number of sentences in training data: 14986
Number of sentences in development data: 3465
Number of sentences in test data: 3683
All possible labels: {'I-ORG', 'B-LOC', 'I-PER', 'I-MISC', 'B-MISC', 'O', 'B-ORG', 'I-LOC'}


## Question 1.2(b)

In [6]:
# new part 1.2
import codecs

def load_sentences(path):
    """
    Load sentences. A line must contain at least a word and its tag.
    Sentences are separated by empty lines.
    """
    sentences = []
    sentence = []
    for line in codecs.open(path, 'r', 'utf8'):
        line = line.rstrip()
        if not line:
            if len(sentence) > 0:
                sentences.append(sentence)
                sentence = []
        else:
            word = line.split()
            sentence.append(word)

    return sentences
train_sentences = load_sentences('./eng.train')

def list_named_entities(sentence):
    named_entities = []
    b_tag = False
    for i in sentence:
        if i[3].startswith('B'):
            named_entities.append(i[0])
            b_tag = True

        elif i[3].startswith('I'):
            if b_tag:
                last_entity = named_entities[-1]
                named_entities[-1] = last_entity + ' ' + i[0]
                b_tag = False
            else:
                named_entities.append(i[0])
            
    return named_entities


for index,i in enumerate(train_sentences):
    numNamedEntities = 0
    for j in i:
      
        if j[3].startswith('B'):
            numNamedEntities+=1
    if numNamedEntities>=2:
        print(f'Sentence Index with at least 2 named entities with more than one word: {index}')
        print(f'Sentence: {i}')
        raw_sentence = [i[0] for i in i]
        print(f'Raw Sentence: {raw_sentence}')
        print(f'Named Entities: {list_named_entities(i)}')
        break

Sentence Index with at least 2 named entities with more than one word: 5969
Sentence: [['Swiss', 'NNP', 'I-NP', 'I-MISC'], ['Grand', 'NNP', 'I-NP', 'B-MISC'], ['Prix', 'NNP', 'I-NP', 'I-MISC'], ['World', 'NNP', 'I-NP', 'B-MISC'], ['Cup', 'NNP', 'I-NP', 'I-MISC'], ['cycling', 'NN', 'I-NP', 'O'], ['race', 'NN', 'I-NP', 'O'], ['on', 'IN', 'I-PP', 'O'], ['Sunday', 'NNP', 'I-NP', 'O'], [':', ':', 'O', 'O']]
Raw Sentence: ['Swiss', 'Grand', 'Prix', 'World', 'Cup', 'cycling', 'race', 'on', 'Sunday', ':']
Named Entities: ['Swiss', 'Grand Prix', 'World Cup']


## Question 1.3

### Data processing

In [7]:
def tokenize(file_name):
    # Initialize empty lists to store sentences and NER tags
    sentences = []
    tags = []

    # Read the content of the CoNLL2003 file
    with open(file_name, 'r') as file:
        current_sentence = []  # Initialize an empty list for the current sentence
        current_tags = []  # Initialize an empty list for the current NER tags
        for line in file:
            line = line.strip()
            if not line:  # Empty line indicates the end of a sentence
                if current_sentence:
                    sentences.append(current_sentence)
                    tags.append(current_tags)
                current_sentence = []  # Reset the current sentence
                current_tags = []  # Reset the current NER tags
            else:
                parts = line.split()
                current_sentence.append(parts[0])
                current_tags.append(parts[-1])

    # Add the last sentence if there's no empty line after it
    if current_sentence:
        sentences.append(current_sentence)
        tags.append(current_tags)

    # Print the result
    # for sentence, tag in zip(sentences, tags):
    #     print(sentence)
    #     print(tag)

    #print(tags)
    
    return sentences, tags

def extract_labels(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        data = file.read().strip()
    
    labels = set()
    sentences = data.split('\n\n')
    for sentence in sentences:
        lines = sentence.split('\n')
        for line in lines:
            parts = line.split()
            if len(parts) > 3:
                label = parts[-1]
                labels.add(label)
    
    return labels

In [8]:
pos_train_labels = list(sorted(extract_labels("eng.train")))

In [9]:
pos_train_labels

['B-LOC', 'B-MISC', 'B-ORG', 'I-LOC', 'I-MISC', 'I-ORG', 'I-PER', 'O']

In [10]:
train_sentences, train_tags = tokenize("eng.train")
dev_sentences, dev_tags = tokenize("eng.testa")
test_sentences, test_tags = tokenize("eng.testb")

In [11]:
df_train = pd.DataFrame({'text': train_sentences, 'labels': train_tags})
df_dev = pd.DataFrame({'text': dev_sentences, 'labels': dev_tags})
df_test = pd.DataFrame({'text': test_sentences, 'labels': test_tags})

In [12]:
embedding_matrix = glove_vectors.vectors
zero_array = np.zeros((300,))
embedding_matrix = np.vstack((embedding_matrix, zero_array)) # last element a zero array for OOV words

In [13]:
def tokens_to_indices(tokens, model):
    indices = []
    for token in tokens:
        try:
            indices.append(model.get_index(token))
        except:
            indices.append(len(embedding_matrix)-1)  # Handle out-of-vocabulary words
    return indices

In [14]:
def labels_to_indices(tokens):
    indices = []
    for token in tokens:
        indices.append(pos_train_labels.index(token))
    return indices

In [15]:
df_train['word_indices'] = df_train['text'].apply(lambda x: tokens_to_indices(x, glove_vectors))
df_dev['word_indices'] = df_dev['text'].apply(lambda x: tokens_to_indices(x, glove_vectors))
df_test['word_indices'] = df_test['text'].apply(lambda x: tokens_to_indices(x, glove_vectors))

df_train['label_indices'] = df_train['labels'].apply(lambda x: labels_to_indices(x))
df_dev['label_indices'] = df_dev['labels'].apply(lambda x: labels_to_indices(x))
df_test['label_indices'] = df_test['labels'].apply(lambda x: labels_to_indices(x))

## Final dataframe with indices

In [16]:
df_train

Unnamed: 0,text,labels,word_indices,label_indices
0,"[EU, rejects, German, call, to, boycott, Briti...","[I-ORG, O, I-MISC, O, O, O, I-MISC, O, O]","[1611, 11500, 1760, 315, 3000000, 8059, 882, 1...","[5, 7, 4, 7, 7, 7, 4, 7, 7]"
1,"[Peter, Blackburn]","[I-PER, I-PER]","[1918, 9039]","[6, 6]"
2,"[BRUSSELS, 1996-08-22]","[I-LOC, O]","[24412, 3000000]","[3, 7]"
3,"[The, European, Commission, said, on, Thursday...","[O, I-ORG, I-ORG, O, O, O, O, O, O, I-MISC, O,...","[7, 802, 1380, 9, 5, 224, 15, 10913, 8, 1760, ...","[7, 5, 5, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 7, 7, ..."
4,"[Germany, 's, representative, to, the, Europea...","[I-LOC, O, O, O, O, I-ORG, I-ORG, O, O, O, I-P...","[1420, 3000000, 3071, 3000000, 11, 802, 1745, ...","[3, 7, 7, 7, 7, 5, 5, 7, 7, 7, 6, 6, 7, 7, 7, ..."
...,...,...,...,...
14982,"[Division, two]","[O, O]","[1747, 54]","[7, 7]"
14983,"[Plymouth, 2, Preston, 1]","[I-ORG, O, I-ORG, O]","[9487, 200, 8838, 165]","[5, 7, 5, 7]"
14984,"[Division, three]","[O, O]","[1747, 80]","[7, 7]"
14985,"[Swansea, 1, Lincoln, 2]","[I-ORG, O, I-ORG, O]","[15741, 165, 3633, 200]","[5, 7, 5, 7]"


In [17]:
df_test

Unnamed: 0,text,labels,word_indices,label_indices
0,"[SOCCER, -, JAPAN, GET, LUCKY, WIN, ,, CHINA, ...","[O, O, I-LOC, O, O, O, O, I-PER, O, O, O, O]","[62630, 3000000, 75008, 47508, 156520, 31952, ...","[7, 7, 3, 7, 7, 7, 7, 6, 7, 7, 7, 7]"
1,"[Nadim, Ladki]","[I-PER, I-PER]","[166344, 772342]","[6, 6]"
2,"[AL-AIN, ,, United, Arab, Emirates, 1996-12-06]","[I-LOC, O, I-LOC, I-LOC, I-LOC, O]","[3000000, 3000000, 1217, 2658, 11668, 3000000]","[3, 7, 3, 3, 3, 7]"
3,"[Japan, began, the, defence, of, their, Asian,...","[I-LOC, O, O, O, O, O, I-MISC, I-MISC, O, O, O...","[922, 548, 11, 3000000, 3000000, 30, 2125, 209...","[3, 7, 7, 7, 7, 7, 4, 4, 7, 7, 7, 7, 7, 7, 7, ..."
4,"[But, China, saw, their, luck, desert, them, i...","[O, I-LOC, O, O, O, O, O, O, O, O, O, O, O, O,...","[84, 367, 653, 30, 4446, 6796, 82, 1, 11, 110,...","[7, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ..."
...,...,...,...,...
3679,"["", It, was, the, joy, that, we, all, had, ove...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[3000000, 51, 10, 11, 5942, 3, 38, 52, 35, 63,...","[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ..."
3680,"[Charlton, managed, Ireland, for, 93, matches,...","[I-PER, O, I-LOC, O, O, O, O, O, O, O, O, O, O...","[13572, 1464, 2620, 2, 3000000, 2004, 3000000,...","[6, 7, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ..."
3681,"[He, guided, Ireland, to, two, successive, Wor...","[O, O, I-LOC, O, O, O, I-MISC, I-MISC, O, O, O...","[57, 7064, 2620, 3000000, 54, 8339, 796, 2094,...","[7, 7, 3, 7, 7, 7, 4, 4, 7, 7, 7, 7, 7, 7, 4, ..."
3682,"[The, lanky, former, Leeds, United, defender, ...","[O, O, O, I-ORG, I-ORG, O, O, O, O, O, I-LOC, ...","[7, 33489, 249, 7178, 1217, 4206, 92, 13, 109,...","[7, 7, 7, 5, 5, 7, 7, 7, 7, 7, 3, 7, 7, 7, 7, ..."


In [18]:
df_dev

Unnamed: 0,text,labels,word_indices,label_indices
0,"[CRICKET, -, LEICESTERSHIRE, TAKE, OVER, AT, T...","[O, O, I-ORG, O, O, O, O, O, O, O, O]","[93620, 3000000, 1552803, 71662, 36028, 11030,...","[7, 7, 5, 7, 7, 7, 7, 7, 7, 7, 7]"
1,"[LONDON, 1996-08-30]","[I-LOC, O]","[4949, 3000000]","[3, 7]"
2,"[West, Indian, all-rounder, Phil, Simmons, too...","[I-MISC, I-MISC, O, I-PER, I-PER, O, O, O, O, ...","[611, 1106, 3000000, 4811, 8262, 263, 134, 2, ...","[4, 4, 7, 6, 6, 7, 7, 7, 7, 7, 7, 7, 5, 7, 5, ..."
3,"[Their, stay, on, top, ,, though, ,, may, be, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, I-ORG,...","[1697, 820, 5, 213, 3000000, 459, 3000000, 137...","[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 7, ..."
4,"[After, bowling, Somerset, out, for, 83, on, t...","[O, O, I-ORG, O, O, O, O, O, O, O, O, I-LOC, I...","[361, 6842, 11008, 49, 2, 3000000, 5, 11, 798,...","[7, 7, 5, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 7, 5, ..."
...,...,...,...,...
3461,"[Brokers, said, blue, chips, like, IDLC, ,, Ba...","[O, O, O, O, O, I-ORG, O, I-ORG, I-ORG, O, I-O...","[29281, 9, 2836, 5695, 87, 1325185, 3000000, 5...","[7, 7, 7, 7, 7, 5, 7, 5, 5, 7, 5, 5, 7, 5, 5, ..."
3462,"[They, said, there, was, still, demand, for, b...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[128, 9, 72, 10, 151, 687, 2, 2836, 5695, 1, 2...","[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ..."
3463,"[The, DSE, all, share, price, index, closed, 2...","[O, I-ORG, O, O, O, O, O, O, O, O, O, O, O, O,...","[7, 78228, 52, 306, 422, 1542, 866, 3000000, 1...","[7, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ..."
3464,"[--, Dhaka, Newsroom, 880-2-506363]","[O, I-ORG, I-ORG, O]","[3000000, 15087, 53872, 3000000]","[7, 5, 5, 7]"


## Create torch datasets and dataloader from DF

In [19]:
import torch
from torch.utils.data import Dataset

class MyCustomDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [20]:
def collate_fn(batch):
    data, targets = zip(*batch)
    
    # Sort the batch by sequence length (optional, but can improve efficiency)
    sorted_indices = sorted(range(len(data)), key=lambda i: len(data[i]), reverse=True)
    data = [data[i] for i in sorted_indices]
    targets = [torch.tensor(targets[i]) for i in sorted_indices]

    # Create a list of sequences and their corresponding lengths
    sequences = [torch.tensor(seq) for seq in data]
    lengths = [len(seq) for seq in sequences]

    # Pad the sequences to the length of the longest sequence in the batch
    padded_sequences = pad_sequence(sequences, batch_first=True)
    padded_targets = pad_sequence(targets, batch_first=True, padding_value=7)

    # Create packed sequence for RNNs (optional, if you're using an RNN)
    # packed_sequences = pack_padded_sequence(padded_sequences, lengths, batch_first=True, enforce_sorted=False)

    return padded_sequences, padded_targets, torch.tensor(lengths)

In [21]:
train_dataset = MyCustomDataset(df_train['word_indices'].to_numpy(), df_train['label_indices'].to_numpy())
test_dataset = MyCustomDataset(df_test['word_indices'].to_numpy(), df_test['label_indices'].to_numpy())
dev_dataset = MyCustomDataset(df_dev['word_indices'].to_numpy(), df_dev['label_indices'].to_numpy())

In [22]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
development_dataloader = DataLoader(dev_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

## Declare LSTM Model

In [23]:
class LSTMTextClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers, bidirectional, device):
        super(LSTMTextClassifier, self).__init__()
        self.output_dim = output_dim
        self.device = device
        # Embedding layer with pretrained word vectors
        self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix).to(device))
        self.embedding.weight.requires_grad = False # freeze the embeddings
        
        # LSTM layer
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, batch_first=True)
        
        # # Softmax Layer
        self.softmax_layer = nn.Linear(hidden_dim, self.output_dim)
        
    def forward(self, text, text_lengths):
        
        # Embed the input
        embedded = self.embedding(text.to(torch.int64))
        
        #Pack the embedded sequences to handle variable-length sequences
        packed_embedded = pack_padded_sequence(embedded, text_lengths.cpu().to(torch.int64), batch_first=True, enforce_sorted=False)

        # Pass through the LSTM layer
        packed_output, (hidden, cell) = self.lstm(packed_embedded)

        # Unpack the packed sequences
        output, _ = pad_packed_sequence(packed_output, batch_first=True)
        # Pass through the softmax layer
        output = self.softmax_layer(output)
        return output

In [24]:
embedding_matrix = torch.tensor(embedding_matrix, dtype=torch.float)
hidden_dim = 256
output_dim = 8
vocab_size = len(glove_vectors.index_to_key)
embedding_dim = 300
num_layer = 1
device = torch.device('cpu')

## Train Function

In [25]:
def train(model, train_dataloader, valid_dataloader, optimizer, loss_fn, device, num_epochs=50):
    model.to(device)
    dev_f1_per_epoch = []
    for epoch in range(num_epochs):
        model.train()
        # ======== training phase ==========
        train_loss = 0.0
        for batch in train_dataloader:
            text, labels, text_lengths = batch
            text, labels, text_lengths  = torch.tensor(text).to(device), labels.to(device), text_lengths

            optimizer.zero_grad() # clear gradients

            output = model(text, text_lengths)
            
            output = output.view(-1, 8)
            labels = labels.view(-1)

            loss = loss_fn(output, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
        
        # Calculate average training loss for the epoch
        avg_train_loss = train_loss / len(train_dataloader)
        # ====================================


        # ========== validation phase =========
        model.eval()
        valid_loss = 0.0
        correct = 0
        total = 0
        f1 = 0

        predictions = []
        label_list = []

        with torch.no_grad():
            for batch in valid_dataloader:
                text, labels, text_lengths = batch
                text, labels, text_lengths  = text.to(device), labels.to(device), text_lengths

                output = model(text, text_lengths)
                bs, sl, inp = output.size()
                output = output.view(-1, 8)
                labels = labels.view(-1)
                loss = loss_fn(output, labels)
                output = output.view(bs, sl, inp)
                labels = labels.view(bs, sl)
                valid_loss += loss.item()

                _, predicted = torch.max(output, 2)
                for i in range(predicted.size(0)):
                    pred = []
                    lbl = []
                    for j in predicted[i]:
                        pred.append(pos_train_labels[j])
                    for k in labels[i]:
                        lbl.append(pos_train_labels[k])
                    predictions.append(pred)
                    label_list.append(lbl)

                # print("output:", output.shape)
                # print("predicted:", predicted.shape)
                # print("labels:", labels.shape)
                # f1 = f1 + f1_score(labels, predicted)
                # total += labels.size(0)
                # correct += (predicted == labels).sum().item()


        # Calculate average validation loss and accuracy for the epoch
        avg_valid_loss = valid_loss / len(valid_dataloader)
        # print("label list:", label_list)
        # print("predictions:",predictions)
        f1 = f1_score(label_list, predictions)
        dev_f1_per_epoch.append(f1)
        # =======================================
        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Validation Loss: {avg_valid_loss:.4f}, F1: {f1:.2f}')

    print('Training complete.')

    return dev_f1_per_epoch


## Test Function

In [26]:
def test(model, test_dataloader, device, loss_fn):
    model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    predictions = []
    label_list = []
    with torch.no_grad():
            for batch in test_dataloader:
                text, labels, text_lengths = batch
                text, labels, text_lengths  = text.to(device), labels.to(device), text_lengths

                output = model(text, text_lengths)
                bs, sl, inp = output.size()
                output = output.view(-1, 8)
                labels = labels.view(-1)
                loss = loss_fn(output, labels)
                output = output.view(bs, sl, inp)
                labels = labels.view(bs, sl)
                valid_loss += loss.item()

                _, predicted = torch.max(output, 2)
                for i in range(predicted.size(0)):
                    pred = []
                    lbl = []
                    for j in predicted[i]:
                        pred.append(pos_train_labels[j])
                    for k in labels[i]:
                        lbl.append(pos_train_labels[k])
                    predictions.append(pred)
                    label_list.append(lbl)

                # print("output:", output.shape)
                # print("predicted:", predicted.shape)
                # print("labels:", labels.shape)
                # f1 = f1 + f1_score(labels, predicted)
                # total += labels.size(0)
                # correct += (predicted == labels).sum().item()


    # Calculate average validation loss and accuracy for the epoch
    avg_valid_loss = valid_loss / len(test_dataloader)
    # print("label list:", label_list)
    # print("predictions:",predictions)
    f1 = f1_score(label_list, predictions)

    return avg_valid_loss, f1

In [27]:
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

## Training Phase

In [28]:
lstm_classifier = LSTMTextClassifier(vocab_size, embedding_dim, hidden_dim, output_dim,num_layer,False, device).to(device)
lr = 0.001
optimizer = optim.Adam(lstm_classifier.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()

  self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix).to(device))


In [29]:
import time
start_time = time.time()
dev_acc_per_epoch = train(lstm_classifier, train_dataloader, development_dataloader, optimizer, loss_fn, device, num_epochs=15)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time} seconds")

  text, labels, text_lengths  = torch.tensor(text).to(device), labels.to(device), text_lengths


Epoch 1/15, Train Loss: 1.2068, Validation Loss: 0.8987, F1: 0.71
Epoch 2/15, Train Loss: 0.7339, Validation Loss: 0.5707, F1: 0.76
Epoch 3/15, Train Loss: 0.4661, Validation Loss: 0.3655, F1: 0.78
Epoch 4/15, Train Loss: 0.3035, Validation Loss: 0.2450, F1: 0.79
Epoch 5/15, Train Loss: 0.2074, Validation Loss: 0.1743, F1: 0.80
Epoch 6/15, Train Loss: 0.1489, Validation Loss: 0.1313, F1: 0.81
Epoch 7/15, Train Loss: 0.1117, Validation Loss: 0.1038, F1: 0.81
Epoch 8/15, Train Loss: 0.0866, Validation Loss: 0.0856, F1: 0.82
Epoch 9/15, Train Loss: 0.0689, Validation Loss: 0.0734, F1: 0.82
Epoch 10/15, Train Loss: 0.0558, Validation Loss: 0.0654, F1: 0.82
Epoch 11/15, Train Loss: 0.0457, Validation Loss: 0.0601, F1: 0.82
Epoch 12/15, Train Loss: 0.0378, Validation Loss: 0.0574, F1: 0.81
Epoch 13/15, Train Loss: 0.0316, Validation Loss: 0.0554, F1: 0.81
Epoch 14/15, Train Loss: 0.0267, Validation Loss: 0.0536, F1: 0.82
Epoch 15/15, Train Loss: 0.0226, Validation Loss: 0.0538, F1: 0.81
Trai

## Test Phase

In [30]:
avg_valid_loss, f1 = test(lstm_classifier, test_dataloader, device, loss_fn)

print("Avg test loss:", avg_valid_loss)
print("F1 Score:", f1)

Avg test loss: 0.086020651293501
F1 Score: 0.7582141309059385
