# Assignment 4, task 1

In this task, we will use recurrent neural networks, or Gated Recurrent Units (GRUs) to be more exact, to date historical text: given a passage, the model predicts the decade (1700–1890) in which it was written.

We want to consider both the context of the word (the surrounding words) and the contents of the word (the letters and other symbols that make up the actual word). Therefore we are using two bi-directional GRUs, one word-level GRU for the words in the sentence, and one character-level GRU for the letters and other symbols in a word.

We will process one sentence at a time. Each hidden state vector in the word-level GRU represents that word in relation to the other words in the sentence, whereas the final state vector(s) in the character-level RNN represent morphological and typographical information about the word. We will concatenate these vectors to obtain a single information-rich representation of the word.

## Libraries + Imports

In [1]:
# First run this cell
import csv
import os 
import urllib.request
import zipfile
from tqdm import tqdm
import string
import codecs
import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils import clip_grad_norm_
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

## Data

### Embeddings - Pretrained GLOVE Embeddings

In [2]:
# Directory that holds the downloaded GloVe files. Note that this name is
# rebound to the path of the vector file in the "Data File Paths" cell.
embeddings_path = './Embeddings'
# remove the embeddings data files if they already exist
# if os.path.exists(embeddings_path):
#     print(f"clean up - previous pre-trained embeddings files")
#     embedding_files = [f for f in os.listdir(embeddings_path) if f.endswith(".txt")]
#     for file in embedding_files:
#         file_path = os.path.join(embeddings_path, file)
#         os.remove(file_path)
#         print(f"successfully removed {file}")
#     os.rmdir(embeddings_path)
#     print(f"successfully removed {embeddings_path}")
#     print()


def download_progress(block_num, block_size, total_size):
    """
    Report hook for `urllib.request.urlretrieve` that drives a tqdm bar.

    The bar is stored on the function object (`download_progress.pbar`) so
    that it survives between the successive hook calls of one download.

    :param block_num:   Number of blocks transferred so far
    :param block_size:  Size of one block in bytes
    :param total_size:  Total file size in bytes; non-positive if unknown
    """
    # A call with block_num == 0 marks the start of a transfer: open a fresh
    # bar instead of reusing the finished bar of an earlier download.
    if block_num == 0 or not hasattr(download_progress, "pbar"):
        previous = getattr(download_progress, "pbar", None)
        if previous is not None:
            previous.close()
        download_progress.pbar = tqdm(
            total=total_size if total_size > 0 else None,
            unit="B",
            unit_scale=True,
        )
    pbar = download_progress.pbar

    # Move the bar to the absolute position rather than adding block_size on
    # every call: the first call happens before any data is read and the
    # last block is usually shorter than block_size.
    downloaded = block_num * block_size
    if total_size > 0:
        downloaded = min(downloaded, total_size)
    pbar.update(downloaded - pbar.n)

    if total_size > 0 and downloaded >= total_size:
        pbar.close()

# Download and unpack the GloVe vectors unless they are already on disk.
# The check looks for the vector file used later on rather than for the
# directory, so an interrupted run (directory created, nothing unpacked)
# is retried instead of being skipped forever.
if not os.path.exists(os.path.join(embeddings_path, "glove.6B.50d.txt")):
    print("create directory to store pre-trained glove embeddings")
    os.makedirs(embeddings_path, exist_ok=True)
    print("download pre-trained Glove Embeddings")
    glove_zip_path = os.path.join(embeddings_path, "glove.6B.zip")
    urllib.request.urlretrieve(
        "http://nlp.stanford.edu/data/glove.6B.zip", glove_zip_path,
        download_progress
    )
    print("unpack embeddings")
    with zipfile.ZipFile(glove_zip_path, "r") as zip_ref:
        zip_ref.extractall(embeddings_path)
    # The archive is only needed for unpacking
    os.remove(glove_zip_path)
    print("embeddings download complete")

### Data File Paths

In [3]:
# CSV files holding the training, validation and test passages
train_data_path = './Datasets/model_data/train_passages.csv'
valid_data_path = './Datasets/model_data/validation_passages.csv'
test_data_path = './Datasets/model_data/test_passages.csv'
# GloVe vector file (50-d according to its file name). This rebinds
# `embeddings_path`, which the download cell uses as a directory name.
embeddings_path = './Embeddings/glove.6B.50d.txt'

In [4]:
# Run this cell to init mappings from characters to IDs and back again,
# from words to IDs and back again, and from labels to IDs and back again

# Placeholder shared by out-of-vocabulary characters and words
UNKNOWN = '<unk>'
# Character inventory: the placeholder, two typographic symbols, then ASCII
# punctuation, letters and digits. A character's id is its position here.
CHARS = [
    UNKNOWN, '’', '—',
    *string.punctuation,
    *string.ascii_letters,
    *string.digits,
]
char_to_id = dict(zip(CHARS, range(len(CHARS))))
PADDING_WORD = '<pad>'

# Decade labels: class id i corresponds to the decade id_to_decade[i]
id_to_decade = list(range(1700, 1900, 10))
decade_to_id = dict(zip(id_to_decade, range(len(id_to_decade))))

print(f"id_to_decade: {id_to_decade}")
print(f"decade_to_id: {decade_to_id}")

# id_to_label = ['noname', 'name']

# def label_to_id(label):
#     return 0 if label == 'O' else 1

id_to_decade: [1700, 1710, 1720, 1730, 1740, 1750, 1760, 1770, 1780, 1790, 1800, 1810, 1820, 1830, 1840, 1850, 1860, 1870, 1880, 1890]
decade_to_id: {1700: 0, 1710: 1, 1720: 2, 1730: 3, 1740: 4, 1750: 5, 1760: 6, 1770: 7, 1780: 8, 1790: 9, 1800: 10, 1810: 11, 1820: 12, 1830: 13, 1840: 14, 1850: 15, 1860: 16, 1870: 17, 1880: 18, 1890: 19}


We want to have a vector representation of the syntactic and semantic properties of words, and in order to avoid having to train these from scratch, we are going to re-use pre-trained Glove vectors.

In [5]:
def load_glove_embeddings(embedding_file,
                          padding_word=PADDING_WORD, 
                          unknown_word=UNKNOWN):
    """
    Reads Glove embeddings from a file.

    Each non-blank line is expected to hold a word followed by the
    whitespace-separated components of its vector.

    Returns vector dimensionality, the word_to_id mapping (as a dict),
    and the embeddings (as a list of lists). Row 0 is the padding word
    (all zeros), row 1 the unknown word (all minus-ones), and the words
    of the file follow in file order.

    :param embedding_file:  Path to the Glove text file
    :param padding_word:    Token that gets id 0
    :param unknown_word:    Token that gets id 1
    :raises ValueError:     If the file contains no vectors
    """
    word_to_id = {padding_word: 0, unknown_word: 1}
    vectors = []
    with open(embedding_file, encoding='utf8') as f:
        for line in f:
            data = line.split()
            if not data:
                continue  # tolerate blank lines
            word = data[0]
            # Keep the first occurrence of every token (and the reserved ids
            # of the padding/unknown tokens). Re-assigning an id here while
            # still appending the row would shift every later word onto the
            # wrong vector.
            if word in word_to_id:
                continue
            # Two reserved rows are put in front of the file vectors below
            word_to_id[word] = len(vectors) + 2
            vectors.append([float(x) for x in data[1:]])

    if not vectors:
        raise ValueError(f"no embeddings found in {embedding_file}")
    D = len(vectors[0])

    # <PAD> has an embedding of just zeros, <UNK> of just minus-ones
    embeddings = [[0]*D, [-1]*D] + vectors

    return D, word_to_id, embeddings


We can now create our dataset. Each datapoint will consist of a passage (as a list of lower-cased words) and a single label for the whole passage: the id of the decade the passage comes from (0 for 1700 up to 19 for 1890).

In [6]:
class HistoricalTextDataset(Dataset):
    """
    Historical text passages read from a CSV file, usable with a PyTorch
    DataLoader.

    CSV format: text, decade, book_title, passage_id, decade_id, book_id
    (only the `text` and `decade_id` columns are read here).

    Item `i` is the pair (list of lower-cased words, decade_id).
    """
    def __init__(self, filename, word_to_id, max_length=300):
        self.word_to_id = word_to_id
        self.max_length = max_length

        frame = pd.read_csv(filename)

        # Lower-case, split on whitespace and keep at most the first
        # max_length words of every passage
        self.texts = [
            passage.lower().split()[:max_length] for passage in frame["text"]
        ]
        self.decade_ids = list(frame["decade_id"])

        print(f"Loaded {len(self.texts)} passages from {filename}")

    def __len__(self):
        """Number of passages."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the (words, decade_id) pair at position `idx`."""
        words = self.texts[idx]
        label = self.decade_ids[idx]
        return words, label

In [7]:
# Quick look at the data structures: load the Glove vectors and show one
dim, word_to_id, embeddings = load_glove_embeddings(embeddings_path)
print("The embedding for the word 'good' looks like this:")
print(embeddings[word_to_id['good']])
print()

# Load the test passages and show the beginning of one of them together
# with its label
test_set = HistoricalTextDataset(test_data_path, word_to_id)
print("There are", len(test_set), "data points in the testset")
dp = 100
passage, decade_id = test_set[dp]
print("Data point", dp, "passage:", passage[:20], "...")
print(
    "It has the decade_id:", decade_id,
    "which corresponds to decade:", id_to_decade[decade_id],
)


The embedding for the word 'good' looks like this:
[-0.35586, 0.5213, -0.6107, -0.30131, 0.94862, -0.31539, -0.59831, 0.12188, -0.031943, 0.55695, -0.10621, 0.63399, -0.4734, -0.075895, 0.38247, 0.081569, 0.82214, 0.2222, -0.0083764, -0.7662, -0.56253, 0.61759, 0.20292, -0.048598, 0.87815, -1.6549, -0.77418, 0.15435, 0.94823, -0.3952, 3.7302, 0.82855, -0.14104, 0.016395, 0.21115, -0.036085, -0.15587, 0.86583, 0.26309, -0.71015, -0.03677, 0.0018282, -0.17704, 0.27032, 0.11026, 0.14133, -0.057322, 0.27207, 0.31305, 0.92771]

Loaded 200 passages from ./Datasets/model_data/test_passages.csv
There are 200 data points in the testset
Data point 100 passage: ['uppressions', 'and', 'reserve.', 'their', 'members', 'were', 'brethren', 'in', 'disposition,', 'similar', 'in', 'their', 'pursuits,', 'and', 'congenial', 'in', 'their', 'sentiments.', 'when', 'any'] ...
It has the decade_id: 10 which corresponds to decade: 1800


In [8]:
# Run this cell. The function below will take care of the case of
# sequences of unequal lengths.
def pad_text_sequences(batch, padding_word=PADDING_WORD):
    """
    Collate function: right-pad every passage of a batch to the length of
    the longest passage in that batch.

    :param batch:         Sequence of (words, decade_id) pairs
    :param padding_word:  Token appended to the shorter passages
    :return:              (list of padded word lists, list of decade ids)
    """
    batch_texts, batch_decade_ids = zip(*batch)
    longest = max(len(words) for words in batch_texts)
    padded_texts = []
    for words in batch_texts:
        filler = [padding_word] * (longest - len(words))
        padded_texts.append(list(words) + filler)
    return padded_texts, list(batch_decade_ids)

In [9]:
# This is how it works
# Three (words, decade_id) pairs of different lengths; the shorter word
# lists come back right-padded with '<pad>' up to the longest one.
x = [
    (["hello", "world", "text"], 5),
    (["short", "passage"], 12),
    (["longer", "historical", "passage", "here"], 8),
]
pad_text_sequences(x)

([['hello', 'world', 'text', '<pad>'],
  ['short', 'passage', '<pad>', '<pad>'],
  ['longer', 'historical', 'passage', 'here']],
 [5, 12, 8])

Here is the actual classifier, as a class extending the Pytorch 'nn.Module' class. Your task is to write the forward function (look for "YOUR CODE HERE" below).

In [None]:
class HistoricalTextClassifier(nn.Module):
    """
    Decade classifier for historical text passages.

    Every word is represented by its frozen pre-trained word embedding,
    optionally concatenated with the final state(s) of a character-level GRU
    run over the characters of the word. A word-level GRU reads the resulting
    sequence, its outputs are pooled into one vector per passage, and a small
    feed-forward head turns that vector into 20 logits (one per decade).
    """

    # Words are truncated to this many characters for the character GRU
    _MAX_WORD_LENGTH = 20

    def __init__(self, word_embeddings,  # Pre-trained word embeddings
                    char_to_id,             # Mapping from chars to ids
                    word_to_id,             # Mapping from words to ids
                    char_emb_size=12,
                    char_hidden_size=20,    # Hidden size of the character-level biRNN
                    word_hidden_size=48,   # Hidden size of the word-level biRNN
                    padding_word=PADDING_WORD,
                    unknown_word=UNKNOWN,
                    char_bidirectional=True,
                    word_bidirectional=True,
                    device=None             
                ):
        """
        Builds the embedding layers, the two GRUs and the prediction head.

        Setting `char_emb_size` to 0 disables the character-level GRU.
        When `device` is None, CUDA is preferred, then MPS, then the CPU.
        The chosen device is only recorded in `self.device`; moving the
        model there is left to the caller.
        """
        super().__init__()
        if device is None:
            # torch.backends.mps does not exist in every PyTorch build
            mps_backend = getattr(torch.backends, "mps", None)
            if torch.cuda.is_available():
                self.device = "cuda"
                print("Using CUDA GPU")
            elif mps_backend is not None and mps_backend.is_available():
                self.device = "mps"
                print("Using Apple M3 Pro GPU (MPS)")
            else:
                self.device = "cpu"
                print("Using CPU")
        else:
            self.device = device
            print(f"Using specified device: {device}")

        self.padding_word = padding_word
        self.unknown_word = unknown_word
        self.char_to_id = char_to_id
        self.word_to_id = word_to_id
        self.char_emb_size = char_emb_size
        self.char_hidden_size = char_hidden_size
        self.word_hidden_size = word_hidden_size
        self.char_bidirectional = char_bidirectional
        self.word_bidirectional = word_bidirectional
        self.dropout = nn.Dropout(0.3)

        # Create an embedding tensor for the words and import the Glove
        # embeddings. The embeddings are frozen (i.e., they will not be
        # updated during training).
        vocabulary_size = len(word_embeddings)
        self.word_emb_size = len(word_embeddings[0])

        self.word_emb = nn.Embedding(vocabulary_size, self.word_emb_size)
        self.word_emb.weight = nn.Parameter(
            torch.tensor(word_embeddings, dtype=torch.float), requires_grad=False
        )

        # Create an embedding tensor for character embeddings. These embeddings
        # are learnt from scratch (i.e., they are not frozen).
        if self.char_emb_size > 0:
            self.char_emb = nn.Embedding(len(char_to_id), char_emb_size)
            self.char_birnn = nn.GRU(
                self.char_emb_size,
                self.char_hidden_size,
                bidirectional=char_bidirectional,
                batch_first=True,
            )
        else:
            # No character features are appended to the word embeddings
            self.char_hidden_size = 0

        multiplier = 2 if self.char_bidirectional else 1
        self.word_birnn = nn.GRU(
            self.word_emb_size + multiplier * self.char_hidden_size, # input size
            self.word_hidden_size,
            bidirectional=word_bidirectional,
            batch_first=True
        )

        # Multi-class classification - predict one of the 20 decades (1700-1890)
        multiplier = 2 if self.word_bidirectional else 1
        self.final_pred = nn.Sequential(
            nn.Linear(multiplier * self.word_hidden_size, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 20),
        )

    def _encode_batch(self, x, use_chars):
        """
        Maps a batch of passages to word ids, character ids and padding flags.

        Returns three nested lists: word ids (B, T), character ids
        (B, T, word length; empty when `use_chars` is False) and booleans
        (B, T) that are True where the word is the padding word.
        """
        unk_word_id = self.word_to_id[self.unknown_word]
        max_word_length = 0
        unk_char_id = None
        if use_chars:
            unk_char_id = self.char_to_id[UNKNOWN]
            longest = max(
                (len(word) for passage in x for word in passage), default=0
            )
            max_word_length = min(longest, self._MAX_WORD_LENGTH)

        word_ids, char_ids, pad_flags = [], [], []
        for passage in x:
            word_ids.append(
                [self.word_to_id.get(word, unk_word_id) for word in passage]
            )
            pad_flags.append([word == self.padding_word for word in passage])
            if not use_chars:
                continue

            passage_char_ids = []
            for word in passage:
                ids = [
                    self.char_to_id.get(char, unk_char_id)
                    for char in word[:max_word_length]
                ]
                # Short words are right-padded with the unknown-character id
                # so that all words of the batch have the same length
                ids.extend([unk_char_id] * (max_word_length - len(ids)))
                passage_char_ids.append(ids)
            char_ids.append(passage_char_ids)

        return word_ids, char_ids, pad_flags

    def _char_features(self, char_tensor):
        """
        Runs the character GRU over every word of the batch.

        Takes character ids of shape (B, T, word length) and returns one
        feature vector per word, shape (B, T, features), built from the
        final hidden state(s) of the character GRU.
        """
        batch_size, max_passage_length, word_length = char_tensor.shape

        # Flatten passages so that every word is one sequence for the GRU
        flat_chars = char_tensor.view(batch_size * max_passage_length, word_length)
        _, char_hidden = self.char_birnn(self.char_emb(flat_chars))

        if self.char_bidirectional:
            # 0 - forward, 1 - backward
            char_features = torch.cat([char_hidden[0], char_hidden[1]], dim=1)
        else:
            char_features = char_hidden.squeeze(0)

        # Back from a flat list of words to (passage, word)
        return char_features.view(batch_size, max_passage_length, -1)

    def forward(self, x):
        """
        Performs a forward pass of Historical Text Dating Classifier 
        Takes as input a 2D list `x` of dimensionality (B, T),
        where B is the batch size;
        T is the max passage length in the batch (shorter passages
        are already padded with the special token <PAD>)

        Returns logits for decade classification (20 classes).

        :param      x:    A batch of text passages
        :type       x:    list of list of strings
        """
        # Create all tensors on the device the model parameters live on
        device = next(self.parameters()).device
        use_chars = self.char_emb_size > 0

        word_ids, char_ids, pad_flags = self._encode_batch(x, use_chars)

        word_tensor = torch.tensor(word_ids, device=device)
        word_embeddings = self.word_emb(word_tensor)

        if use_chars:
            char_tensor = torch.tensor(char_ids, device=device)
            # word + character features
            combined_features = torch.cat(
                [word_embeddings, self._char_features(char_tensor)], dim=2
            )
        else:
            combined_features = word_embeddings

        # run the word-level GRU
        word_output, _ = self.word_birnn(combined_features)

        # Attention pooling: the score of a position is the sum of its GRU
        # output features, normalised with a softmax over the passage.
        scores = torch.sum(word_output, dim=2, keepdim=True)

        # Padded positions must not receive attention, otherwise the passage
        # vector depends on how much padding the batch happened to need.
        pad_mask = torch.tensor(pad_flags, dtype=torch.bool, device=device)
        # A passage consisting of padding only is left unmasked so that the
        # softmax stays finite.
        pad_mask = pad_mask & ~pad_mask.all(dim=1, keepdim=True)
        scores = scores.masked_fill(pad_mask.unsqueeze(-1), float("-inf"))

        attention_weights = torch.softmax(scores, dim=1)
        doc_representation = torch.sum(attention_weights * word_output, dim=1)

        doc_representation = self.dropout(doc_representation)

        # predict the decade
        logits = self.final_pred(doc_representation)

        return logits

In [16]:
# ================== Hyper-parameters ==================== #

learning_rate = 0.0001
epochs = 80
# ======================= Training ======================= #
# Data: Glove vectors plus the train / validation passages (first 150 words)
dim, word_to_id, embeddings = load_glove_embeddings(embeddings_path)
training_set = HistoricalTextDataset(train_data_path, word_to_id, max_length=150)
validation_set = HistoricalTextDataset(valid_data_path, word_to_id, max_length=150)

training_loader = DataLoader(
    training_set, batch_size=8, shuffle=True, collate_fn=pad_text_sequences
)
validation_loader = DataLoader(
    validation_set, batch_size=8, shuffle=False, collate_fn=pad_text_sequences
)

# Model (device picked automatically), optimiser and loss
model = HistoricalTextClassifier(embeddings, char_to_id, word_to_id, device=None)
model = model.to(model.device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.02)
criterion = nn.CrossEntropyLoss()

best_val_acc = 0
for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    train_loss, train_correct, train_total = 0.0, 0, 0

    for x, y in tqdm(training_loader, desc=f"Epoch {epoch + 1} [Train]"):
        optimizer.zero_grad()
        logits = model(x)
        labels_tensor = torch.tensor(y, device=model.device)
        loss = criterion(logits, labels_tensor)
        loss.backward()
        # Clip the gradient norm to 5 before the update
        clip_grad_norm_(model.parameters(), 5)
        optimizer.step()

        # training metrics
        train_loss += loss.item()
        predicted = logits.argmax(dim=1)
        train_correct += predicted.eq(labels_tensor).sum().item()
        train_total += len(y)

    # ---- validation pass (no gradients, dropout disabled) ----
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0

    with torch.no_grad():
        for x, y in tqdm(validation_loader, desc=f"Epoch {epoch + 1} [Val]"):
            logits = model(x)
            labels_tensor = torch.tensor(y, device=model.device)
            loss = criterion(logits, labels_tensor)

            # validation metrics
            val_loss += loss.item()
            predicted = logits.argmax(dim=1)
            val_correct += predicted.eq(labels_tensor).sum().item()
            val_total += len(y)

    # ---- epoch summary: accuracy per example, loss averaged per batch ----
    train_acc = train_correct / train_total
    val_acc = val_correct / val_total
    avg_train_loss = train_loss / len(training_loader)
    avg_val_loss = val_loss / len(validation_loader)

    print(f"Epoch {epoch+1}/{epochs}:")
    print(f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # Checkpoint whenever the validation accuracy improves
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")
        print(f"  New best model saved! Best: {best_val_acc:.4f}")

Loaded 800 passages from ./Datasets/model_data/train_passages.csv
Loaded 300 passages from ./Datasets/model_data/validation_passages.csv
Using Apple M3 Pro GPU (MPS)


Epoch 1 [Train]: 100%|██████████| 100/100 [00:37<00:00,  2.69it/s]
Epoch 1 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.23it/s]


Epoch 1/80:
Train Loss: 2.9986, Train Acc: 0.0550
Val Loss: 2.9987, Val Acc: 0.0500
  New best model saved! Best: 0.0500


Epoch 2 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.80it/s]
Epoch 2 [Val]: 100%|██████████| 38/38 [00:05<00:00,  7.32it/s]


Epoch 2/80:
Train Loss: 2.9977, Train Acc: 0.0488
Val Loss: 2.9973, Val Acc: 0.0500


Epoch 3 [Train]: 100%|██████████| 100/100 [00:37<00:00,  2.65it/s]
Epoch 3 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.62it/s]


Epoch 3/80:
Train Loss: 2.9949, Train Acc: 0.0612
Val Loss: 2.9972, Val Acc: 0.0333


Epoch 4 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.85it/s]
Epoch 4 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.79it/s]


Epoch 4/80:
Train Loss: 2.9930, Train Acc: 0.0425
Val Loss: 2.9968, Val Acc: 0.0600
  New best model saved! Best: 0.0600


Epoch 5 [Train]: 100%|██████████| 100/100 [00:36<00:00,  2.78it/s]
Epoch 5 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.43it/s]


Epoch 5/80:
Train Loss: 2.9909, Train Acc: 0.0650
Val Loss: 2.9960, Val Acc: 0.0500


Epoch 6 [Train]: 100%|██████████| 100/100 [00:36<00:00,  2.78it/s]
Epoch 6 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.43it/s]


Epoch 6/80:
Train Loss: 2.9845, Train Acc: 0.0625
Val Loss: 2.9956, Val Acc: 0.0567


Epoch 7 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.85it/s]
Epoch 7 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.88it/s]


Epoch 7/80:
Train Loss: 2.9858, Train Acc: 0.0775
Val Loss: 2.9951, Val Acc: 0.0500


Epoch 8 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 8 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.82it/s]


Epoch 8/80:
Train Loss: 2.9843, Train Acc: 0.0663
Val Loss: 2.9949, Val Acc: 0.0467


Epoch 9 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 9 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.84it/s]


Epoch 9/80:
Train Loss: 2.9824, Train Acc: 0.0737
Val Loss: 2.9945, Val Acc: 0.0433


Epoch 10 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 10 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.16it/s]


Epoch 10/80:
Train Loss: 2.9812, Train Acc: 0.0600
Val Loss: 2.9939, Val Acc: 0.0367


Epoch 11 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.78it/s]
Epoch 11 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.52it/s]


Epoch 11/80:
Train Loss: 2.9795, Train Acc: 0.0663
Val Loss: 2.9936, Val Acc: 0.0467


Epoch 12 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.80it/s]
Epoch 12 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.60it/s]


Epoch 12/80:
Train Loss: 2.9781, Train Acc: 0.0725
Val Loss: 2.9928, Val Acc: 0.0400


Epoch 13 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.81it/s]
Epoch 13 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.62it/s]


Epoch 13/80:
Train Loss: 2.9739, Train Acc: 0.0825
Val Loss: 2.9930, Val Acc: 0.0367


Epoch 14 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.88it/s]
Epoch 14 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.72it/s]


Epoch 14/80:
Train Loss: 2.9705, Train Acc: 0.0725
Val Loss: 2.9914, Val Acc: 0.0300


Epoch 15 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.89it/s]
Epoch 15 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.71it/s]


Epoch 15/80:
Train Loss: 2.9662, Train Acc: 0.0887
Val Loss: 2.9905, Val Acc: 0.0433


Epoch 16 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 16 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.63it/s]


Epoch 16/80:
Train Loss: 2.9610, Train Acc: 0.0862
Val Loss: 2.9894, Val Acc: 0.0333


Epoch 17 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.88it/s]
Epoch 17 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.65it/s]


Epoch 17/80:
Train Loss: 2.9587, Train Acc: 0.0813
Val Loss: 2.9889, Val Acc: 0.0567


Epoch 18 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 18 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.69it/s]


Epoch 18/80:
Train Loss: 2.9499, Train Acc: 0.0963
Val Loss: 2.9886, Val Acc: 0.0533


Epoch 19 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 19 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.63it/s]


Epoch 19/80:
Train Loss: 2.9396, Train Acc: 0.0862
Val Loss: 2.9854, Val Acc: 0.0533


Epoch 20 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 20 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.49it/s]


Epoch 20/80:
Train Loss: 2.9303, Train Acc: 0.0950
Val Loss: 2.9861, Val Acc: 0.0433


Epoch 21 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.85it/s]
Epoch 21 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.44it/s]


Epoch 21/80:
Train Loss: 2.9258, Train Acc: 0.0988
Val Loss: 2.9869, Val Acc: 0.0467


Epoch 22 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 22 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.72it/s]


Epoch 22/80:
Train Loss: 2.9157, Train Acc: 0.0925
Val Loss: 2.9849, Val Acc: 0.0533


Epoch 23 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 23 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.52it/s]


Epoch 23/80:
Train Loss: 2.9095, Train Acc: 0.0988
Val Loss: 2.9891, Val Acc: 0.0433


Epoch 24 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 24 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.64it/s]


Epoch 24/80:
Train Loss: 2.9085, Train Acc: 0.1062
Val Loss: 2.9862, Val Acc: 0.0500


Epoch 25 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 25 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.55it/s]


Epoch 25/80:
Train Loss: 2.9030, Train Acc: 0.0975
Val Loss: 2.9912, Val Acc: 0.0467


Epoch 26 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
Epoch 26 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.65it/s]


Epoch 26/80:
Train Loss: 2.8991, Train Acc: 0.1013
Val Loss: 2.9945, Val Acc: 0.0500


Epoch 27 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.89it/s]
Epoch 27 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.65it/s]


Epoch 27/80:
Train Loss: 2.8952, Train Acc: 0.0950
Val Loss: 2.9888, Val Acc: 0.0533


Epoch 28 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.81it/s]
Epoch 28 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.68it/s]


Epoch 28/80:
Train Loss: 2.8851, Train Acc: 0.0975
Val Loss: 2.9920, Val Acc: 0.0400


Epoch 29 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 29 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.67it/s]


Epoch 29/80:
Train Loss: 2.8921, Train Acc: 0.0887
Val Loss: 2.9892, Val Acc: 0.0500


Epoch 30 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.85it/s]
Epoch 30 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.51it/s]


Epoch 30/80:
Train Loss: 2.8924, Train Acc: 0.0862
Val Loss: 2.9908, Val Acc: 0.0567


Epoch 31 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 31 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.62it/s]


Epoch 31/80:
Train Loss: 2.8905, Train Acc: 0.1075
Val Loss: 2.9932, Val Acc: 0.0400


Epoch 32 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 32 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.68it/s]


Epoch 32/80:
Train Loss: 2.8780, Train Acc: 0.0963
Val Loss: 3.0007, Val Acc: 0.0333


Epoch 33 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.88it/s]
Epoch 33 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.41it/s]


Epoch 33/80:
Train Loss: 2.8889, Train Acc: 0.1000
Val Loss: 2.9900, Val Acc: 0.0500


Epoch 34 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.83it/s]
Epoch 34 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.41it/s]


Epoch 34/80:
Train Loss: 2.8804, Train Acc: 0.1000
Val Loss: 2.9912, Val Acc: 0.0533


Epoch 35 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.83it/s]
Epoch 35 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.48it/s]


Epoch 35/80:
Train Loss: 2.8820, Train Acc: 0.0975
Val Loss: 3.0179, Val Acc: 0.0467


Epoch 36 [Train]: 100%|██████████| 100/100 [00:36<00:00,  2.76it/s]
Epoch 36 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.55it/s]


Epoch 36/80:
Train Loss: 2.8776, Train Acc: 0.1050
Val Loss: 2.9903, Val Acc: 0.0400


Epoch 37 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.82it/s]
Epoch 37 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.09it/s]


Epoch 37/80:
Train Loss: 2.8759, Train Acc: 0.1037
Val Loss: 2.9955, Val Acc: 0.0400


Epoch 38 [Train]: 100%|██████████| 100/100 [00:36<00:00,  2.72it/s]
Epoch 38 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.29it/s]


Epoch 38/80:
Train Loss: 2.8778, Train Acc: 0.0925
Val Loss: 2.9906, Val Acc: 0.0500


Epoch 39 [Train]: 100%|██████████| 100/100 [00:38<00:00,  2.58it/s]
Epoch 39 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.27it/s]


Epoch 39/80:
Train Loss: 2.8679, Train Acc: 0.0925
Val Loss: 2.9914, Val Acc: 0.0533


Epoch 40 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
Epoch 40 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.39it/s]


Epoch 40/80:
Train Loss: 2.8759, Train Acc: 0.0950
Val Loss: 3.0017, Val Acc: 0.0367


Epoch 41 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
Epoch 41 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.44it/s]


Epoch 41/80:
Train Loss: 2.8699, Train Acc: 0.1025
Val Loss: 2.9967, Val Acc: 0.0367


Epoch 42 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
Epoch 42 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.24it/s]


Epoch 42/80:
Train Loss: 2.8746, Train Acc: 0.1037
Val Loss: 2.9899, Val Acc: 0.0433


Epoch 43 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.82it/s]
Epoch 43 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.52it/s]


Epoch 43/80:
Train Loss: 2.8713, Train Acc: 0.1050
Val Loss: 2.9957, Val Acc: 0.0433


Epoch 44 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 44 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.55it/s]


Epoch 44/80:
Train Loss: 2.8703, Train Acc: 0.0925
Val Loss: 3.0009, Val Acc: 0.0367


Epoch 45 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.86it/s]
Epoch 45 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.54it/s]


Epoch 45/80:
Train Loss: 2.8668, Train Acc: 0.1075
Val Loss: 2.9893, Val Acc: 0.0400


Epoch 46 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.81it/s]
Epoch 46 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.49it/s]


Epoch 46/80:
Train Loss: 2.8637, Train Acc: 0.0963
Val Loss: 2.9890, Val Acc: 0.0367


Epoch 47 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 47 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.54it/s]


Epoch 47/80:
Train Loss: 2.8610, Train Acc: 0.1050
Val Loss: 2.9956, Val Acc: 0.0400


Epoch 48 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.78it/s]
Epoch 48 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.32it/s]


Epoch 48/80:
Train Loss: 2.8706, Train Acc: 0.1000
Val Loss: 2.9882, Val Acc: 0.0533


Epoch 49 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.89it/s]
Epoch 49 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.67it/s]


Epoch 49/80:
Train Loss: 2.8588, Train Acc: 0.1113
Val Loss: 2.9916, Val Acc: 0.0400


Epoch 50 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.91it/s]
Epoch 50 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.91it/s]


Epoch 50/80:
Train Loss: 2.8617, Train Acc: 0.1075
Val Loss: 2.9873, Val Acc: 0.0400


Epoch 51 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
Epoch 51 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.63it/s]


Epoch 51/80:
Train Loss: 2.8530, Train Acc: 0.1013
Val Loss: 2.9881, Val Acc: 0.0400


Epoch 52 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 52 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.98it/s]


Epoch 52/80:
Train Loss: 2.8542, Train Acc: 0.1138
Val Loss: 2.9885, Val Acc: 0.0400


Epoch 53 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 53 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.90it/s]


Epoch 53/80:
Train Loss: 2.8515, Train Acc: 0.1000
Val Loss: 2.9894, Val Acc: 0.0333


Epoch 54 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.92it/s]
Epoch 54 [Val]: 100%|██████████| 38/38 [00:04<00:00,  7.96it/s]


Epoch 54/80:
Train Loss: 2.8531, Train Acc: 0.1062
Val Loss: 2.9887, Val Acc: 0.0433


Epoch 55 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.81it/s]
Epoch 55 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.76it/s]


Epoch 55/80:
Train Loss: 2.8557, Train Acc: 0.1075
Val Loss: 2.9971, Val Acc: 0.0500


Epoch 56 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 56 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.63it/s]


Epoch 56/80:
Train Loss: 2.8532, Train Acc: 0.1125
Val Loss: 2.9898, Val Acc: 0.0400


Epoch 57 [Train]: 100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
Epoch 57 [Val]: 100%|██████████| 38/38 [00:04<00:00,  9.03it/s]


Epoch 57/80:
Train Loss: 2.8525, Train Acc: 0.1125
Val Loss: 2.9890, Val Acc: 0.0400


Epoch 58 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.92it/s]
Epoch 58 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.89it/s]


Epoch 58/80:
Train Loss: 2.8560, Train Acc: 0.1062
Val Loss: 2.9923, Val Acc: 0.0433


Epoch 59 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.82it/s]
Epoch 59 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.78it/s]


Epoch 59/80:
Train Loss: 2.8570, Train Acc: 0.1125
Val Loss: 2.9894, Val Acc: 0.0400


Epoch 60 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
Epoch 60 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.74it/s]


Epoch 60/80:
Train Loss: 2.8457, Train Acc: 0.1125
Val Loss: 2.9930, Val Acc: 0.0400


Epoch 61 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.93it/s]
Epoch 61 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.85it/s]


Epoch 61/80:
Train Loss: 2.8415, Train Acc: 0.1013
Val Loss: 2.9963, Val Acc: 0.0600


Epoch 62 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.89it/s]
Epoch 62 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.59it/s]


Epoch 62/80:
Train Loss: 2.8451, Train Acc: 0.0975
Val Loss: 2.9997, Val Acc: 0.0633
  New best model saved! Best: 0.0633


Epoch 63 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 63 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.69it/s]


Epoch 63/80:
Train Loss: 2.8564, Train Acc: 0.1125
Val Loss: 2.9907, Val Acc: 0.0333


Epoch 64 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.91it/s]
Epoch 64 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.61it/s]


Epoch 64/80:
Train Loss: 2.8410, Train Acc: 0.1225
Val Loss: 2.9932, Val Acc: 0.0400


Epoch 65 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.89it/s]
Epoch 65 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.94it/s]


Epoch 65/80:
Train Loss: 2.8430, Train Acc: 0.1075
Val Loss: 3.0069, Val Acc: 0.0567


Epoch 66 [Train]: 100%|██████████| 100/100 [00:33<00:00,  2.95it/s]
Epoch 66 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.94it/s]


Epoch 66/80:
Train Loss: 2.8518, Train Acc: 0.1212
Val Loss: 2.9881, Val Acc: 0.0300


Epoch 67 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.91it/s]
Epoch 67 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.49it/s]


Epoch 67/80:
Train Loss: 2.8435, Train Acc: 0.1075
Val Loss: 3.0008, Val Acc: 0.0600


Epoch 68 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 68 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.38it/s]


Epoch 68/80:
Train Loss: 2.8445, Train Acc: 0.1050
Val Loss: 2.9892, Val Acc: 0.0200


Epoch 69 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 69 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.71it/s]


Epoch 69/80:
Train Loss: 2.8439, Train Acc: 0.1150
Val Loss: 2.9895, Val Acc: 0.0400


Epoch 70 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.86it/s]
Epoch 70 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.48it/s]


Epoch 70/80:
Train Loss: 2.8361, Train Acc: 0.1075
Val Loss: 2.9919, Val Acc: 0.0400


Epoch 71 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.92it/s]
Epoch 71 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.75it/s]


Epoch 71/80:
Train Loss: 2.8386, Train Acc: 0.1212
Val Loss: 2.9902, Val Acc: 0.0233


Epoch 72 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.86it/s]
Epoch 72 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.51it/s]


Epoch 72/80:
Train Loss: 2.8378, Train Acc: 0.1200
Val Loss: 2.9928, Val Acc: 0.0467


Epoch 73 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 73 [Val]: 100%|██████████| 38/38 [00:05<00:00,  7.27it/s]


Epoch 73/80:
Train Loss: 2.8383, Train Acc: 0.1138
Val Loss: 2.9896, Val Acc: 0.0300


Epoch 74 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.88it/s]
Epoch 74 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.60it/s]


Epoch 74/80:
Train Loss: 2.8322, Train Acc: 0.1138
Val Loss: 2.9911, Val Acc: 0.0300


Epoch 75 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]
Epoch 75 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.35it/s]


Epoch 75/80:
Train Loss: 2.8347, Train Acc: 0.1237
Val Loss: 2.9924, Val Acc: 0.0200


Epoch 76 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.83it/s]
Epoch 76 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.76it/s]


Epoch 76/80:
Train Loss: 2.8310, Train Acc: 0.1138
Val Loss: 2.9929, Val Acc: 0.0367


Epoch 77 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.93it/s]
Epoch 77 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.75it/s]


Epoch 77/80:
Train Loss: 2.8407, Train Acc: 0.1113
Val Loss: 2.9913, Val Acc: 0.0233


Epoch 78 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.79it/s]
Epoch 78 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.48it/s]


Epoch 78/80:
Train Loss: 2.8376, Train Acc: 0.1125
Val Loss: 2.9924, Val Acc: 0.0567


Epoch 79 [Train]: 100%|██████████| 100/100 [00:35<00:00,  2.84it/s]
Epoch 79 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.77it/s]


Epoch 79/80:
Train Loss: 2.8357, Train Acc: 0.1138
Val Loss: 3.0037, Val Acc: 0.0433


Epoch 80 [Train]: 100%|██████████| 100/100 [00:34<00:00,  2.90it/s]
Epoch 80 [Val]: 100%|██████████| 38/38 [00:04<00:00,  8.68it/s]

Epoch 80/80:
Train Loss: 2.8313, Train Acc: 0.1087
Val Loss: 2.9975, Val Acc: 0.0467





In [None]:
# Evaluation: score the best checkpoint (saved during training) on the
# held-out test passages and report accuracy, confusion matrix and
# per-decade precision/recall.
import numpy as np
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

print("Loading model...")
# map_location="cpu" lets a checkpoint written on a GPU machine be read on a
# CPU-only one; load_state_dict then copies every tensor onto whatever device
# the model's parameters already live on, so nothing else has to change.
model.load_state_dict(torch.load("best_model.pth", map_location="cpu"))
model.eval()

test_set = HistoricalTextDataset(f'{test_data_path}', word_to_id, max_length=300)

all_predictions = []
all_labels = []

# Inference only: a single no_grad context around the whole loop instead of
# re-entering it for every passage.
with torch.no_grad():
    for x, y in test_set:
        # The model is called with a batch of one passage, as in the original cell.
        logits = model([x])
        predicted = torch.argmax(logits, dim=1).cpu().numpy()[0]
        all_predictions.append(predicted)
        all_labels.append(y)

# One display name per class: the decades 1700, 1710, ..., 1890 (20 classes).
decade_labels = [f"{i}" for i in range(1700, 1900, 10)]
# Explicit class ids keep the confusion matrix at a fixed 20x20 and keep
# target_names aligned with the classes. Without `labels=`, scikit-learn
# infers the class set from the data, so a decade that is absent from both
# the labels and the predictions would shrink the matrix and make
# classification_report raise a ValueError (target_names length mismatch).
# NOTE(review): assumes class ids are 0..19 in chronological order - confirm
# against the label encoding used by HistoricalTextDataset.
class_ids = list(range(len(decade_labels)))

# Main evaluation metrics (chance level for 20 balanced classes is 0.05).
accuracy = accuracy_score(all_labels, all_predictions)

print(f"Test Accuracy: {accuracy:.4f}")
print("\nConfusion Matrix:")
cm = confusion_matrix(all_labels, all_predictions, labels=class_ids)
print(cm)

print("\nClassification Report:")
print(
    classification_report(
        all_labels,
        all_predictions,
        labels=class_ids,
        target_names=decade_labels,
    )
)

Loading model...
Loaded 200 passages from ./Datasets/model_data/test_passages.csv
Test Accuracy: 0.0650

Confusion Matrix:
[[1 1 3 1 1 0 0 1 0 0 0 0 0 0 0 0 0 2 0 0]
 [0 0 0 1 1 0 0 1 1 0 1 0 0 0 2 2 1 0 0 0]
 [0 1 1 0 1 1 0 1 0 0 1 4 0 0 0 0 0 0 0 0]
 [3 5 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0]
 [3 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0]
 [2 1 0 0 0 1 0 1 1 0 1 0 0 0 1 1 0 1 0 0]
 [2 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 3 0 1 1]
 [2 1 0 0 1 0 1 1 0 0 0 0 0 0 0 3 1 0 0 0]
 [2 0 1 0 0 0 0 2 1 0 1 1 0 0 0 0 1 0 1 0]
 [0 0 0 0 0 1 1 0 1 0 1 0 0 0 2 1 0 1 2 0]
 [0 0 4 0 0 0 0 1 0 1 0 1 0 0 0 0 1 1 0 1]
 [1 0 1 1 0 1 0 0 2 0 1 0 0 0 1 0 1 0 0 1]
 [0 1 0 2 1 3 0 1 0 0 0 0 0 1 0 0 0 0 1 0]
 [0 1 2 0 0 0 0 0 1 1 0 0 0 0 0 1 3 0 0 1]
 [0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 3 0 2 1]
 [2 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 0 2 1]
 [1 2 0 0 0 1 0 0 3 0 1 0 0 0 0 0 1 0 1 0]
 [0 1 2 0 1 0 0 0 1 2 2 0 0 0 0 0 0 0 1 0]
 [2 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 2 0 5 0]
 [1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 4 2 1 0]]

Classification 