# Model evaluation

Now that we have a trained model that appears to have learned well, we can examine its outputs to see how well it really learned.

In [1]:
import yaml
import torch
from ataarangi.models import TransformerModel, RNNModel
from ataarangi.data import SequenceTokenizer, load_data

In [2]:
# Use the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Build the tokenizer from the two token-list files under ../data
# (presumably the world-state vocabulary and the text vocabulary, in that
# order -- confirm against SequenceTokenizer).
tokenizer = SequenceTokenizer(
    '../data/worldstate_tokens.txt',
    '../data/tokens.txt',
)

In [8]:
! ls ../models/*.pth -t | head -n1

../models/lr=0.00086-num_layers=2-embed_size=128-hidden_size=256.pth


In [9]:
def load_model(path, params):
    """Rebuild an ``RNNModel`` and restore its weights from a checkpoint.

    Args:
        path: Location of the ``.pth`` file holding the saved state dict.
        params: Keyword arguments forwarded to ``RNNModel(**params)``. They
            must describe the architecture the checkpoint was saved from,
            otherwise ``load_state_dict`` raises on mismatched keys/shapes.

    Returns:
        The model with the checkpoint weights loaded, in evaluation mode.
    """
    model = RNNModel(**params)
    # map_location='cpu' lets a checkpoint saved from a GPU be read on a
    # machine without CUDA. load_state_dict copies the values into the model's
    # existing parameters, so the model's own device placement is unchanged.
    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    # This notebook only runs inference: evaluation mode turns off
    # training-only behaviour such as dropout. Assumes RNNModel is a
    # torch.nn.Module (it already exposes load_state_dict).
    model.eval()
    return model

# Constructor arguments for the checkpoint loaded below; the sizes and layer
# count mirror the values encoded in the checkpoint's file name.
best_model_params = dict(
    tokenizer=tokenizer,
    embed_size=128,
    hidden_size=256,
    num_layers=2,
)

# Restore the trained weights into a freshly constructed model.
best_model = load_model(
    '../models/lr=0.00086-num_layers=2-embed_size=128-hidden_size=256.pth',
    best_model_params,
)

In [10]:
# Read the training and development splits from their CSV files.
train_data, dev_data = load_data(
    '../data/train_set.csv',
    '../data/dev_set.csv',
)

In [11]:
# Tokenize entry 12 of dev_data['rākau'] to use as the generation prompt.
tokens = tokenizer.tokenize(dev_data['rākau'][12])

# Long-typed tensor copy of the prompt on the selected device.
# NOTE(review): this tensor is not passed to generate() below -- the plain
# token list is. Confirm which input type generate() expects.
tokens_tensor = torch.tensor(tokens, dtype=torch.long, device=device)

# Take the argmax over the last axis of the generate() output and keep the
# first row (presumably per-token scores with a leading batch axis -- TODO
# confirm), then convert it to a plain Python list of token ids.
generated_sequence = (
    best_model.generate(tokens)
    .argmax(dim=-1)[0]
    .tolist()
)
print(generated_sequence)

[38, 25, 0, 0, 0, 0, 0, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31]


In [20]:
# Decode the prompt token ids back to their token names to see what the model was given.
tokenizer.decode(tokens)

'[SOS] [NOT_SELECTED] [COLOUR_BLACK] [HEIGHT_8] [SELECTED] [COLOUR_BLACK] [HEIGHT_10] [SEP]'

In [13]:
# Decode the predicted token ids into text so the generation can be read directly.
print(tokenizer.decode(generated_sequence))

katoa ngā [PAD] [PAD] [PAD] [PAD] [PAD] parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri parauri
