In [13]:
import json

import pandas as pd
import torch

from torch import nn

from src.word_to_embedding import WordToEmbedding

# Presumably the seed for reproducible runs; it is not referenced by any of
# the cells visible in this notebook.
RANDOM_STATE = 42

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [14]:
# Shared embedder instance; predict() calls w2e.get_embedding(word) on it to
# turn a word into the sequence of vectors fed to the classifier.
w2e = WordToEmbedding()

In [15]:
class LoanwordClassifier(nn.Module):
    """Minimal recurrent classifier that consumes a sequence one step at a time.

    At every step the current input is concatenated with the previous hidden
    state, a linear layer maps the pair to the new hidden state (no
    non-linearity is applied), and a second linear layer followed by
    ``LogSoftmax`` maps that hidden state to log-probabilities over the
    output classes.

    The layers are created on the module-level ``device``. At call time the
    module follows wherever its parameters currently live, so it keeps working
    after ``model.to(...)`` / ``model.cpu()``.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        """Build the layers.

        Args:
            input_size: Number of features in one input step.
            hidden_size: Number of features in the recurrent hidden state.
            output_size: Number of output classes.
        """
        super().__init__()
        self.__hidden_size = hidden_size

        # NOTE: the double-underscore names are mangled by Python to
        # ``_LoanwordClassifier__i2h`` / ``_LoanwordClassifier__h2o`` and that is
        # how they appear in ``state_dict()``. Renaming them would make
        # previously saved checkpoints fail to load.
        self.__i2h = nn.Linear(input_size + hidden_size, hidden_size).to(device)
        self.__h2o = nn.Linear(hidden_size, output_size).to(device)
        # LogSoftmax has no parameters or buffers, so it needs no placement.
        self.__softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_tensor, hidden):
        """Advance the recurrence by one step.

        Args:
            input_tensor: Tensor of shape ``(batch, input_size)``.
            hidden: Tensor of shape ``(batch, hidden_size)``, the previous state.

        Returns:
            ``(output, hidden)`` where ``output`` holds log-probabilities of
            shape ``(batch, output_size)`` and ``hidden`` is the new state.
        """
        # Follow the parameters rather than the global, so a moved model still
        # receives its inputs on the right device.
        target_device = self.__i2h.weight.device
        combined = torch.cat(
            (input_tensor.to(target_device), hidden.to(target_device)), dim=1
        )

        hidden = self.__i2h(combined)
        output = self.__softmax(self.__h2o(hidden))

        return output, hidden

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape ``(1, hidden_size)``.

        The tensor is allocated with the same dtype and device as the model
        weights, so no conversion is needed on the first step.
        """
        return self.__i2h.weight.new_zeros(1, self.__hidden_size)

In [16]:
def category_from_output(output, id_to_label):
    """Pick the single best-scoring class from a model output.

    Args:
        output: Score tensor; the highest value along the last dimension of
            the first row is selected.
        id_to_label: Mapping from integer class id to label.

    Returns:
        ``(label, class_id)`` for the top-scoring class.
    """
    best_id = output.topk(1).indices[0].item()
    return id_to_label[best_id], best_id

In [17]:
def predict(word, model, id_to_label, top_k=5):
    """Rank the highest-scoring labels for ``word``.

    The word is embedded with the module-level ``w2e``, the resulting sequence
    (one vector per syllable, judging by the original naming) is fed through
    ``model`` step by step, and the scores of the final step are ranked.

    Args:
        word: The word to classify.
        model: Recurrent model exposing ``init_hidden()`` and
            ``model(step, hidden) -> (output, hidden)``.
        id_to_label: Mapping from integer class id to label.
        top_k: Maximum number of labels to return. Clamped to the number of
            classes the model emits, so small label sets do not raise.

    Returns:
        Dict mapping label to score, ordered from best to worst. The scores are
        the raw model outputs; ``LoanwordClassifier`` ends in ``LogSoftmax``,
        so they are log-probabilities (<= 0), not probabilities.

    Raises:
        ValueError: If the embedder yields no vectors for ``word``.
    """
    word_tensor = torch.Tensor(w2e.get_embedding(word))
    if word_tensor.numel() == 0:
        # Without a single step the model is never called and there is nothing to rank.
        raise ValueError(f'no embeddings produced for word {word!r}')

    # (steps, features) -> (steps, 1, features): iterating then yields one
    # (1, features) batch per step, which is what the model consumes.
    n_steps, n_features = word_tensor.shape[0], word_tensor.shape[1]
    word_tensor = word_tensor.reshape((n_steps, 1, n_features))

    hidden = model.init_hidden()

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for step_embedding in word_tensor:
            output, hidden = model(step_embedding, hidden)

    k = min(top_k, output.shape[-1])
    scores, label_ids = output.topk(k)

    return {
        id_to_label[label_id.item()]: score.item()
        for score, label_id in zip(scores[0], label_ids[0])
    }

In [27]:
# Every artefact of one training run shares this suffix; change it here to
# switch to another run.
MODEL_TAG = '2024-02-06-1024hidden-10epochs'

# Read the JSON files as UTF-8 explicitly instead of relying on the platform
# default encoding.
with open(f'models/label-to-id-{MODEL_TAG}.json', encoding='utf-8') as fp:
    label_to_id = json.load(fp)

with open(f'models/id-to-label-{MODEL_TAG}.json', encoding='utf-8') as fp:
    # JSON object keys are always strings; restore the integer class ids that
    # predict() uses for lookups.
    id_to_label = {int(key): value for key, value in json.load(fp).items()}

# The sizes must match the checkpoint being loaded ('1024hidden' in the tag);
# 512 is presumably the embedding width produced by WordToEmbedding - confirm.
model = LoanwordClassifier(input_size=512, hidden_size=1024, output_size=len(label_to_id))

state_dict_path = f'models/classifier-{MODEL_TAG}.pth'
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints from a trusted source, or pass weights_only=True on
# PyTorch versions that support it.
# The tensors are read onto the CPU; load_state_dict copies them into the
# model's own parameters.
model.load_state_dict(torch.load(state_dict_path, map_location=torch.device('cpu')))

<All keys matched successfully>

In [26]:
# Sanity check on one sample word: predict() returns the five highest-scoring
# labels together with the model's scores for them.
predict('рахат', model, id_to_label)

{'bg': -0.38887545466423035,
 'fr': -2.57806134223938,
 'la': -2.737632989883423,
 'en': -2.968045711517334,
 'el': -2.98453950881958}