In [6]:
import pickle
import keras
from gensim.models import Word2Vec
import numpy as np
from utils.params import params

# Embedding size read from utils.params. text_to_embeddings uses it as the
# length of its all-zeros fallback vector, so it is expected to match the
# Word2Vec vector size -- TODO confirm.
embedding_dim = params['embedding_dim']

In [7]:
# Restore the pickled helpers used by the cells below.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('utils/tokenizer.pkl', 'rb') as tokenizer_file:
    # Called as `tokenizer(text)` in text_to_embeddings to split text into tokens.
    tokenizer = pickle.load(tokenizer_file)

with open('utils/label_encoder.pkl', 'rb') as encoder_file:
    # Maps predicted class indices back to labels via `inverse_transform`.
    label_encoder = pickle.load(encoder_file)

# Classifier whose `predict` output is arg-maxed to pick a class index.
model = keras.models.load_model('utils/model.h5')

# Word2Vec model whose `.wv` vectors are averaged in text_to_embeddings.
word2vec_model = Word2Vec.load('utils/word2vec_model.bin')

In [8]:
def text_to_embeddings(text, word2vec_model):
    """Turn a piece of text into a single averaged word vector.

    The text is split with the notebook-level ``tokenizer``; every token
    that is present in ``word2vec_model.wv`` contributes its vector, and the
    element-wise mean of those vectors is returned.

    Args:
        text: Input passed unchanged to ``tokenizer``.
        word2vec_model: Object exposing a ``wv`` attribute that supports
            ``token in wv`` and ``wv[token]``.

    Returns:
        The mean (along axis 0) of the vectors of all known tokens, or a
        zero vector of length ``embedding_dim`` when no token is known.
    """
    known_vectors = [
        word2vec_model.wv[token]
        for token in tokenizer(text)
        if token in word2vec_model.wv
    ]
    # No in-vocabulary token at all: fall back to an all-zeros embedding.
    if not known_vectors:
        return np.zeros(embedding_dim)
    return np.mean(known_vectors, axis=0)


In [9]:
# Sample inputs for a manual check of the classifier. Each entry is a
# (text, expected label) pair consumed by the evaluation loop further down.
tuple_test_data = (
    ('zucker fabrik', 'ft'),
    ('Lebensmittel kommssionierung', 'ft'),
    ('geländer biegen', 'mr'),
    ('gebäudeausrüstung technische', 'ct'),
    ('kürbiskernöl softgels', 'ft')
)

In [10]:
def predict_word(word: str):
    """Predict the label for a piece of text.

    The text is embedded with ``text_to_embeddings`` (using the notebook-level
    ``word2vec_model``), reshaped into a one-row batch, scored by the
    notebook-level ``model``, and the index of the highest score is mapped
    back to a label through ``label_encoder``.

    Args:
        word: Text to classify; it is handed to ``text_to_embeddings`` as-is,
            so multi-word phrases are accepted as well.

    Returns:
        The first element of ``label_encoder.inverse_transform`` applied to
        the winning class index.
    """
    features = text_to_embeddings(word, word2vec_model)
    # The model is fed a batch, so wrap the single vector as shape (1, n).
    batch = features.reshape(1, -1)
    scores = model.predict(batch)
    best_index = scores.argmax(axis=1)[0]
    return label_encoder.inverse_transform([best_index])[0]

# Quick single-sample check of predict_word; the cell output shows the label.
predict_word(word='Lebensmittel kommssionierung')



'ft'

In [11]:
# Compare the prediction for every sample with its expected label.
# Inference is delegated to predict_word() so the embed -> predict -> decode
# steps live in one place instead of being copied into this loop.
for word, correct in tuple_test_data:
    predicted_label = predict_word(word)

    print(f'For word \'{word}\'')
    print(f'We predict {predicted_label}')
    print(f'The correct solution is {correct}')
    print()


For word 'zucker fabrik'
We predict ch
The correct solution is ft

For word 'Lebensmittel kommssionierung'
We predict ft
The correct solution is ft

For word 'geländer biegen'
We predict mr
The correct solution is mr

For word 'gebäudeausrüstung technische'
We predict ct
The correct solution is ct

For word 'kürbiskernöl softgels'
We predict ct
The correct solution is ft

