In [1]:
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from any library used below;
# consider narrowing the filter once the noisy warnings are identified.
warnings.filterwarnings("ignore")


In [2]:
import rejto

# Corpus object from the project-local `rejto` module. The rest of the notebook
# uses its words(), sents() and n_sents() methods.
rejto_corpus = rejto.Rejto_corpus()

In [3]:
import numpy as np
import tensorflow as tf
# Restrict TensorFlow's (v1-compat) logger to ERROR-level messages.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Maximum number of tokens kept per sentence. RejtoReader.sentence_matrixes
# truncates longer sentences to this length and builds matrices with
# max_seq_length - 1 columns, so changing it changes the matrix width.
max_seq_length = 30

# Number of rows in the matrices built by RejtoReader.sentence_matrixes;
# here it is the sentence count reported by the corpus.
max_num_of_sents = rejto_corpus.n_sents()

def generate_rejto_word_to_id_map():
    """Build the vocabulary of the Rejto corpus as a word -> id dictionary.

    Every word returned by ``rejto_corpus.words()`` is lower-cased and
    de-duplicated; the distinct words are then sorted and numbered
    consecutively. Ids start at 1 because 0 is reserved for masking.

    Returns:
        dict mapping each lower-cased word to its positive integer id.
    """
    vocabulary = sorted({token.lower() for token in rejto_corpus.words()})
    return dict(zip(vocabulary, range(1, len(vocabulary) + 1)))


class RejtoReader:
    """A secondary reader that turns the Rejto corpus into word-id sequences.

    The class reads the module-level ``rejto_corpus``, ``max_num_of_sents``
    and ``max_seq_length`` globals.

    Attributes:
        word_to_id_map: dict mapping each lower-cased word to its integer id.
        id_to_word_map: the inverse mapping (id -> lower-cased word).
    """

    def __init__(self):
        self.word_to_id_map = generate_rejto_word_to_id_map()
        self.id_to_word_map = {idx: word for word, idx in self.word_to_id_map.items()}

    def n_words(self):
        """Return the number of distinct (lower-cased) words in the vocabulary."""
        return len(self.word_to_id_map)

    def sentence_to_ids(self, sentence):
        """Return the word ids of a sentence (an iterable of word strings).

        Words are lower-cased before the lookup.

        Raises:
            KeyError: if a word is not in the vocabulary.
        """
        return [self.word_to_id_map[word.lower()] for word in sentence]

    def sentences(self):
        """Return a generator yielding the id list of every corpus sentence."""
        return (self.sentence_to_ids(sentence) for sentence in rejto_corpus.sents())

    def sentence_matrixes(self):
        """Build zero-padded input/target matrices for next-word prediction.

        Both matrices have shape ``(max_num_of_sents, max_seq_length - 1)``.
        For sentence ``i`` (truncated to ``max_seq_length`` tokens), row ``i``
        of ``x`` holds all of its ids but the last, and row ``i`` of ``y``
        holds the same ids shifted left by one, so ``y[i, t]`` is the id that
        follows ``x[i, t]``. Unused positions stay 0.

        At most ``max_num_of_sents`` sentences are read. Sentences with fewer
        than two tokens contribute an all-zero row.

        Returns:
            tuple ``(x, y)`` of numpy arrays.
        """
        x = np.zeros((max_num_of_sents, max_seq_length - 1))
        y = np.zeros((max_num_of_sents, max_seq_length - 1))
        for idx, sent in enumerate(self.sentences()):
            if idx == max_num_of_sents:
                # Matrices are full; ignore any remaining sentences.
                break
            np_array = np.asarray(sent)
            length = min(max_seq_length, len(np_array))
            # Number of (input, target) pairs. Clamped at 0 so an empty
            # sentence does not turn into the slice ``[:-1]``.
            n_pairs = max(length - 1, 0)
            x[idx, :n_pairs] = np_array[:n_pairs]
            y[idx, :n_pairs] = np_array[1:n_pairs + 1]
        return x, y


In [4]:
# Build the vocabulary maps once; the prediction cell below uses `r` for
# word <-> id lookups.
r = RejtoReader()
n_words = r.n_words()

# Width of a single model input row. Derived from max_seq_length instead of
# hard-coding 29 so it always matches the matrices built by
# RejtoReader.sentence_matrixes, which have max_seq_length - 1 columns.
max_input_length = max_seq_length - 1

In [5]:
# Load the saved Keras model from the path "30length_15epochs" (resolved
# relative to the current working directory).
# NOTE(review): judging by the name, this was presumably trained with
# max_seq_length = 30 for 15 epochs -- confirm before changing max_seq_length.
model = tf.keras.models.load_model("30length_15epochs")

In [6]:
# Prediction
############

def str_to_input(s):
    """Encode a whitespace-separated string as a single model input row.

    Only the first ``max_input_length`` tokens are used. Each token is
    lower-cased and replaced by its id from ``r.word_to_id_map``; remaining
    positions are left as 0.

    Returns:
        tuple ``(row, length)`` where ``row`` is a ``(1, max_input_length)``
        numpy array and ``length`` is the number of tokens encoded.

    Raises:
        KeyError: if a token is not in the vocabulary.
    """
    tokens = s.split()[:max_input_length]
    # Resolve every id first so an unknown word raises before any array is built.
    token_ids = [r.word_to_id_map[token.lower()] for token in tokens]
    encoded = np.zeros((1, max_input_length))
    for position, token_id in enumerate(token_ids):
        encoded[0, position] = token_id
    return encoded, len(token_ids)
    

# Running text: everything typed so far plus every word the model predicted.
predicted_words = ""

# Interactive loop:
#   * non-empty input -> predict the word following the typed text,
#   * empty input     -> continue from the running text,
#   * "\q"            -> quit.
# NOTE(review): typed text is appended to the running text, but the prediction
# is conditioned only on the newly typed words -- confirm this is intended.
while True:
    s = input("\nEnter a few starting words of a sentence: ")
    if s == "\\q":  # escaped backslash: "\q" is an invalid escape sequence
        break

    typed = s != ""
    if not typed:
        s = predicted_words

    if not s.split():
        # No tokens at all: length would be 0 and the code below would read
        # the prediction at position -1.
        print("Nothing to continue yet -- please enter some words!")
        continue

    try:
        x, length = str_to_input(s)
    except KeyError:
        # Nothing has been appended yet, so a typo cannot poison the running text.
        print("Unknown words -- please try again!")
        continue

    if typed:
        predicted_words += " " + s

    predictions = model(x).numpy()
    # Distribution over the vocabulary at the position of the last input word.
    probs = predictions[0][length - 1]
    most_probable = int(np.argmax(probs))

    # Id 0 is the padding/mask id and has no entry in the vocabulary.
    word = r.id_to_word_map.get(most_probable)
    if word is None:
        print("The model predicted padding -- no next word available.")
        continue

    print("Predicted next word:", word)
    predicted_words += " " + word
    print(predicted_words)



Enter a few starting words of a sentence: Nem
Predicted next word: is
 Nem is

Enter a few starting words of a sentence: 
Predicted next word: sejtette
 Nem is sejtette

Enter a few starting words of a sentence: 
Predicted next word: ,
 Nem is sejtette ,

Enter a few starting words of a sentence: 
Predicted next word: hogy
 Nem is sejtette , hogy

Enter a few starting words of a sentence: 
Predicted next word: a
 Nem is sejtette , hogy a

Enter a few starting words of a sentence: 
Predicted next word: fejükkel
 Nem is sejtette , hogy a fejükkel

Enter a few starting words of a sentence: 
Predicted next word: játszanak
 Nem is sejtette , hogy a fejükkel játszanak

Enter a few starting words of a sentence: 
Predicted next word: ,
 Nem is sejtette , hogy a fejükkel játszanak ,

Enter a few starting words of a sentence: 
Predicted next word: mert
 Nem is sejtette , hogy a fejükkel játszanak , mert

Enter a few starting words of a sentence: 
Predicted next word: a
 Nem is sejtette , hogy a