In [1]:
import tensorflow as tf
import numpy as np
import os
import time
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense,Embedding,GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RNN, LSTM, RepeatVector

In [2]:
# Vocabulary of the character-level model. The position of a character in
# `idx2char` is its integer id, so the order below must never change.
idx2char = (
    ['\n', ' ']                             # whitespace
    + list('!(),-.:;?')                     # ASCII punctuation
    + list('ABCDEFGHIJKLMNOPRSTUVWZ')       # upper-case letters (no Q, X, Y)
    + list('abcdefghijklmnopqrstuvwxyz')    # lower-case letters
    + list('ÓàéóąĆćęŁłńŚśŹźŻż')             # accented / Polish letters
    + list('—’“„')                          # typographic dash and quotes
)

# Reverse lookup (character -> id), derived from the list above so the two
# tables always agree.
char2idx = {char: index for index, char in enumerate(idx2char)}

In [3]:
def to_text(sample):
    """Decode a sequence of character ids into a string.

    Each element of `sample` is cast to `int` and looked up in `idx2char`;
    the resulting characters are concatenated in order.
    """
    decoded = []
    for char_id in sample:
        decoded.append(idx2char[int(char_id)])
    return ''.join(decoded)

In [4]:
def build_model(batch_size=64):
    """Assemble the character-level network: Embedding -> stateful LSTM -> Dense.

    Args:
        batch_size: Fixed batch dimension baked into the input shape
            (`[batch_size, None]`, i.e. variable sequence length).

    Returns:
        An uncompiled `tf.keras.Sequential` whose last layer has one linear
        output (logit) per entry of `char2idx`.
    """
    n_chars = len(char2idx)
    layers = [
        Embedding(n_chars, 256, batch_input_shape=[batch_size, None]),
        LSTM(1024,
             return_sequences=True,
             stateful=True,  # keep the recurrent state between successive calls
             recurrent_initializer='glorot_uniform'),
        Dense(n_chars),
    ]
    return tf.keras.Sequential(layers)
        
# Text generation feeds a single sequence at a time, so the network is built
# with a batch dimension of 1.
model = build_model(batch_size=1)
model.summary()

def loss(labels, logits):
    """Sparse categorical cross-entropy evaluated on raw (unnormalized) logits."""
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)
# The custom `loss` is passed instead of the 'sparse_categorical_crossentropy'
# string because the final Dense layer has no activation and emits logits.
model.compile(loss=loss, optimizer='adam')

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (1, None, 256)            20736     
_________________________________________________________________
lstm (LSTM)                  (1, None, 1024)           5246976   
_________________________________________________________________
dense (Dense)                (1, None, 81)             83025     
Total params: 5,350,737
Trainable params: 5,350,737
Non-trainable params: 0
_________________________________________________________________


In [5]:
model.load_weights('weights_5800.h5')

In [6]:
def generate_text(model, start_string, size=1000,temperature=1.,verbose=0):
    """Generate text character by character with a stateful character-level model.

    The whole `start_string` is fed to the model first; afterwards every
    sampled character is fed back as the next single-character input, relying
    on the recurrent state the model keeps between calls.

    Args:
        model: Callable Keras model mapping a `(1, seq_len)` tensor of
            character ids to `(1, seq_len, vocab_size)` logits and providing
            `reset_states()`.
        start_string: Non-empty seed text; every character must be a key of
            `char2idx`.
        size: Number of characters to generate.
        temperature: Sampling temperature (> 0). The logits are divided by it,
            so values below 1 give more predictable text and values above 1
            give more surprising text.
        verbose: If truthy, print per-step diagnostics (input, arg-max guess,
            sampled candidates, text so far); otherwise stream the generated
            characters as they are produced.

    Returns:
        `start_string` followed by the `size` generated characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not
            positive.
        KeyError: If `start_string` contains a character missing from
            `char2idx`.
    """
    # Validate before printing anything: an empty seed gives the model nothing
    # to predict from, and a non-positive temperature cannot scale the logits.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError(
            "temperature must be positive, got {!r}".format(temperature))

    print('Text:',start_string,end='')

    # Vectorize the seed and add the batch dimension -> shape (1, len(seed)).
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    # Characters generated so far (the seed is not included).
    text_generated = []

    # Drop any recurrent state left over from a previous run.
    model.reset_states()
    for _ in range(size):
        if verbose:
            print("====================================================")
            print('input:',to_text(input_eval.numpy()[0]))
        predictions = model(input_eval)

        if verbose:
            print('"Best" prediction:',to_text(predictions.numpy().argmax(axis=2)[0]))
        # Remove the batch dimension -> shape (seq_len, vocab_size).
        predictions = tf.squeeze(predictions, 0)

        # Always taking the arg-max easily gets stuck in a loop, so the next
        # character is drawn from the categorical distribution defined by the
        # logits. Dividing by the temperature flattens (T > 1) or sharpens
        # (T < 1) that distribution.
        predictions = predictions / temperature
        samples = tf.random.categorical(predictions, num_samples=10)
        if verbose:
            print('sampled predictions:')
            for j in range(samples.shape[1]):
                print(to_text(samples[:,j].numpy()), end=', ')
            print()

        # Only the first draw for the last time step is actually used; the
        # remaining draws exist for the verbose listing above.
        predicted_id = samples[-1,0].numpy()

        if verbose:
            print('chosen_id',predicted_id,'letter:',idx2char[predicted_id])

        # The chosen character becomes the next input; the model's hidden
        # state carries the context of everything seen so far.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])
        if verbose:
            print(text_generated)
            print(start_string + ''.join(text_generated))
        else:
            print(idx2char[predicted_id],end='')

    print()
    return (start_string + ''.join(text_generated))

# Verbose demo run: every step prints the input, the arg-max guess, the
# sampled candidates and the text generated so far.
txt = generate_text(
    model,
    start_string="Polsko ",
    size=500,
    temperature=1,
    verbose=1,
)
print()
print('=' * 50)
# `txt` now holds the seed followed by the generated characters.

input: Polsko 
"Best" prediction: olskitc
sampled predictions:
ilski g, okskitc, alskacc, osskacn, ilskiwg, ilskicc, ilski c, isskiwg, ilskatz, rkskutc, 
chosen_id 40 letter: g
['g']
Polsko g
input: g
"Best" prediction: a
sampled predictions:
a, a, n, a, a, a, w, o, a, a, 
chosen_id 34 letter: a
['g', 'a']
Polsko ga
input: a
"Best" prediction: r
sampled predictions:
r, r, r, r, r, r, r, r, r, r, 
chosen_id 51 letter: r
['g', 'a', 'r']
Polsko gar
input: r
"Best" prediction: ś
sampled predictions:
n, ś, ś, n, a, n, ś, ś, n, ś, 
chosen_id 47 letter: n
['g', 'a', 'r', 'n']
Polsko garn
input: n
"Best" prediction: i
sampled predictions:
i, i, i, i, i, i, i, i, i, i, 
chosen_id 42 letter: i
['g', 'a', 'r', 'n', 'i']
Polsko garni
input: i
"Best" prediction: e
sampled predictions:
e, e, e, e, c, c,  , n, e, a, 
chosen_id 38 letter: e
['g', 'a', 'r', 'n', 'i', 'e']
Polsko garnie
input: e
"Best" prediction:  
sampled predictions:
 , !,  , r, r, j,  , r,  ,  , 
chosen_id 1 letter:  
['g', 'a', 'r'

In [None]:
# Quiet run: only the generated characters are streamed to the output.
txt = generate_text(model, "Polsko ", size=500, temperature=1, verbose=0)
