In [1]:
from keras.models import load_model
import random
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Restore the previously saved Keras model from its .hdf5 file; the cells below
# use it to predict the next word during text generation.
model = load_model('saves/models/model.hdf5')

In [3]:
# Model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only appends a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 30, 512)           24928256  
_________________________________________________________________
dropout_1 (Dropout)          (None, 30, 512)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 512)               2099200   
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 11659)             5981067   
_________________________________________________________________
activation_1 (Activation)    (None, 11659)             0         
Total params: 33,008,523
Trainable params: 33,008,523
Non-trainable params: 0
________________________________________________________________

In [4]:
def sample_softmax(preds, temperature=1.0):
    """Sample one index from a probability vector after temperature scaling.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and a single index is drawn from the resulting distribution with
    ``np.random.multinomial``.

    Args:
        preds: 1-D array-like of probabilities (for example one row of a
            softmax output). Zero entries are allowed; for positive
            temperatures they are never sampled.
        temperature: Scaling factor applied in log space. Positive values
            below 1 sharpen the distribution, values above 1 flatten it.
            ``0`` selects the most likely index deterministically, which is
            the limit of the scaling as the temperature approaches zero.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')

    if temperature == 0:
        # Dividing the log-probabilities by zero would only produce NaNs;
        # greedy selection is the well-defined limiting behaviour.
        return np.argmax(preds)

    # log(0) == -inf is the intended "never pick this" weight, so silence the
    # divide-by-zero warning NumPy would otherwise emit for zero entries.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating: the softmax is unchanged,
    # but the largest term becomes exp(0) == 1, so small temperatures can no
    # longer underflow every term to zero and yield 0/0 probabilities.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    scaled = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot row; its argmax is the drawn index.
    probas = np.random.multinomial(1, scaled, 1)
    return np.argmax(probas)

def generate_seed_sentence(list_words, maxlen_seed):
    """Return a randomly positioned run of consecutive words from the text.

    The start offset is drawn uniformly from ``0`` up to and including
    ``len(list_words) - maxlen_seed - 1``; the slice of ``maxlen_seed`` words
    beginning at that offset is returned.

    Args:
        list_words: The text as a sequence of words.
        maxlen_seed: Number of words in the returned seed.

    Returns:
        A slice of ``list_words`` holding ``maxlen_seed`` consecutive words.

    Raises:
        ValueError: If there is no valid start offset (the text is not longer
            than ``maxlen_seed``).
    """
    offset_count = len(list_words) - maxlen_seed
    first = random.randrange(0, offset_count)
    last = first + maxlen_seed
    return list_words[first:last]

def sample_words(model, words, sentence, temperature, maxlen_seed, maxlen_gen,
                 word_to_index=None, index_to_word=None):
    """Generate words one at a time by sampling the model's next-word output.

    At every step the current window of words is one-hot encoded into an
    array of shape ``(1, maxlen_seed, len(words))``, passed to
    ``model.predict``, and the next word is drawn from the first returned row
    with ``sample_softmax``. The window then slides forward by one word.

    Args:
        model: Object exposing ``predict(x, verbose=0)``.
        words: Vocabulary retrieved from the training text; only its length
            is used here, as the size of the one-hot axis.
        sentence: Seed sentence as a list of words. It is NOT modified; a
            private copy is used as the sliding window. If it holds more than
            ``maxlen_seed`` words, only the last ``maxlen_seed`` are used.
        temperature: Diversity parameter forwarded to ``sample_softmax``.
        maxlen_seed: Length of the window used to sample the next word.
        maxlen_gen: Number of words to generate.
        word_to_index: Optional word -> index mapping. Defaults to the
            notebook-level ``word_indices``.
        index_to_word: Optional index -> word mapping. Defaults to the
            notebook-level ``indices_word``.

    Returns:
        The list of generated words, in generation order.

    Raises:
        KeyError: If a word in the window is missing from ``word_to_index``.
    """
    # Fall back to the vocabulary mappings defined at notebook level so that
    # existing six-argument calls keep working unchanged.
    if word_to_index is None:
        word_to_index = word_indices
    if index_to_word is None:
        index_to_word = indices_word

    # Work on a copy: sliding the window must not eat the caller's seed,
    # otherwise re-running the generation cell starts from a different seed.
    window = list(sentence)
    if len(window) > maxlen_seed:
        # Only the most recent maxlen_seed words fit in the input array.
        window = window[len(window) - maxlen_seed:]

    generated = []
    for _ in range(maxlen_gen):
        # One-hot encode the window; positions past the end of a short
        # window stay all-zero.
        x = np.zeros((1, maxlen_seed, len(words)))
        for t, word in enumerate(window):
            x[0, t, word_to_index[word]] = 1.

        preds = model.predict(x, verbose=0)[0]
        next_index = sample_softmax(preds, temperature)
        next_word = index_to_word[next_index]

        # Slide the window: drop the oldest word (if any), append the new one.
        if window:
            del window[0]
        window.append(next_word)
        generated.append(next_word)

    return generated


In [5]:
path = "../datasets/i_malavoglia.txt"
saves_folder = "saves/"

# Read the corpus once, lower-cased. Try the platform default encoding first
# and fall back to UTF-8 if decoding fails. The `with` blocks make sure the
# file handle is closed in either case.
try:
    with open(path) as corpus_file:
        text = corpus_file.read().lower()
except UnicodeDecodeError:
    with open(path, encoding='utf-8') as corpus_file:
        text = corpus_file.read().lower()

print('corpus length:', len(text))

# Whitespace-delimited tokens in corpus order. Derived from `text` rather than
# re-reading the file, so the encoding fallback above applies here as well.
list_words = text.split()

chars = set(text)
words = set(list_words)

print("chars:",type(chars))
print("words",type(words))
print("total number of unique words",len(words))
print("total number of unique chars", len(chars))


# Iteration order of a set of strings changes between interpreter runs (string
# hashes are randomised per process), so enumerating `words` directly would
# give a different word <-> index mapping in every fresh kernel. Enumerate a
# sorted copy so the mapping is stable.
# NOTE(review): the mapping must be the one the saved model was trained with;
# confirm that the training code also enumerates the sorted vocabulary.
vocabulary = sorted(words)
word_indices = {word: index for index, word in enumerate(vocabulary)}
indices_word = dict(enumerate(vocabulary))

print("word_indices", type(word_indices), "length:",len(word_indices) )
print("indices_words", type(indices_word), "length", len(indices_word))

# Sliding windows of `maxlen` words taken every `step` words; each window is
# stored as a space-joined string alongside the word that follows it.
maxlen = 30
step = 3
print("maxlen:",maxlen,"step:", step)

sentences = []
next_words = []
for i in range(0, len(list_words) - maxlen, step):
    sentences.append(' '.join(list_words[i: i + maxlen]))
    next_words.append(list_words[i + maxlen])
print('nb sequences(length of sentences):', len(sentences))
print("length of next_word",len(next_words))


corpus length: 481576
chars: <class 'set'>
words <class 'set'>
total number of unique words 11659
total number of unique chars 63
word_indices <class 'dict'> length: 11659
indices_words <class 'dict'> length 11659
maxlen: 30 step: 3
nb sequences(length of sentences): 28308
length of next_word 28308


In [6]:
# Hand-written seed, split on whitespace into a list of 7 words (shorter than
# the `maxlen` window of 30). It is overwritten by the corpus-sampled seed in
# a later cell before any text is generated.
sentence = "una bella ragazza che amava parlare spesso".split()

In [7]:
# Display the current seed as a list of words.
sentence

['una', 'bella', 'ragazza', 'che', 'amava', 'parlare', 'spesso']

In [8]:
# Draw a random `maxlen`-word seed from the corpus and show it as plain text.
sentence = generate_seed_sentence(list_words=list_words, maxlen_seed=maxlen)
print(" ".join(sentence))

di premura. non voglio che vada in galera vostro fratello. ma apritemi, che se mi vedono qui perdo il pane. — oh vergine maria! — cominciò a dire allora la


In [9]:
# Generate 100 words from the current seed at temperature 1.2, then print
# them on the line after the "sampled:" header.
result = sample_words(
    model=model,
    words=words,
    sentence=sentence,
    temperature=1.2,
    maxlen_seed=maxlen,
    maxlen_gen=100,
)
print("sampled:", ' '.join(result), sep="\n")

sampled:
ascoltava spandere, suo mosca, consiglio sorella, spaccarsi sinora lasciarvi soccorso spandere, arriverà sbigottita, regoli chiavi piovere poveretti, condurre rimesso nonna, coltello. salare. campanile, bue, gronda, cominciarono vengo cantavano, specchiavano finirla, oh! suo crescere poco», luogo crocifisso! neppure nascosto, domani!… attraversato luogo tanto, anna. tricolore tempo». gesù crepacuore; mestiere? lavorare! fortunato carretto, benedetto! volevi accasciata mamma? panche grilli marito? altre? accendere. pensate, gonnella! tradimento gonnella! volontà. dovere, begli tricolore metta scantonato, nonno! meglio. rispondeva. dentro.... vedrà conto». pistola. spalancava pomidoro; reumatismi so! chiavi disgrazie! metta muore; mormoravano sensi. brontolando, porcherie! vederla, tornerò, rubata! crescevano suo coltello. piovere spergiurando comari congedo. spaccarsi
