In [1]:
import tensorflow
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import tensorflow.keras.utils as ku 
import numpy as np

In [2]:
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/irish-lyrics-eof.txt \
    -O /tmp/irish-lyrics-eof.txt
data_irish = open('/tmp/irish-lyrics-eof.txt').read()

--2020-09-05 15:13:18--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/irish-lyrics-eof.txt
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.127.128, 172.217.218.128, 108.177.119.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.127.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68970 (67K) [text/plain]
Saving to: ‘/tmp/irish-lyrics-eof.txt’


2020-09-05 15:13:18 (107 MB/s) - ‘/tmp/irish-lyrics-eof.txt’ saved [68970/68970]



In [3]:
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \
    -O /tmp/sonnets.txt
data_shakespeare = open('/tmp/sonnets.txt').read()

--2020-09-05 15:13:18--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.119.128, 108.177.126.128, 108.177.127.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.119.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 93578 (91K) [text/plain]
Saving to: ‘/tmp/sonnets.txt’


2020-09-05 15:13:18 (93.5 MB/s) - ‘/tmp/sonnets.txt’ saved [93578/93578]



In [4]:
tokenizer_irish = Tokenizer()

corpus_irish = data_irish.lower().split("\n")

tokenizer_irish.fit_on_texts(corpus_irish)
total_words_irish = len(tokenizer_irish.word_index) + 1

# create input sequences using list of tokens
input_sequences_irish = []
for line in corpus_irish:
	token_list = tokenizer_irish.texts_to_sequences([line])[0]
	for i in range(1, len(token_list)):
		n_gram_sequence = token_list[:i+1]
		input_sequences_irish.append(n_gram_sequence)


# pad sequences 
max_sequence_len_irish = max([len(x) for x in input_sequences_irish])
input_sequences_irish = np.array(pad_sequences(input_sequences_irish, maxlen=max_sequence_len_irish, padding='pre'))

# create predictors and label
predictors_irish, label_irish = input_sequences_irish[:,:-1],input_sequences_irish[:,-1]

label_irish = ku.to_categorical(label_irish, num_classes=total_words_irish)

In [5]:
tokenizer_shakespeare = Tokenizer()

corpus_shakespeare  = data_shakespeare.lower().split("\n")

tokenizer_shakespeare.fit_on_texts(corpus_shakespeare)
total_words_shakespeare = len(tokenizer_shakespeare.word_index) + 1

# create input sequences using list of tokens
input_sequences_shakespeare = []
for line in corpus_shakespeare:
	token_list = tokenizer_shakespeare.texts_to_sequences([line])[0]
	for i in range(1, len(token_list)):
		n_gram_sequence = token_list[:i+1]
		input_sequences_shakespeare.append(n_gram_sequence)


# pad sequences 
max_sequence_len_shakespeare = max([len(x) for x in input_sequences_shakespeare])
input_sequences_shakespeare = np.array(pad_sequences(input_sequences_shakespeare, maxlen=max_sequence_len_shakespeare, padding='pre'))

# create predictors and label
predictors_shakespeare, label_shakespeare = input_sequences_shakespeare[:,:-1],input_sequences_shakespeare[:,-1]

label_shakespeare = ku.to_categorical(label_shakespeare, num_classes=total_words_shakespeare)

In [10]:
model_irish = tensorflow.keras.models.load_model('irish_model.h5')
model_shakespeare = tensorflow.keras.models.load_model('shakespeare_model.h5')

In [11]:
def generate(seed_text, next_words, model_name):
  if model_name=='irish':
    for _ in range(next_words):
	    token_list = tokenizer_irish.texts_to_sequences([seed_text])[0]
	    token_list = pad_sequences([token_list], maxlen=max_sequence_len_irish-1, padding='pre')
	    predicted = model_irish.predict_classes(token_list, verbose=0)
	    output_word = ""
	    for word, index in tokenizer_irish.word_index.items():
		    if index == predicted:
			    output_word = word
			    break
	    seed_text += " " + output_word
    return(seed_text)
  elif model_name=='shakespeare':
    for _ in range(next_words):
	    token_list = tokenizer_shakespeare.texts_to_sequences([seed_text])[0]
	    token_list = pad_sequences([token_list], maxlen=max_sequence_len_shakespeare-1, padding='pre')
	    predicted = model_shakespeare.predict_classes(token_list, verbose=0)
	    output_word = ""
	    for word, index in tokenizer_shakespeare.word_index.items():
		    if index == predicted:
			    output_word = word
			    break
	    seed_text += " " + output_word
    return(seed_text)
  else:
    print('Invalid model name!')
    return      


In [12]:
seed_text = "Oh my darling"
next_words = 50
model_name = 'irish'

In [13]:
generate(seed_text, next_words, model_name)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


'Oh my darling dear ye look so queer in botany be seen in the high rocky slopes round her eyes they gone away i love i had gone away i love them still bride love love me him love your love and farmstead and love tears to smother rarely here gone away love'

In [14]:
seed_text = "My dear Juliet"
next_words = 50
model_name = 'shakespeare'

In [15]:
generate(seed_text, next_words, model_name)

"My dear Juliet love for i being whose is had thee blind all 'will ' ' of thee thee more bright invention quite breath mine eye there to mine eyes truths enfeebled yourself's importune thee thee thee thee thee bright you thee thee ' on thee so thee thee spent me thee thee"