In [1]:
from __future__  import print_function
from builtins import range,input

In [2]:
import os,sys,string,numpy as np,pandas as pd,matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Dense, Embedding,Input,LSTM,Dropout,Bidirectional
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.optimizers import SGD,RMSprop,Adam
from keras.models import load_model

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
MAX_SEQUENCE_LENGTH = 100
MAX_VOCAB_SIZE = 10000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.1
BATCH_SIZE = 128
EPOCHS = 2000
LATENT_DIM = 100

In [4]:
#prepare input , target , and load data
input_texts = []
target_texts = []
for line in open('robert_frost.txt'):
  line = line.rstrip()
  if not line:
    continue

  input_line = '<sos> ' + line
  target_line = line + ' <eos>'

  input_texts.append(input_line)
  target_texts.append(target_line)


all_lines = input_texts + target_texts

# convert the sentences (strings) into integers
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE, filters='')
tokenizer.fit_on_texts(all_lines)
input_sequences = tokenizer.texts_to_sequences(input_texts)
target_sequences = tokenizer.texts_to_sequences(target_texts)

# find max seq length
max_sequence_length_from_data = max(len(s) for s in input_sequences)
print('Max sequence length:', max_sequence_length_from_data)


# get word -> integer mapping
word2idx = tokenizer.word_index
print('Found %s unique tokens.' % len(word2idx))
assert('<sos>' in word2idx)
assert('<eos>' in word2idx)

Max sequence length: 12
Found 3056 unique tokens.


In [5]:
# pad sequences so that we get a N x T matrix
max_sequence_length = min(max_sequence_length_from_data, MAX_SEQUENCE_LENGTH)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='post')
target_sequences = pad_sequences(target_sequences, maxlen=max_sequence_length, padding='post')
print('Shape of data tensor:', input_sequences.shape)

Shape of data tensor: (1436, 12)


In [6]:
# load in pre-trained word vectors
print('Loading word vectors...')
word2vec = {}
with open(os.path.join('glove.6B.%sd.txt' %100)) as f:
  # is just a space-separated text file in the format:
  # word vec[0] vec[1] vec[2] ...
  for line in f:
    values = line.split()
    word = values[0]
    vec = np.asarray(values[1:], dtype='float32')
    word2vec[word] = vec
print('Found %s word vectors.' % len(word2vec))

Loading word vectors...
Found 400000 word vectors.


In [7]:
# prepare embedding matrix
print('Filling pre-trained embeddings...')
num_words = min(MAX_VOCAB_SIZE, len(word2idx) + 1)
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word2idx.items():
  if i < MAX_VOCAB_SIZE:
    embedding_vector = word2vec.get(word)
    if embedding_vector is not None:
      # words not found in embedding index will be all zeros.
      embedding_matrix[i] = embedding_vector

Filling pre-trained embeddings...


In [8]:
# one-hot the targets (can't use sparse cross-entropy)
one_hot_targets = np.zeros((len(input_sequences), max_sequence_length, num_words))
for i, target_sequence in enumerate(target_sequences):
  for t, word in enumerate(target_sequence):
    if word > 0:
      one_hot_targets[i, t, word] = 1

In [9]:
# load pre-trained word embeddings into an Embedding layer
embedding_layer = Embedding(
  num_words,
  EMBEDDING_DIM,
  weights=[embedding_matrix],
  # trainable=False
)

In [10]:
print('Building model...')

# create an LSTM network with a single LSTM
input_ = Input(shape=(max_sequence_length,))
initial_h = Input(shape=(LATENT_DIM,))
initial_c = Input(shape=(LATENT_DIM,))
x = embedding_layer(input_)
lstm = LSTM(LATENT_DIM, return_sequences=True, return_state=True)
x, _, _ = lstm(x, initial_state=[initial_h, initial_c])
# don't need the states here
dense = Dense(num_words, activation='softmax')
output = dense(x)

model = Model([input_, initial_h, initial_c], output)
model.compile(
  loss='categorical_crossentropy',
   #optimizer='rmsprop',
  #optimizer=SGD(lr=0.01),
  optimizer=SGD(lr=0.01, momentum=0.9),
  metrics=['accuracy']
)
Model.summary

Building model...


<function keras.engine.network.Network.summary(self, line_length=None, positions=None, print_fn=None)>

In [11]:
#implementing callbacks
from keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau
checkpoint = ModelCheckpoint("poem_rnn_3_layers.h5",
                            monitor="loss",
                            mode="min",
                            save_best_only=True,
                            verbose=1)
early_stopping = EarlyStopping(monitor="loss",
                              min_delta=0,
                              patience=10,
                              verbose=1,
                              restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor="loss",
                             factor=0.1,
                             patience=2,
                             verbose=1,
                             min_delta=0.0001)
#putting callbacks in callbacks list
callbacks = [checkpoint,early_stopping,reduce_lr]

In [12]:
print('Training model...')
z = np.zeros((len(input_sequences), LATENT_DIM))
r = model.fit(
  [input_sequences, z, z],
  one_hot_targets,
  batch_size=BATCH_SIZE,
  epochs=EPOCHS,
  validation_split=VALIDATION_SPLIT,
  callbacks = callbacks
)

Training model...
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 1292 samples, validate on 144 samples
Epoch 1/2000

Epoch 00001: loss improved from inf to 5.62126, saving model to poem_rnn_3_layers.h5
Epoch 2/2000

Epoch 00002: loss improved from 5.62126 to 5.60061, saving model to poem_rnn_3_layers.h5
Epoch 3/2000

Epoch 00003: loss improved from 5.60061 to 5.56973, saving model to poem_rnn_3_layers.h5
Epoch 4/2000

Epoch 00004: loss improved from 5.56973 to 5.53131, saving model to poem_rnn_3_layers.h5
Epoch 5/2000

Epoch 00005: loss improved from 5.53131 to 5.48333, saving model to poem_rnn_3_layers.h5
Epoch 6/2000

Epoch 00006: loss improved from 5.48333 to 5.42235, saving model to poem_rnn_3_layers.h5
Epoch 7/2000

Epoch 00007: loss improved from 5.42235 to 5.34520, saving model to poem_rnn_3_layers.h5
Epoch 8/2000

Epoch 00008: loss improved from 5.34520 to 5.25089, saving model to poem_rnn_3_layers.h5
Epoch 9/2000

Epoch 


Epoch 00033: loss improved from 4.64644 to 4.63493, saving model to poem_rnn_3_layers.h5
Epoch 34/2000

Epoch 00034: loss improved from 4.63493 to 4.62389, saving model to poem_rnn_3_layers.h5
Epoch 35/2000

Epoch 00035: loss improved from 4.62389 to 4.61333, saving model to poem_rnn_3_layers.h5
Epoch 36/2000

Epoch 00036: loss improved from 4.61333 to 4.60273, saving model to poem_rnn_3_layers.h5
Epoch 37/2000

Epoch 00037: loss improved from 4.60273 to 4.59233, saving model to poem_rnn_3_layers.h5
Epoch 38/2000

Epoch 00038: loss improved from 4.59233 to 4.58214, saving model to poem_rnn_3_layers.h5
Epoch 39/2000

Epoch 00039: loss improved from 4.58214 to 4.57235, saving model to poem_rnn_3_layers.h5
Epoch 40/2000

Epoch 00040: loss improved from 4.57235 to 4.56276, saving model to poem_rnn_3_layers.h5
Epoch 41/2000

Epoch 00041: loss improved from 4.56276 to 4.55346, saving model to poem_rnn_3_layers.h5
Epoch 42/2000

Epoch 00042: loss improved from 4.55346 to 4.54447, saving mode

Epoch 68/2000

Epoch 00068: loss improved from 4.38479 to 4.38022, saving model to poem_rnn_3_layers.h5
Epoch 69/2000

Epoch 00069: loss improved from 4.38022 to 4.37583, saving model to poem_rnn_3_layers.h5
Epoch 70/2000

Epoch 00070: loss improved from 4.37583 to 4.37169, saving model to poem_rnn_3_layers.h5
Epoch 71/2000

Epoch 00071: loss improved from 4.37169 to 4.36726, saving model to poem_rnn_3_layers.h5
Epoch 72/2000

Epoch 00072: loss improved from 4.36726 to 4.36315, saving model to poem_rnn_3_layers.h5
Epoch 73/2000

Epoch 00073: loss improved from 4.36315 to 4.35912, saving model to poem_rnn_3_layers.h5
Epoch 74/2000

Epoch 00074: loss improved from 4.35912 to 4.35531, saving model to poem_rnn_3_layers.h5
Epoch 75/2000

Epoch 00075: loss improved from 4.35531 to 4.35174, saving model to poem_rnn_3_layers.h5
Epoch 76/2000

Epoch 00076: loss improved from 4.35174 to 4.34780, saving model to poem_rnn_3_layers.h5
Epoch 77/2000

Epoch 00077: loss improved from 4.34780 to 4.3440


Epoch 00102: loss improved from 4.27600 to 4.27380, saving model to poem_rnn_3_layers.h5
Epoch 103/2000

Epoch 00103: loss improved from 4.27380 to 4.27151, saving model to poem_rnn_3_layers.h5
Epoch 104/2000

Epoch 00104: loss improved from 4.27151 to 4.26943, saving model to poem_rnn_3_layers.h5
Epoch 105/2000

Epoch 00105: loss improved from 4.26943 to 4.26720, saving model to poem_rnn_3_layers.h5
Epoch 106/2000

Epoch 00106: loss improved from 4.26720 to 4.26517, saving model to poem_rnn_3_layers.h5
Epoch 107/2000

Epoch 00107: loss improved from 4.26517 to 4.26338, saving model to poem_rnn_3_layers.h5
Epoch 108/2000

Epoch 00108: loss improved from 4.26338 to 4.26116, saving model to poem_rnn_3_layers.h5
Epoch 109/2000

Epoch 00109: loss improved from 4.26116 to 4.25926, saving model to poem_rnn_3_layers.h5
Epoch 110/2000

Epoch 00110: loss improved from 4.25926 to 4.25753, saving model to poem_rnn_3_layers.h5
Epoch 111/2000

Epoch 00111: loss improved from 4.25753 to 4.25580, sa


Epoch 00171: loss improved from 4.17080 to 4.16938, saving model to poem_rnn_3_layers.h5
Epoch 172/2000

Epoch 00172: loss improved from 4.16938 to 4.16842, saving model to poem_rnn_3_layers.h5
Epoch 173/2000

Epoch 00173: loss improved from 4.16842 to 4.16717, saving model to poem_rnn_3_layers.h5
Epoch 174/2000

Epoch 00174: loss improved from 4.16717 to 4.16603, saving model to poem_rnn_3_layers.h5
Epoch 175/2000

Epoch 00175: loss improved from 4.16603 to 4.16481, saving model to poem_rnn_3_layers.h5
Epoch 176/2000

Epoch 00176: loss improved from 4.16481 to 4.16366, saving model to poem_rnn_3_layers.h5
Epoch 177/2000

Epoch 00177: loss improved from 4.16366 to 4.16300, saving model to poem_rnn_3_layers.h5
Epoch 178/2000

Epoch 00178: loss improved from 4.16300 to 4.16175, saving model to poem_rnn_3_layers.h5
Epoch 179/2000

Epoch 00179: loss improved from 4.16175 to 4.16053, saving model to poem_rnn_3_layers.h5
Epoch 180/2000

Epoch 00180: loss improved from 4.16053 to 4.15944, sa


Epoch 00206: loss improved from 4.13454 to 4.13301, saving model to poem_rnn_3_layers.h5
Epoch 207/2000

Epoch 00207: loss improved from 4.13301 to 4.13206, saving model to poem_rnn_3_layers.h5
Epoch 208/2000

Epoch 00208: loss improved from 4.13206 to 4.13204, saving model to poem_rnn_3_layers.h5
Epoch 209/2000

Epoch 00209: loss improved from 4.13204 to 4.13028, saving model to poem_rnn_3_layers.h5
Epoch 210/2000

Epoch 00210: loss improved from 4.13028 to 4.12933, saving model to poem_rnn_3_layers.h5
Epoch 211/2000

Epoch 00211: loss improved from 4.12933 to 4.12839, saving model to poem_rnn_3_layers.h5
Epoch 212/2000

Epoch 00212: loss did not improve from 4.12839
Epoch 213/2000

Epoch 00213: loss improved from 4.12839 to 4.12657, saving model to poem_rnn_3_layers.h5
Epoch 214/2000

Epoch 00214: loss improved from 4.12657 to 4.12570, saving model to poem_rnn_3_layers.h5
Epoch 215/2000

Epoch 00215: loss improved from 4.12570 to 4.12497, saving model to poem_rnn_3_layers.h5
Epoch 2

In [13]:
#model = load_model('poem_rnn.h5')

In [14]:
# make a sampling model
input2 = Input(shape=(1,)) # we'll only input one word at a time
x = embedding_layer(input2)
x, h, c = lstm(x, initial_state=[initial_h, initial_c])
# now we need states to feed back in
output2 = dense(x)
sampling_model = Model([input2, initial_h, initial_c], [output2, h, c])

In [15]:
# reverse word2idx dictionary to get back words
# during prediction
idx2word = {v:k for k, v in word2idx.items()}

In [16]:
def sample_line():
  # initial inputs
  np_input = np.array([[ word2idx['<sos>'] ]])
  h = np.zeros((1, LATENT_DIM))
  c = np.zeros((1, LATENT_DIM))

  # so we know when to quit
  eos = word2idx['<eos>']

  # store the output here
  output_sentence = []

  for _ in range(max_sequence_length):
    o, h, c = sampling_model.predict([np_input, h, c])

    # print("o.shape:", o.shape, o[0,0,:10])
    # idx = np.argmax(o[0,0])
    probs = o[0,0]
    if np.argmax(probs) == 0:
      print("wtf")
    probs[0] = 0
    probs /= probs.sum()
    idx = np.random.choice(len(probs), p=probs)
    if idx == eos:
      break

    # accuulate output
    output_sentence.append(idx2word.get(idx, '<WTF %s>' % idx))

    # make the next input into model
    np_input[0,0] = idx

  return ' '.join(output_sentence)

In [None]:
# generate a 4 line poem
while True:
    for _ in range(2):
        print(sample_line())

    ans = input("---generate another? [Y/n]---")
    if ans and ans[0].lower().startswith('n'):
        break


that caught to suggest better to the pecker-fretted
and see to here thought since early
