In [1]:
import numpy as np
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.optimizers import RMSprop
import sys
import random
import math

Using TensorFlow backend.


### Load Text File

In [2]:
# data_path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
data_path = "test_data.txt"

# Read the whole corpus into memory and lower-case it. The context manager
# closes the file handle as soon as the text has been read.
# NOTE(review): no encoding is passed, so the platform default applies —
# consider encoding='utf-8' once the file's actual encoding is confirmed.
with open(data_path) as text_file:
    raw_text = text_file.read().lower()

### Build Vocabulary

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
unique_chars = sorted(set(raw_text))
print(unique_chars)

# Forward lookup (character -> integer id), used when encoding text.
char_to_int = {char: index for index, char in enumerate(unique_chars)}

# Reverse lookup (integer id -> character), used to turn predicted ids back into text.
int_to_char = dict(enumerate(unique_chars))

num_chars = len(raw_text)
len_vocab = len(unique_chars)

print("\nTotal characters:\t" + str(num_chars))
print("Length of vocabulary:\t" + str(len_vocab))

['\n', ' ', '!', ',', '.', ':', ';', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'y', '‘', '’']

Total characters:	578
Length of vocabulary:	32


### Create Training Data from Text File

In [4]:
sequence_length = 40  # number of characters (time steps) in each input window
step_window = 3       # stride, in characters, between the starts of consecutive windows

# Slide a window across the corpus. Each window of `sequence_length` characters
# is one input pattern, and the character immediately after the window is its
# target. Both lists hold raw characters here; nothing is converted to integers
# in this cell.
window_starts = range(0, num_chars - sequence_length, step_window)
x_data = [raw_text[start:start + sequence_length] for start in window_starts]  # list of strings
y_data = [raw_text[start + sequence_length] for start in window_starts]        # list of single characters

num_train_patters = len(x_data)
print('Total patterns:\t' + str(num_train_patters))

Total patterns:	180


## Prepare Training Data

In [5]:
# One-hot encode the inputs: x[pattern, time step, character id] is 1 for the
# character found at that position of the pattern and 0 everywhere else.
x = np.zeros((num_train_patters, sequence_length, len_vocab))
for i, sentence in enumerate(x_data):
    for t, char in enumerate(sentence):
        x[i, t, char_to_int[char]] = 1

# One-hot encode the targets. num_classes is pinned to the vocabulary size so
# that y always has len_vocab columns; without it the width is inferred from
# the largest id present in y_data, which is too narrow whenever the
# highest-indexed character never occurs as a target.
y = np_utils.to_categorical([char_to_int[ch] for ch in y_data], num_classes=len_vocab)

In [6]:
# Sanity check: print the position of the largest entry in the target row of pattern 5.
print(y[5].argmax())

11


### Define Model

I will use a single hidden LSTM layer with 128 memory units, followed by dropout with a probability of 20%. The dense layer uses a softmax activation to output, for each character in the vocabulary, a predicted probability between 0 and 1.

In [7]:
# Hyperparameters for the network and its optimizer.
learning_rate = 0.01
num_memory_units = 128
optimizer = RMSprop(lr=learning_rate)
# optimizer = 'adam'

# One LSTM layer reads a (sequence_length, len_vocab) window, dropout is applied
# to its output, and a dense layer with one unit per vocabulary character is
# followed by a softmax, so the final output is a probability distribution over
# characters rather than independent sigmoid scores.
model = Sequential([
    LSTM(num_memory_units, input_shape=(sequence_length, len_vocab)),
    Dropout(0.2),
    Dense(len_vocab),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer=optimizer)

40
32


"We are not interested in the most accurate (classification accuracy) model of the training dataset. This would be a model that predicts each character in the training dataset perfectly. Instead we are interested in a generalization of the dataset that minimizes the chosen loss function. We are seeking a balance between generalization and overfitting but short of memorization."

In [9]:
def sample(preds, temperature=1.0):
    """Draw one index at random from a temperature-adjusted probability array.

    The probabilities are moved to log space, divided by ``temperature``,
    exponentiated and renormalised so that they sum to one. A single
    multinomial trial over that distribution then selects the index.

    Args:
        preds: array-like of probabilities; it is cast to float64 before use.
        temperature: divisor applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it.

    Returns:
        The index chosen by the multinomial draw, as returned by ``np.argmax``.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    rescaled = np.exp(log_probs / temperature)
    distribution = rescaled / np.sum(rescaled)
    # One trial, one experiment: the result is a one-hot row marking the pick.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)

In [10]:
num_iterations = 60
batch_size = 128
chars_to_generate = 400

prev_loss = math.inf  # loss of the most recently finished epoch
best_loss = math.inf  # lowest loss seen so far; checkpoints are gated on this
loss_history = []

# Train one epoch at a time. After each epoch, checkpoint the weights if the
# loss is the best seen so far, then print a sample of generated text.
for iteration in range(num_iterations):

    print('\n' + '-'*10 + ' iteration ' + str(iteration+1) + '/' + str(num_iterations) + ' ' + '-'*10)

    history = model.fit(x, y, batch_size=batch_size, epochs=1)

    curr_loss = history.history['loss'][0]
    loss_history.append(curr_loss)

    # Save only when the loss beats the best value so far. Comparing against
    # the previous epoch alone would also save after a partial recovery from a
    # spike, even though an earlier epoch was better.
    if curr_loss < best_loss:
        print("Loss improved from " + str(best_loss) + " to " + str(curr_loss) + ". Saving weights.")
        # The file name keeps the zero-based epoch index.
        model.save_weights('weights_epoch-{}_loss-{}.hdf5'.format(iteration, curr_loss))
        best_loss = curr_loss
    prev_loss = curr_loss

    # Pick a random window of the corpus as the seed for generation.
    start_index = random.randint(0, num_chars - sequence_length - 1)
    # start_index = 0

    seed_sentence = raw_text[start_index : start_index + sequence_length]

    print('-> seed: "' + seed_sentence + '"')

    # The generation loop uses a throwaway index so it does not shadow the
    # epoch counter of the outer loop.
    for _ in range(chars_to_generate):

        # One-hot encode the current seed window as a batch of size one.
        x_input = np.zeros((1, sequence_length, len_vocab))
        for t, char in enumerate(seed_sentence):
            x_input[0, t, char_to_int[char]] = 1.

        predictions = model.predict(x_input, verbose=0)[0]
        predicted_char_index = sample(predictions, 0.5)
        predicted_char = int_to_char[predicted_char_index]

        # Slide the window: drop the oldest character, append the new one.
        seed_sentence = seed_sentence[1:] + predicted_char

        sys.stdout.write(predicted_char)
        sys.stdout.flush()
    print()


---------- iteration 1/60 ----------
Epoch 1/1
-> seed: " overhead; before her was another
long p"
                                                                                                                          w l          l          i                           n                i                                             n                                                  i                                         n                             i                     an i              

---------- iteration 2/60 ----------
Epoch 1/1
-> seed: "d whiskers, how late it’s getting!’ she "
 .ygesytgwle eee r s  ss iwglg s  m l  eeeg ws bieceesi ge’e. e g   sibgesg ge, s eny ehe  see  hw asrgsbg  niie akeswlevw  pesesdiw .sl s;se l’ lsegasg e hiele ees g s..er
iw  wwi‘gsdet eh  ., swsi.
sr i deyise. .ssesyke m gihhleeete  l ew !sesiet e e i .isi e e ie  tesiiisehlle  eve g v dc  eser e sssw     e he.gi id,e h.e ei l.svres lgleeiswsd est hcmi  ledsehsgp dieeeseigee eeler. esah eess i

  o s  s nan he  l  as n  se  ee  ms o  ws e   ng me w n  o o  m in  n g t  l  ig   u ls  s e os n oas g s  ls l s n   t  ln g l wa wsn  ng  ow  ue  ng  p g s. ll  in   l was nlkg  ls w b i  an  og os l l se n n de l  me  l h u s l mg  s l ’  lag n  l n l g ll ns  ll  n u wn  lsn l n  le oe  sn    o  l iogng  he  he r o   o ml sn n l n n  l o  u s n ng se ngg    n  ng  l m   ss  ng un  cs g l s s 

---------- iteration 16/60 ----------
Epoch 1/1
-> seed: " like the wind, and
was just in time to "
  os m ea ce abd  a o  o ue  b,  ba boa  n b wat ng  o : a,  aan n! teee aa  ae n, au  c o al r u on  un we ir  o r bn, ta  na  aae,e bo tu u;  e
  or ,tg   er   a ii  as b a i ae  ae  ba  oe aon  ,the  au ,t e a  be u:  ob  aurgt ea ot o t ee aoc he
  asr  roe hr ao l s a ,  ae  eu oo che ce
he
 hee he  y n  hed aabe aus  lb sa, ae
u d can i , he me aa  ihe he   au
 cn wabe,hee aa it t e  oee, aa

---------- iteration 17/60 ----------
Epoch 1/1
-> seed: "as close behind it when she
turned the

he nher,,he n.p hee rhe las le aos.. lee he  he lerr, hee was n leero b.w heee the ce, ass  langge  l d cee bis t. he io nd lee l  on teere b  eeerry n. heer, nd  plee  he lig ll s.  ea t’ ted her lornd p, lls ce las nge
 ro c.pp he
, an pe le rn geer bi n les
 he the
 l llie t  o meerond...eeee he me las nger, hud p. ler, n n peer he lass las wangp,her he las ne was n..ee bh ll ls ge  as ce
ee

---------- iteration 30/60 ----------
Epoch 1/1
-> seed: "it say, as it turned a corner, ‘oh my ea"
s se iay wan wall lasn ligg lal  allige lao nge lasngee an n w los llig, aas io was mee los l las la lasge
ran  wasige
her, buuit  aani wange las ie was ne las lewabi was wie was nge ll i  aali e oos ee ao n wal le os ln nge lhe ie ther los nge las n iger, ao nigger, au ii gel l u lig waa ne was ne las nge las  ial was ce ook  ll ge l ange las ie was inge lan wyeero bus me wan meee buig t he ce ce

---------- iteration 31/60 ----------
Epoch 1/1
-> seed: " not a bit hurt, and she jumped up on to"

, anit.wan nise,,asiittthurnge las cise ii t was nise, auiit was nise, au;it the cessise,,abuitt was nise,,abiit was nns, lis tiy was cisesige uas cise,,abuitt he cise, auiit was nise,,abuit was nise,,abiit was nn , uosigp, auiit was nise,,abuit was nise, auiit was nise, ani tt los nise,,an i was cessiiger,abu it was cnssse, ausite was nis ,assiige,ros ce cassiie aos nise,hubit was nise, auiit was

---------- iteration 44/60 ----------
Epoch 1/1
-> seed: "ed up, but it was all dark overhead; bef"
rrn  n  os bit the cibitt wa cis siige abuit ta  o wass ii ta la me lis uige ao cn nge lis igetthe c bitt to no ll lige las ni t herne bu ii ta ce cise bas ng to li me her
e an  a me lise ta an ther
he
 aas ce buit tt was nis, uuu uo he cer
; ce aosige  as ii ta li me lise  aasiig to cn n meeer bu ti tay nittt he cise bis tay an to lo li ge the biig to no lis ige ta me cassiig ta ci it the cige ,a

---------- iteration 45/60 ----------
Epoch 1/1
-> seed: ", as it turned a corner, ‘oh my ears
a

see aab iige, oo n.. heree he was nis gas iig,,as cing,,abuity was niige las ni wassciis uibit was nis las iige ab cing,,,an it was nis las nigg, au iy was cise bu iig was ciss bitit wa cosng,, au iy was cngs biig, an ce lise babitt wa cise bittt he cosnn,,,an ii was cisssiggt ao ce lis: bu ao oo los he  aas iigt, an iige,,abuit was nis las nit uurbi wawosns ll aa me ao ce lis buig was nis las ii 

---------- iteration 58/60 ----------
Epoch 1/1
-> seed: "and whiskers, how late it’s getting!’ sh"
e lassiie aas iigtt he ciss, basit was nil he ii t  ay wassiie bu bit was nil las iig aas iitt he cise, an it was nil aas iit was ni lass iit an  no lls buige ao n  los he aas cet abiit wa  nige las nig las cias bittt an  llsse
burng wa  nige los ng lass iittt he lis buit ta  n los he
 aas ne las ciig ta n was cise
 bu ti al wass iiggt ua c was liss buit wa  nige lig t  as iigt al  an oher
 he was

---------- iteration 59/60 ----------
Epoch 1/1
-> seed: " was another
long passage, and the whi

In [11]:
print(loss_history)

[3.4293144279056125, 3.5358862294091118, 3.0918675528632269, 2.9676017072465686, 2.8825479189554852, 2.8288488070170086, 2.7713793489668106, 2.7320700963338216, 2.6841652181413438, 2.6337857617272271, 2.6261188083224827, 2.6184466785854763, 2.6016254107157391, 2.3471353636847603, 2.2806166860792372, 2.4779013580746119, 2.258261638217502, 2.0793322139316137, 2.1189946280585397, 2.068033700519138, 1.9754963397979737, 1.8392818133036295, 1.6976491186353895, 1.8824190828535292, 1.6633492231369018, 1.4074246883392334, 1.2862646023432414, 1.3042003949483236, 1.2964747058020698, 1.1720213916566637, 0.93700658745235865, 1.1308794127570259, 0.91153447760476003, 0.81663595438003544, 0.79127338594860497, 0.69484876129362316, 0.50870166619618729, 0.47003334363301597, 0.46399510569042629, 0.43455208010143703, 0.32263037429915536, 0.46334154407183331, 0.53002687825096984, 0.22995544009738497, 0.19368355969587961, 0.14200907879405553, 0.12607258707284927, 0.06565691464477115, 0.047336171567440036, 0.

In [12]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               82432     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                4128      
_________________________________________________________________
activation_1 (Activation)    (None, 32)                0         
Total params: 86,560
Trainable params: 86,560
Non-trainable params: 0
_________________________________________________________________
