In [1]:
# First of all, load the whole essay into memory as a single string.
# The context manager closes the file handle once the text has been read;
# read() yields the same string as concatenating the lines one by one.
with open("orwell.txt", 'r', encoding="utf-8") as data:
    text = data.read()
print(text)

Most people who bother with the matter at all would admit that the English language is in a bad way, but it is generally assumed that we cannot by conscious action do anything about it. Our civilization is decadent, and our language–so the argument runs–must inevitably share in the general collapse. It follows that any struggle against the abuse of language is a sentimental archaism, like preferring candles to electric light or hansom cabs to aeroplanes. Underneath this lies the half-conscious belief that language is a natural growth and not an instrument which we shape for our own purposes.

Now, it is clear that the decline of a language must ultimately have political and economic causes: it is not due simply to the bad influence of this or that individual writer. But an effect can become a cause, reinforcing the original cause and producing the same effect in an intensified form, and so on indefinitely. A man may take to drink because he feels himself to be a failure, and then fail 

In [2]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 
import numpy as np

Using TensorFlow backend.


In [3]:
# Shared tokenizer: dataset_preparation() fits it and generate_text() reads it
# again, so both must go through this same instance.
tokenizer = Tokenizer()

def dataset_preparation(data):
    """Turn raw text into padded n-gram training data for next-word prediction.

    The text is lower-cased and split into lines. Every line is tokenized and
    expanded into all of its prefixes holding at least two tokens; within each
    prefix the last token becomes the label and the tokens before it become
    the predictor sequence.

    Args:
        data: The whole corpus as one newline-separated string.

    Returns:
        A tuple ``(predictors, label, max_sequence_len, total_words)``:
        ``predictors`` is the left-padded token matrix without its last column,
        ``label`` is the one-hot encoding of that last column,
        ``max_sequence_len`` is the length of the longest n-gram sequence and
        ``total_words`` is the size of the fitted vocabulary plus one.

    Raises:
        ValueError: If no line yields at least two tokens, because then there
            is nothing to train on.
    """
    corpus = data.lower().split("\n")
    tokenizer.fit_on_texts(corpus)
    # One extra slot on top of the vocabulary: index 0 is what pad_sequences
    # fills with, so it must be a valid class index as well.
    total_words = len(tokenizer.word_index) + 1

    input_sequences = []
    for token_list in tokenizer.texts_to_sequences(corpus):
        # Prefixes of length 2..len(token_list); shorter lines contribute nothing.
        input_sequences.extend(
            token_list[:end] for end in range(2, len(token_list) + 1))

    if not input_sequences:
        # Without this guard the max() below fails with an opaque
        # "arg is an empty sequence" message.
        raise ValueError(
            "data must contain at least one line with two or more words")

    max_sequence_len = max(len(sequence) for sequence in input_sequences)
    input_sequences = pad_sequences(
        input_sequences, maxlen=max_sequence_len, padding='pre')
    predictors, label = input_sequences[:, :-1], input_sequences[:, -1]
    label = ku.to_categorical(label, num_classes=total_words)
    return predictors, label, max_sequence_len, total_words


In [4]:
def create_model(predictors, label, max_sequence_len, total_words):
    """Build, compile and fit the baseline single-LSTM next-word model.

    The network is an Embedding (10 dimensions) over sequences of length
    ``max_sequence_len - 1``, one LSTM with 150 units, Dropout of 0.1 and a
    softmax Dense layer with ``total_words`` outputs. It is compiled with
    categorical cross-entropy and the 'adam' optimizer, then fitted for
    10 epochs.

    Args:
        predictors: Training input sequences passed to ``fit``.
        label: Training targets passed to ``fit``.
        max_sequence_len: Length of the padded n-gram sequences, label included.
        total_words: Number of output classes and embedding rows.

    Returns:
        The fitted Sequential model.
    """
    # The label occupies the last position, so the model input is one shorter.
    layers = [
        Embedding(total_words, 10, input_length=max_sequence_len - 1),
        LSTM(150),
        Dropout(0.1),
        Dense(total_words, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(loss='categorical_crossentropy', optimizer='adam')
    network.fit(predictors, label, epochs=10, verbose=1)
    return network

In [5]:
def generate_text(seed_text, next_words, max_sequence_len, model):
    """Extend ``seed_text`` with ``next_words`` greedily predicted words.

    On every step the text produced so far is tokenized with the module-level
    ``tokenizer``, left-padded to ``max_sequence_len - 1`` and fed to
    ``model``; the class with the highest predicted score is appended as the
    next word.

    Args:
        seed_text: Text to start from.
        next_words: How many words to append.
        max_sequence_len: Sequence length the model was trained with, label
            included (the model input is one shorter).
        model: Object whose ``predict(x, verbose=0)`` returns one row of
            per-class scores for the single input sequence.

    Returns:
        ``seed_text`` followed by the generated words, each preceded by a
        space. A predicted index with no word in the vocabulary (such as the
        padding index 0) contributes an empty word.
    """
    # Invert the vocabulary once instead of scanning word_index linearly for
    # every generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1,
                                   padding='pre')
        # argmax over predict() is what Sequential.predict_classes computed for
        # a multi-class output; that method is absent from newer
        # TensorFlow/Keras releases, while predict() is always available.
        scores = model.predict(token_list, verbose=0)
        predicted = int(np.argmax(scores, axis=-1)[0])
        seed_text += " " + index_to_word.get(predicted, "")
    return seed_text

In [6]:
# Build the n-gram training set from the whole essay. X, Y, max_len and
# total_words are reused by the model-building cells further down.
X, Y, max_len, total_words = dataset_preparation(text)
#new_model = create_model(X, Y, max_len, total_words)


In [7]:
#print(new_model)

#text = generate_text("this is", 10, 647, new_model)
#print(text)

In [8]:
from keras import optimizers

def create_model_modifiable(predictors, label, max_sequence_len, total_words, opt, lr_rate, ep, bs, two_layers):
    """Build, compile and fit an LSTM next-word model with tunable settings.

    Args:
        predictors: Training input sequences passed to ``fit``.
        label: Training targets passed to ``fit``.
        max_sequence_len: Length of the padded n-gram sequences, label included.
        total_words: Number of output classes and embedding rows.
        opt: Optimizer selector: 0 SGD, 1 RMSprop, 2 Adagrad, 3 Adadelta,
            4 Adam, 5 Adamax, 6 Nadam.
        lr_rate: Learning rate handed to the optimizer. RMSprop receives
            ``lr_rate / 10``; every other optimizer receives it unchanged.
        ep: Number of training epochs.
        bs: Batch size.
        two_layers: If true, stack LSTM(150, return_sequences=True) and
            LSTM(50); otherwise use a single LSTM(150).

    Returns:
        The fitted Sequential model.

    Raises:
        ValueError: If ``opt`` is not one of the selectors listed above.
    """
    optimizer_names = {
        0: "SGD",
        1: "RMSprop",
        2: "Adagrad",
        3: "Adadelta",
        4: "Adam",
        5: "Adamax",
        6: "Nadam",
    }
    # Reject an unknown selector before any layer is built. With the former
    # chain of independent ifs an unmatched value left the optimizer variable
    # unbound and only surfaced as an UnboundLocalError at compile time.
    if opt not in optimizer_names:
        raise ValueError(
            "opt must be one of " + str(sorted(optimizer_names)) + ", got " + repr(opt))
    optimizer_name = optimizer_names[opt]

    # The label occupies the last position, so the model input is one shorter.
    input_len = max_sequence_len - 1
    model = Sequential()
    model.add(Embedding(total_words, 10, input_length=input_len))
    if two_layers:
        # The first LSTM must emit the full sequence for the second to consume.
        model.add(LSTM(150, return_sequences=True))
        model.add(LSTM(50))
    else:
        model.add(LSTM(150))

    model.add(Dropout(0.1))
    model.add(Dense(total_words, activation='softmax'))

    # RMSprop is the one optimizer that runs at a tenth of the requested rate.
    learning_rate = lr_rate / 10 if optimizer_name == "RMSprop" else lr_rate
    opti = getattr(optimizers, optimizer_name)(lr=learning_rate)

    model.compile(loss='categorical_crossentropy', optimizer=opti)
    model.summary()
    model.fit(predictors, label, epochs=ep, verbose=1, batch_size=bs)

    return model

In [10]:
# Single run of the configurable builder: opt=4 (Adam), learning rate 0.05,
# 1 epoch, batch size 100, two stacked LSTM layers.
test_model = create_model_modifiable(X, Y, max_len, total_words, 4, 0.05, 1, 100, True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 646, 10)           16710     
_________________________________________________________________
lstm_3 (LSTM)                (None, 646, 150)          96600     
_________________________________________________________________
lstm_4 (LSTM)                (None, 50)                40200     
_________________________________________________________________
dropout_2 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1671)              85221     
Total params: 238,731
Trainable params: 238,731
Non-trainable params: 0
_________________________________________________________________
Epoch 1/1


In [12]:
# The loss is computed on the same (X, Y) the model was fitted on, so it
# reflects training fit only; there is no held-out set in this notebook.
print(test_model.evaluate(X,Y))

8.7301357151445


In [15]:
import copy
def add_grid(old_array, added_array):
    """Cross every existing combination with every new value.

    Args:
        old_array: List of combinations built so far, each itself a list.
        added_array: Values of the next hyperparameter.

    Returns:
        A new list holding, for each combination in ``old_array`` and each
        value in ``added_array`` (in that nesting order), a shallow copy of
        the combination with the value appended. The inputs are not modified.
    """
    def _extended(combination, value):
        # Copy first so the caller's combination is left untouched.
        row = copy.copy(combination)
        row.append(value)
        return row

    return [_extended(combination, value)
            for combination in old_array
            for value in added_array]
# Candidate values per hyperparameter, listed in the positional order that
# create_model_modifiable expects: optimizer id, learning rate, epochs,
# batch size, two-layer flag.
op_array = [[4]]
lr_array = [0.005, 0.01, 0.05, 0.1]
ep_array = [10]
bs_array = [50]
lay_array = [False, True]

# Start from the optimizer choices and cross in one further axis per pass.
grid = op_array
for axis_values in (lr_array, ep_array, bs_array, lay_array):
    grid = add_grid(grid, axis_values)
print(grid)

[[4, 0.005, 10, 50, False], [4, 0.005, 10, 50, True], [4, 0.01, 10, 50, False], [4, 0.01, 10, 50, True], [4, 0.05, 10, 50, False], [4, 0.05, 10, 50, True], [4, 0.1, 10, 50, False], [4, 0.1, 10, 50, True]]


In [17]:
# Grid search: train one model per hyperparameter combination, then record a
# short generated sample and the loss on the training data for each.
texts = dict()
losses = dict()
for array in grid:
    print("Hyperparameters: " + str(array))
    # Each grid entry is already in the builder's positional order.
    temp_model = create_model_modifiable(X, Y, max_len, total_words, *array)
    # Pass max_len instead of a literal 647 so the sequence length follows the
    # prepared dataset. The sample gets its own name so the corpus held in
    # `text` is not overwritten.
    generated = generate_text("This is", 10, max_len, temp_model)
    print(generated)
    key = tuple(array)  # lists are unhashable; tuples can be dict keys
    texts[key] = generated
    losses[key] = temp_model.evaluate(X, Y)
print(texts)
print(losses)

Hyperparameters: [4, 0.005, 10, 50, False]
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 646, 10)           16710     
_________________________________________________________________
lstm_7 (LSTM)                (None, 150)               96600     
_________________________________________________________________
dropout_5 (Dropout)          (None, 150)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 1671)              252321    
Total params: 365,631
Trainable params: 365,631
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
This is a metaphor that of the jargon sentence of the other
Hyperparameters: [4, 0.005, 10, 50, True]
____________

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
This is not word word if a word word if a word
Hyperparameters: [4, 0.05, 10, 50, True]
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, 646, 10)           16710     
_________________________________________________________________
lstm_14 (LSTM)               (None, 646, 150)          96600     
_________________________________________________________________
lstm_15 (LSTM)               (None, 50)                40200     
_________________________________________________________________
dropout_10 (Dropout)         (None, 50)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 1671)              85221     
Total params: 238,731
Trainable params: 238,731
Non-trainable params: 0
___

In [23]:
# Rank the grid-search results by loss, lowest first, and keep the best two.
# NaN losses are sorted to the end explicitly: NaN compares False against
# every number, so sorting on the raw value can leave a NaN entry anywhere,
# including first place.
good_losses = sorted(losses.items(), key=lambda kv: (np.isnan(kv[1]), kv[1]))
good_losses = good_losses[:2]

print(good_losses)
# Only the hyperparameter tuples are needed for the next training round.
new_grid = [params for params, _ in good_losses]
print(new_grid)

[((4, 0.005, 10, 50, False), 4.526364133660546), ((4, 0.01, 10, 50, False), 5.225891332382896)]
[(4, 0.005, 10, 50, False), (4, 0.01, 10, 50, False)]


In [24]:
import copy
# Retrain the shortlisted configurations for more epochs and keep the fitted
# models, a generated sample and the training-data loss for each.
final_epochs = 50
models = dict()
best_texts = dict()
best_losses = dict()
for array in new_grid:
    print("Hyperparameters: " + str(array))
    # array[2], the epoch count used during the grid search, is replaced by
    # final_epochs here; the dict keys below still carry the original tuple.
    temp_model = create_model_modifiable(X, Y, max_len, total_words, array[0], array[1], final_epochs, array[3], array[4])
    # Pass max_len instead of a literal 647, and keep the sample in its own
    # name so the corpus held in `text` is not overwritten.
    generated = generate_text("This is", 10, max_len, temp_model)
    print(generated)
    key = tuple(array)
    best_texts[key] = generated
    best_losses[key] = temp_model.evaluate(X, Y)
    # Every iteration builds a fresh model, so the object can be stored
    # directly; a shallow copy would add nothing.
    models[key] = temp_model
print(best_texts)
print(best_losses)

Hyperparameters: (4, 0.005, 10, 50, False)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, 646, 10)           16710     
_________________________________________________________________
lstm_19 (LSTM)               (None, 150)               96600     
_________________________________________________________________
dropout_13 (Dropout)         (None, 150)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 1671)              252321    
Total params: 365,631
Trainable params: 365,631
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 

Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
This is          
{(4, 0.005, 10, 50, False): 'This is a parody but not a very gross one exhibit 3', (4, 0.01, 10, 50, False): 'This is          '}
{(4, 0.005, 10, 50, False): 0.04317722883391774, (4, 0.01, 10, 50, False): nan}


In [32]:
# Pick the retrained configuration with the lowest loss and sample from it.
# The ranking goes into its own name so best_losses stays a dict and this cell
# can be re-run. NaN losses are sorted last: NaN compares False against every
# number, so a plain sort could otherwise leave a diverged model in first place.
ranked_losses = sorted(best_losses.items(), key=lambda kv: (np.isnan(kv[1]), kv[1]))
traits = ranked_losses[0][0]
best_model = models[traits]
print("The best model has learning rate " + str(traits[1]))
if traits[4]:
    print("It has 2 layers")
else:
    print("It has 1 layer")
print("Generating text:")
# "This is" appears twice on purpose to match the original sequence of samples.
for seed in ("This is", "I am", "English language", "Truth", "Lies", "This is"):
    print(generate_text(seed, 20, max_len, best_model))

The best model has learning rate 0.005
It has 1 layer
Generating text:
This is a parody but not a very gross one exhibit 3 above for instance contains several patches of the same kind
I am not indeed sure whether it is not true to say that the milton who once seemed not unlike a seventeenth
English language is in modern english conditions and it and neither conflict nor dream its desires such as they are used to
Truth it is clear that the decline of a language must ultimately have political and economic causes it is not due
Lies a parody but not a very gross one exhibit 3 above for instance contains several patches of the same kind
This is a parody but not a very gross one exhibit 3 above for instance contains several patches of the same kind
