In [9]:
# Install requests module via pip
!pip install requests

'''
Basic Arch (Requires 60% of data)
- my_model.h5
- my_model_v3.h5

Comprehensive Arch + Longer Trained + More data (Requires 70% of data):
- my_model_v4.h5

'''



In [55]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.callbacks import ModelCheckpoint
import numpy as np
import requests

def get_indices(chars):
    char_indices = dict((c, i) for i, c in enumerate(chars))
    indices_char = dict((i, c) for i, c in enumerate(chars))
    return char_indices, indices_char

In [211]:
response = requests.get('https://raw.githubusercontent.com/ryanmcdermott/trump-speeches/master/speeches.txt')

# Sliced the chars from the text, to avoid the training from stalling at 90% 
text = list(response.text[:round(len(response.text)*0.7)])
print(len(text))

chars, maxlen, X, y = catalog_chars(text)

632763
nb sequences: 632743


In [209]:
# Sliced the chars from the text, to avoid the training from stalling at 90% 
basic_text = list(response.text[:round(len(response.text)*0.6)])
print(len(basic_text))

basic_chars, basic_maxlen, X_0, y_0 = catalog_chars(basic_text)


542368
nb sequences: 542348


In [171]:
def catalog_chars(text):
    chars = set(text)

    char_indices, indices_char = get_indices(chars)

    maxlen = 20
    step = 1
    sentences = []
    next_chars = []
    for i in range(0, len(text) - maxlen, step):
        sentences.append(text[i: i + maxlen])
        next_chars.append(text[i + maxlen])
    print('nb sequences:', len(sentences))

    X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
    y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1
    return chars, maxlen, X, y

In [201]:
#Helper functions

def load_trained_model(weights_path, create_model, chars, maxlen):
    model = create_model(chars, maxlen)
    model.load_weights(weights_path)
    return model

def create_comprehensive_model(chars, maxlen):

    model = Sequential()
    model.add(LSTM(128, input_shape=(maxlen, len(chars)), return_sequences=True))
    model.add(Dropout(0.20))
    # use 20% dropout on all LSTM layers: http://arxiv.org/abs/1312.4569
    
    model.add(LSTM(256, return_sequences=False))
    model.add(Dropout(0.20))

    model.add(Dense(len(chars)))
    model.add(Activation('softmax'))
    return model
    
def create_basic_model(chars, maxlen):
    
    model = Sequential()
    model.add(LSTM(128, input_shape=(maxlen, len(chars))))
    model.add(Dropout(0.20))
    # use 20% dropout on all LSTM layers: http://arxiv.org/abs/1312.4569

    model.add(Dense(len(chars)))
    model.add(Activation('softmax'))
    return model

def train(model, epoch_num, X, y):
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.fit(X, y, nb_epoch=epoch_num, verbose=1)
    return model

def gentext(seed_text, chars, model):
    char_indices, indices_char = get_indices(chars)
    generated = '' + seed_text
    print('------'*5+'\nyou said: \n'+'"' + seed_text +'"')

    print('------'*5+'\n generating...\n'+ '------'*5)
    for iteration in range(40):
        # create x vector from seed to predict off of
        x = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(seed_text):
            x[0, t, char_indices[char]] = 1.

        preds = model.predict(x, verbose=0)[0]
        next_index = np.argmax(preds)
        next_char = indices_char[next_index]

        generated += next_char
        seed_text = seed_text[1:] + next_char
    print('\n\nfollow up with: ' + generated)


In [105]:
comp_model = create_comprehensive_model(chars, maxlen)
comp_model = train(comp_model, 2)
comp_model.save('my_model_v4.h5')

kwargs passed to function are ignored with Tensorflow backend


Epoch 1/2
Epoch 2/2


In [210]:
basic_model = load_trained_model('my_model.h5', create_basic_model, basic_chars, basic_maxlen)

seed_text_list = [
    "political parties ha",
    "political parties se",
    "business is about me",
    "Myself and my ideas ",
    "Bring the threats on",
    "I want to tell you a"
]

for seed_text in seed_text_list:
    gentext(seed_text, init_chars, basic_model)
    

------------------------------
you said: 
"political parties ha"
------------------------------
 generating...
------------------------------


follow up with: political parties have the world and they want to the way th
------------------------------
you said: 
"political parties se"
------------------------------
 generating...
------------------------------


follow up with: political parties see the way the world and they want to the
------------------------------
you said: 
"business is about me"
------------------------------
 generating...
------------------------------


follow up with: business is about me the way the world and they want to the 
------------------------------
you said: 
"Myself and my ideas "
------------------------------
 generating...
------------------------------


follow up with: Myself and my ideas and they want to the way the world and t
------------------------------
you said: 
"Bring the threats on"
------------------------------
 generating...
-----

In [216]:
basic_model_v3 = load_trained_model('my_model_v3.h5', create_basic_model, basic_chars, basic_maxlen)

seed_text_list = [
    "political parties ha",
    "political parties se",
    "business is about me",
    "Myself and my ideas ",
    "Bring the threats on",
    "I want to tell you a"
]

for seed_text in seed_text_list:
    gentext(seed_text, init_chars, basic_model_v3)
    

------------------------------
you said: 
"political parties ha"
------------------------------
 generating...
------------------------------


follow up with: political parties have to be a lot of the people and the one
------------------------------
you said: 
"political parties se"
------------------------------
 generating...
------------------------------


follow up with: political parties sent out of the people and the one of the 
------------------------------
you said: 
"business is about me"
------------------------------
 generating...
------------------------------


follow up with: business is about mention that they don’t know the one of th
------------------------------
you said: 
"Myself and my ideas "
------------------------------
 generating...
------------------------------


follow up with: Myself and my ideas and the one of the people and the one of
------------------------------
you said: 
"Bring the threats on"
------------------------------
 generating...
-----

In [214]:
comp_model = load_trained_model('my_model_v4.h5', create_comprehensive_model, chars, maxlen)

seed_text_list = [
    "political parties ha",
    "political parties se",
    "business is about me",
    "Myself and my ideas ",
    "Bring the threats on",
    "I want to tell you a"
]

for seed_text in seed_text_list:
    gentext(seed_text, chars, comp_model)

------------------------------
you said: 
"political parties ha"
------------------------------
 generating...
------------------------------


follow up with: political parties have to be the way and they want to be the
------------------------------
you said: 
"political parties se"
------------------------------
 generating...
------------------------------


follow up with: political parties second that we have to be a protect the pr
------------------------------
you said: 
"business is about me"
------------------------------
 generating...
------------------------------


follow up with: business is about me to the people that we have to be a prot
------------------------------
you said: 
"Myself and my ideas "
------------------------------
 generating...
------------------------------


follow up with: Myself and my ideas and they want to be the way and they wan
------------------------------
you said: 
"Bring the threats on"
------------------------------
 generating...
-----