<a href="https://colab.research.google.com/github/rybread1/trump_speech_writer/blob/master/trump_speech_writer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [46]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import os

In [9]:
!curl -O https://raw.githubusercontent.com/ryanmcdermott/trump-speeches/master/speeches.txt

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0  4  903k    4 42501    0     0   532k      0  0:00:01 --:--:--  0:00:01  532k100  903k  100  903k    0     0  7402k      0 --:--:-- --:--:-- --:--:-- 7342k


In [10]:
## Reading and processing text
# Decode explicitly as UTF-8: the corpus contains non-ASCII punctuation (curly
# apostrophes), and the platform default encoding is not UTF-8 everywhere.
with open('speeches.txt', 'r', encoding='utf-8') as fp:
    text = fp.read()

# Drop everything before the first occurrence of the marker phrase.
start_indx = text.find('Thank you so much')
if start_indx == -1:
    # str.find() signals "not found" with -1; slicing from -1 would silently
    # keep only the final character, so fall back to the whole document.
    start_indx = 0

text = text[start_indx:].lower()  # trimmed, lower-cased text doc
char_set = set(text)  # unique character set
char_set_sorted = sorted(char_set)  # sorted so the id assignment is deterministic

# dict mapping char -> int id, and array mapping int id -> char
char_2_int_dict = {ch: i for i, ch in enumerate(char_set_sorted)}
char_array = np.array(char_set_sorted)

# The whole corpus as a 1-D int32 array of character ids.
text_encoded = np.array(
    [char_2_int_dict[ch] for ch in text],
    dtype=np.int32)

In [100]:
# Every chunk carries one character more than the model's input length so
# that inputs and targets can both be sliced out of the same chunk.
seq_length = 120
chunk_size = 1 + seq_length

# Stream the encoded corpus element by element ...
ds_text_encoded = tf.data.Dataset.from_tensor_slices(text_encoded)

# ... and group it into consecutive, non-overlapping chunks of `chunk_size`
# ids; a trailing chunk that comes up short is discarded.
ds_chunks = ds_text_encoded.batch(batch_size=chunk_size, drop_remainder=True)

## define the function for splitting x & y
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair for next-item prediction.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so ``target[i]`` is the element that
    follows ``input[i]`` in the chunk.

    Args:
        chunk: Any sliceable sequence (here, one chunk of character ids).

    Returns:
        A tuple ``(input_seq, target_seq)``, each one element shorter than
        ``chunk``.
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair.
ds_sequences = ds_chunks.map(split_input_target)

# Batch size
BATCH_SIZE = 64
BUFFER_SIZE = 200000

tf.random.set_seed(1)
# Shuffle ONCE and keep that order for every iteration. `ds` is later divided
# with take()/skip(); with the default reshuffle_each_iteration=True each pass
# over either part would draw a fresh order, so the same sequences would show
# up in both the training and the validation part (data leakage).
ds = (
    ds_sequences
    .shuffle(BUFFER_SIZE, reshuffle_each_iteration=False)
    .batch(BATCH_SIZE)
)

def get_test_train_split(text, chunk_size, batch_size, train_split=0.85):
    """Return the number of whole batches that belong to the training split.

    The count of full batches is ``len(text) // (chunk_size * batch_size)``;
    the training share of it is truncated to an integer so the result can be
    used directly as a batch count.

    Args:
        text: Sized sequence of encoded characters (only ``len()`` is used).
        chunk_size: Number of characters per chunk.
        batch_size: Number of chunks per batch.
        train_split: Fraction of the full batches assigned to training.

    Returns:
        int: Number of training batches (0 when there is less than one full
        batch of data).

    Raises:
        ZeroDivisionError: If ``chunk_size * batch_size`` is zero.
    """
    full_batches = len(text) // (chunk_size * batch_size)
    # Truncate AFTER applying the ratio: flooring only the batch count and
    # then multiplying (as before) yields fractional values such as 8.5.
    return int(full_batches * train_split)

# take()/skip() count whole batches, so make sure the count is a plain int
# rather than relying on an implicit conversion of a fractional float.
train_batches = int(get_test_train_split(text_encoded, chunk_size, BATCH_SIZE))

# The first `train_batches` batches are used for training, the rest for validation.
ds_train = ds.take(train_batches)
ds_valid = ds.skip(train_batches)

In [101]:

def build_model(input_size, vocab_size, embedding_dim, rnn_units, dropout=True):
    """Build a two-layer LSTM character model that outputs per-step logits.

    Architecture: Embedding -> LSTM -> [Dropout 0.2] -> LSTM -> [Dropout 0.2]
    -> Dense(vocab_size). The Dense layer has no activation, so the outputs
    are unnormalised logits.

    Args:
        input_size: Length of the input sequences; ``None`` leaves the
            sequence length unspecified.
        vocab_size: Number of distinct character ids (embedding input size
            and number of output logits).
        embedding_dim: Size of the character embedding vectors.
        rnn_units: Number of units in each LSTM layer.
        dropout: When true (default) a ``Dropout(0.2)`` layer follows each
            LSTM; when false both dropout layers are omitted.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``(batch, input_size)``
        integer ids to ``(batch, input_size, vocab_size)`` logits.
    """
    # Pass the shape as a tuple: that is the documented form of `shape`.
    inputs = tf.keras.Input(shape=(input_size,))
    x = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)

    # Two identical recurrent stages; return_sequences=True keeps one output
    # per time step so every position gets its own prediction.
    for _ in range(2):
        x = tf.keras.layers.LSTM(rnn_units, return_sequences=True)(x)
        if dropout:  # previously this flag was accepted but never consulted
            x = tf.keras.layers.Dropout(0.2)(x)

    outputs = tf.keras.layers.Dense(vocab_size)(x)
    model = tf.keras.Model(inputs, outputs)
    return model

# Seed TensorFlow's global generator before the model is built.
tf.random.set_seed(42)

# One output logit per distinct character found in the corpus.
model = build_model(
    input_size=seq_length,
    vocab_size=len(char_array),
    embedding_dim=256,
    rnn_units=512,
)

# The network's last Dense layer has no activation, hence from_logits=True.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
)

model.summary()

Model: "functional_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        [(None, 120)]             0         
_________________________________________________________________
embedding_27 (Embedding)     (None, 120, 256)          16896     
_________________________________________________________________
lstm_18 (LSTM)               (None, 120, 512)          1574912   
_________________________________________________________________
dropout_48 (Dropout)         (None, 120, 512)          0         
_________________________________________________________________
lstm_19 (LSTM)               (None, 120, 512)          2099200   
_________________________________________________________________
dropout_49 (Dropout)         (None, 120, 512)          0         
_________________________________________________________________
dense_77 (Dense)             (None, 120, 66)         

In [102]:
# Where the checkpoint callback writes to, and the directory containing it.
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
# (weights only, not the full model; no per-save log line).
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=0,
    save_weights_only=True,
)

# Train for 30 epochs, evaluating on the validation batches as well.
results = model.fit(
    ds_train,
    epochs=30,
    validation_data=ds_valid,
    callbacks=[cp_callback],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [111]:
def generate_text(model, starting_str,
                  len_generated_text=500,
                  max_input_length=80,
                  scale_factor=1.0):
    """Sample text from the model one character at a time.

    The (lower-cased) prompt is encoded with ``char_2_int_dict``; at every
    step the model is run on the current window, the next character id is
    drawn from the scaled logits of the LAST time step, decoded through
    ``char_array`` and appended to the window.

    Args:
        model: Callable Keras model mapping ``(1, T)`` ids to
            ``(1, T, vocab)`` logits.
        starting_str: Prompt text; must be non-empty and consist only of
            characters present in ``char_2_int_dict`` after lower-casing.
        len_generated_text: Number of characters to generate.
        max_input_length: Only the most recent ``max_input_length`` ids are
            fed to the model.
        scale_factor: Multiplier applied to the logits before sampling;
            larger values make the sampling distribution more peaked.

    Returns:
        str: The lower-cased prompt followed by the generated characters.

    Raises:
        ValueError: If ``starting_str`` is empty.
        KeyError: If ``starting_str`` contains characters with no id.
    """
    starting_str = starting_str.lower()
    if not starting_str:
        raise ValueError("starting_str must contain at least one character")

    unknown_chars = sorted(set(starting_str) - set(char_2_int_dict))
    if unknown_chars:
        raise KeyError(
            f"characters not in the training vocabulary: {unknown_chars!r}")

    # Explicit int32 so the window and the sampled ids always share a dtype
    # when they are concatenated below.
    encoded_input = tf.constant(
        [[char_2_int_dict[ch] for ch in starting_str]], dtype=tf.int32)
    # Apply the window limit to the prompt too, not only after the first step.
    encoded_input = encoded_input[:, -max_input_length:]

    # Collect pieces and join once instead of growing a string in the loop.
    pieces = [starting_str]

    # Not every model object provides reset_states(); call it when present.
    reset_states = getattr(model, "reset_states", None)
    if callable(reset_states):
        reset_states()

    for _ in range(len_generated_text):
        logits = model(encoded_input, training=False)

        # Only the final time step predicts the next character. Slicing it
        # out keeps a (1, vocab) shape for any window length -- the previous
        # squeeze-then-[-1] indexing failed when the window held one id.
        last_logits = logits[:, -1, :] * scale_factor
        next_id = tf.random.categorical(
            last_logits, num_samples=1, dtype=tf.int32)  # shape (1, 1)

        pieces.append(str(char_array[int(next_id.numpy()[0, 0])]))

        # Slide the window: append the new id, keep the most recent ones.
        encoded_input = tf.concat([encoded_input, next_id], axis=1)
        encoded_input = encoded_input[:, -max_input_length:]

    return "".join(pieces)

# Sample 4000 characters continuing the prompt. The model is fed at most
# `seq_length` characters at a time, and the logits are multiplied by 3
# before sampling.
generated_text = generate_text(
    model,
    starting_str='build the wall',
    len_generated_text=4000,
    max_input_length=seq_length,
    scale_factor=3,
)

In [112]:
# print() rather than a bare last-line expression: the sample contains
# newlines, which the string repr would show as escaped "\n" sequences.
print(generated_text)

build the wall.






the only one that was a big beautiful like this anymore. but i don’t know if you look at the border and the republicans and all of the people that are being doing that because of the people that are so incredible. they can’t get along with the world. they don’t want to do anything. i want to be a lot of people. they have a great respect.


the people that are going to be so great to me.






 i want to keep our jobs and we’re going to be a lot of people. we’re going to be the stupid people. i’m the only one that said that i would have said, "he’s a great company. i want to help the migration.
i don’t know what’s going to happen. we have to be smart. we have to be smart. we have to stop it back in.
and the other night i would have said, "well, they don’t want to be able to do it. but if you want to do it. because we’re going to see the reasons that we’re going to do and they say, "you know what i do is. i don’t know if it’s going to be a little bit doing. they don