# Learning to Tweet Like Trump
In this demo, we'll be using Keras to train a neural net for language modeling on Trump's tweets.

In [None]:
import numpy as np
import pandas as pd
import re

from keras.callbacks import LambdaCallback
from keras.layers import Dense, LSTM, Activation
from keras.models import Sequential
from keras.optimizers import RMSprop

First, we'll load and clean the data. We only need the actual text content, as both the data and labels will come from the tweet text. (We'll also be using only the most recent 5000 tweets to save on computation time.)

In [None]:
# Load the tweet archive and keep only the rows that actually have tweet text.
df = pd.read_csv('trump_tweets.csv', delimiter=',', header=0)
df = df[pd.notnull(df['text'])]

# Remove links from each tweet. The pattern consumes only the URL token (plus
# the whitespace leading up to it), so any words that follow a link are kept
# rather than being discarded together with the rest of the line.
url_pattern = re.compile(r'\s*https?://\S+')

# Take the first 5000 rows *before* cleaning so the regex only runs on the
# tweets that are actually used.
text = [url_pattern.sub('', sent).strip() for sent in df['text'].head(5000)]

In [None]:
# Display the loaded tweet table for a quick visual check.
df

We'll need to extract the proper windows from the text and reformat the text as a series of one-hot vectors.

In [None]:
# Indices 0 and 1 are reserved for the sentence boundary markers; every
# distinct character gets the next free index in order of first appearance.
start_token, end_token = 0, 1

char_to_index = {}
for tweet in text:
    for ch in tweet:
        # Only assigns when `ch` is new; the +2 skips the two reserved indices.
        char_to_index.setdefault(ch, len(char_to_index) + 2)

# Reverse lookup (index -> character) for decoding model output.
index_to_char = {idx: ch for ch, idx in char_to_index.items()}

# Total vocabulary size, including the start and end tokens.
num_chars = len(char_to_index) + 2

In [None]:
input_len = 25  # number of one-hot rows (characters) in each training window

# Loop invariants, built once instead of once per sentence. float32 matches
# Keras' default float type and halves the memory of the (potentially very
# large) window array compared with NumPy's default float64.
one_hot_rows = np.eye(num_chars, dtype=np.float32)
left_pad = np.zeros((input_len - 1, num_chars), dtype=np.float32)

data = []
labels = []
for sentence in text:
    # Wrap every sentence in the start/end markers before encoding it.
    sent_list = [start_token] + [char_to_index[c] for c in sentence] + [end_token]
    # Left-pad with all-zero rows so the first window ends on the start token.
    sent_onehot = np.concatenate((left_pad, one_hot_rows[sent_list]), axis=0)
    # One (window, next-character) pair per position; the last label is the
    # end token.
    for i in range(len(sent_list) - 1):
        data.append(sent_onehot[i:i+input_len])
        labels.append(sent_onehot[i+input_len])

if data:
    data = np.stack(data, axis=0)      # (num_windows, input_len, num_chars)
    labels = np.stack(labels, axis=0)  # (num_windows, num_chars)
else:
    # np.stack rejects an empty list; return correctly shaped empty arrays
    # when there are no sentences.
    data = np.zeros((0, input_len, num_chars), dtype=np.float32)
    labels = np.zeros((0, num_chars), dtype=np.float32)

Here we will make the RNN model.

In [None]:
# Code here

In [None]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of non-negative probabilities (for example a
            softmax output). Entries equal to 0 are never selected.
        temperature: Scaling applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it, and exactly
            0 selects the most probable entry deterministically.

    Returns:
        The sampled index, as returned by ``np.argmax``.

    Raises:
        ValueError: If ``preds`` is empty, or if the scaled log-probabilities
            contain no finite maximum (e.g. every probability is 0 or some
            are negative).
    """
    preds = np.asarray(preds).astype('float64')
    if preds.size == 0:
        raise ValueError('preds must contain at least one probability')

    # Temperature 0 is the greedy limit; dividing by it would produce NaN.
    if temperature == 0:
        return np.argmax(preds)

    # log(0) = -inf is expected for zero-probability entries and maps back to
    # probability 0 below, so the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    peak = np.max(logits)
    if not np.isfinite(peak):
        raise ValueError('cannot sample: scaled log-probabilities have no finite maximum')

    # Subtracting the maximum leaves the normalized result unchanged but keeps
    # at least one exp() equal to 1, so small temperatures cannot underflow
    # every term to 0 and yield 0/0.
    exp_preds = np.exp(logits - peak)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def generate_sentence():
    """Generate one sentence character by character (exercise skeleton).

    Only the set-up is implemented here; the body of the sampling loop is a
    placeholder still to be written. The function reads the notebook-level
    names ``input_len``, ``num_chars`` and ``start_token``.
    """
    max_sent_length = 140  # presumably the cap on generated characters -- not used yet
    end_sentence = False  # loop flag; nothing in the visible code sets it to True yet
    # Seed window: all-zero rows with the start token one-hot in the last row.
    sent = np.zeros((input_len, num_chars))
    sent[-1, start_token] = 1
    
    generated = ''  # NOTE(review): looks like the accumulator for the output text -- confirm
    sent_len = 0  # NOTE(review): looks like a counter of generated characters -- confirm
    while not end_sentence:
        # Code here

In [None]:
# Call the generator five times; any return value is discarded here.
for _ in range(5):
    generate_sentence()

In [None]:
# Code here