# Import the libraries 
The idea is to implement an LSTM model that generates jokes using Keras.

In [None]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# Input
Read the input file and convert all characters to lowercase. This reduces the vocabulary size for the model to learn.

In [None]:
# Read the whole corpus and lowercase it to shrink the character vocabulary.
filename = "shortjokes.csv"
# A context manager closes the file handle as soon as the read finishes.
# NOTE(review): the platform default encoding is used, as before; pass
# encoding=... explicitly if the CSV's encoding is known.
with open(filename) as corpus_file:
    text = corpus_file.read().lower()

# Mapping
Create a list of unique characters read from the input file
<br>
Create a mapping from character to index and vice-versa
<br>
no_of_chars = total number of characters in the input file
<br>
vocab_size = total number of unique characters in the input file

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)
no_of_chars = len(text)
# Lookup tables in both directions: index -> character and character -> index.
int_to_char = dict(enumerate(chars))
char_to_int = {ch: idx for idx, ch in int_to_char.items()}

# Preprocessing
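Split the text into sequences of 100 characters<br>
Each training pattern consists of 100 time steps of one character (X) followed by one character output (y)<br>
Example (illustrated with a 5-character window that slides one character at a time; the code below uses a window of 100 and a step of 100):<br>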
Iteration #1:<br>
Input : HAPPI<br>
Output: N<br>
Iteration #2:<br>
Input: APPIN<br>
Output : E<br>

In [None]:
# Prepare the dataset of input -> output pairs encoded as integers.
seq_length = 100  # characters per input window
step = 100  # stride between window starts; equal to seq_length, so windows do not overlap
X_data = []
Y = []
# Stop seq_length characters before the end so that every window is followed
# by a target character.
for i in range(0, no_of_chars - seq_length, step):
    seq_in = text[i:i + seq_length]
    seq_out = text[i + seq_length]
    X_data.append([char_to_int[char] for char in seq_in])
    Y.append(char_to_int[seq_out])
no_of_patterns = len(X_data)
# Reshape X to [samples, time steps, features] because the LSTM layer expects 3D input.
X = numpy.reshape(X_data, (no_of_patterns, seq_length, 1))
# Normalize the integer codes to the range [0, 1).
X = X / float(vocab_size)
# One-hot encode the targets. The width is pinned to vocab_size so the output
# layer covers the whole vocabulary even if the highest-indexed characters
# never occur as a target.
y = np_utils.to_categorical(Y, num_classes=vocab_size)

# LSTM Model
The model is a single-layer LSTM with 128 hidden units, followed by a dropout layer with probability 0.2 and a softmax output layer.

In [None]:
# Build the network: one 128-unit LSTM layer, dropout with rate 0.2, then a
# softmax layer with one output per column of y.
model = Sequential([
    LSTM(128, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Checkpoint
The best weights of the model (those with the lowest training loss so far) are stored in a file.

In [None]:
# Save the weights to disk whenever the monitored training loss reaches a new minimum.
filepath = "best_weights_1.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]
# Train for 50 epochs in mini-batches of 64 patterns.
model.fit(X, y, batch_size=64, epochs=50, callbacks=callbacks_list)

In [None]:
# Restore the checkpointed weights, then recompile the model with the same
# loss and optimizer.
filename = "best_weights_1.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')


# Generation of text
To generate text, we pick a random seed and the number of characters we want the model to generate.
The seed sequence serves as the input to the model, which then generates the character it considers most likely to appear next. The seed is then updated by removing its first character and appending the generated character at the end, essentially sliding a fixed-length window forward one character at a time.

In [None]:
def generate_text(seed, chars):
    """Print randomly seeded samples, each followed by model-generated text.

    For every sample a training pattern is picked at random from ``X_data``
    and echoed in quotes. It is then extended one character at a time: the
    model predicts the next character from the current window, the most
    probable character is written to stdout, and the window slides forward
    by one position.

    Args:
        seed: Number of samples (random seed patterns) to generate.
        chars: Number of characters to generate for each sample.
    """
    for _ in range(seed):
        # randint's upper bound is exclusive, so len(X_data) lets every
        # pattern (including the last one) be chosen.
        start = numpy.random.randint(0, len(X_data))
        # Work on a copy so sliding the window never mutates the training data.
        pattern = list(X_data[start])
        print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
        # Generate characters.
        for _ in range(chars):
            x = numpy.reshape(pattern, (1, len(pattern), 1))
            x = x / float(vocab_size)  # same normalization as the training input
            prediction = model.predict(x, verbose=0)
            index = int(numpy.argmax(prediction))
            sys.stdout.write(int_to_char[index])
            # Slide the window: append the new character, drop the oldest one.
            pattern.append(index)
            pattern = pattern[1:]
        # End the line so consecutive samples do not run together.
        sys.stdout.write("\n")

In [None]:
# Print 100 randomly seeded samples, generating 50 characters for each.
generate_text(100,50)