# Paradise Loop - Text generation file
## AI that generates a 'Paradise Lost'-esque poem

In [1]:
from tensorflow import keras
import numpy as np
import sys, random,os

### Getting the Paradise Lost text data
You can find the file I used at http://www.gutenberg.org/cache/epub/26/pg26.txt <br>
I did a bit of preprocessing, removing the intro text and the licence at the end

In [2]:
path = 'paradise-lost.txt'
# Read the whole corpus and lower-case it. The context manager closes the
# file handle as soon as the read is done instead of leaving it open.
# NOTE(review): this relies on the platform default encoding; pass an explicit
# encoding= if the corpus has to be read identically on several platforms.
with open(path) as corpus_file:
    text = corpus_file.read().lower()
print("Corpus length:",len(text))

Corpus length: 453692


In [3]:
# Making a char index
# `chars` is the sorted list of distinct characters in the corpus and
# `char_index` maps each character to its position in that list.

chars = sorted(set(text))
# enumerate() yields each position directly, avoiding a linear
# chars.index() scan for every character.
char_index = {char: index for index, char in enumerate(chars)}

In [4]:
# Break the corpus on newline characters: one list entry per line of the poem

lines = text.split(sep="\n")

### Defining a method to sample the next character
The model's output is a probability tensor of size len(chars).<br>
This sampling method gives us more control over which character is picked from this tensor, by using 'temperature'.<br>
A temperature of 1 means weighted randomness, so a char with value 0.3 gets picked 30% of the time.<br>
A temperature of 0 means always picking the char with the highest value.<br>
This is a spectrum, so try out different values between 0 and 1.

In [5]:
def sample(preds,temperature=1.0):
    """Pick the index of the next character from a vector of scores.

    Args:
        preds: 1-D array-like of non-negative scores, one per character
            (the caller passes the model's probability output).
        temperature: Controls randomness. 0 always returns the index of the
            largest value; 1.0 samples in proportion to `preds`; values in
            between sharpen the distribution towards the largest entries.

    Returns:
        The selected index into `preds` (a NumPy integer, as returned by
        `np.argmax`).

    Raises:
        ValueError: If `temperature` is negative.
    """
    if temperature < 0:
        raise ValueError("temperature must be >= 0")
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Avoiding a division by 0 error
        return np.argmax(preds)
    # log(0) is -inf for characters with zero probability; that is the
    # intended result here, so the divide warning is silenced locally.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest entry is 0 before exponentiating. The softmax is
    # unchanged mathematically, but exp() can no longer underflow to 0 for
    # every entry at very small temperatures (which produced NaN weights).
    scaled -= np.max(scaled)
    exp_preds = np.exp(scaled)
    probs = exp_preds / np.sum(exp_preds)
    # One multinomial draw yields a one-hot row; argmax recovers its index.
    draw = np.random.multinomial(1,probs,1)
    return np.argmax(draw)

### Defining a method to generate text
The text generation works like this:<br>
1. Start with a random part of the original Paradise Lost
2. Store that in a variable 'generated_text'
3. Pass the last 80 characters of 'generated_text' to the model
4. The model predicts a character which should go next
5. Append that character to 'generated_text'
6. Repeat steps 3. to 5. until you've reached the desired length

In [6]:
def gen_text(gen_length,epoch,temperature,start_lines=5,debug_messages=False,
             model_dir="paradise_loop_models",maxlen=80):
    """Generate text one character at a time with a saved model.

    The seed is `start_lines` consecutive lines taken from a random position
    in the module-level `lines`. On every step the last `maxlen` characters
    of the text so far are one-hot encoded, passed to the model, and the
    next character is chosen from the prediction with `sample`.

    Args:
        gen_length: Number of characters to append to the seed.
        epoch: Epoch number used to build the model file name
            (`<model_dir>/epoch_<epoch>_2lstms.hdf5`).
        temperature: Sampling temperature forwarded to `sample`.
        start_lines: Number of consecutive corpus lines used as the seed.
        debug_messages: When True, print the loaded model path and a
            progress line every 100 generated characters.
        model_dir: Directory that holds the saved models.
        maxlen: Number of trailing characters fed to the model per step.
            NOTE(review): presumably this must equal the sequence length the
            model was trained with; confirm before changing the default.

    Returns:
        The seed text followed by the `gen_length` generated characters.
    """
    # Loading the model
    model_path = model_dir+"/epoch_"+str(epoch)+"_2lstms.hdf5"
    model = keras.models.load_model(model_path)
    if debug_messages:
        print("model",model_path,"loaded")

    # Clamping the upper bound at 0 keeps randint valid when the corpus has
    # fewer lines than `start_lines`; the seed is then every available line.
    start_index = random.randint(0,max(0,len(lines)-start_lines))
    generated_text = "\n".join(lines[start_index:start_index+start_lines])

    vocab_size = len(chars)
    for i in range(gen_length):
        if i % 100 == 0 and debug_messages:
            # Keeps track of the program's progress in case you want long text files
            print(i,"/",gen_length,"characters added")

        # One-hot encoding of the last `maxlen` characters. When the text is
        # shorter than `maxlen`, the remaining rows stay all-zero.
        sampled = np.zeros((1,maxlen,vocab_size))
        for j, char in enumerate(generated_text[-maxlen:]):
            sampled[0,j,char_index[char]] = 1.

        # sample() takes log() of the prediction; zero entries would
        # otherwise emit divide-by-zero warnings.
        with np.errstate(divide='ignore'):
            preds = model.predict(sampled,verbose=0)[0] # prediction for the single batch row
            next_index = sample(preds,temperature) # gets index of next character (as int)
            next_char = chars[next_index]

        # Updates the 'generated_text' variable
        generated_text += next_char

    return generated_text

### Generating text
Here you can define which models and temperatures to use, how long the outputs should be, and where to put them.

In [None]:
epoch_numbers_to_try = [15]
temperatures_to_try = [0.3,0.75,1.0]
output_length = 1500
output_dir_name = "paradise_loop_text_output"
# Only "already exists" is an expected outcome here. Any other failure
# (permissions, missing parent directory, ...) should surface instead of
# being reported as an existing directory.
try:
    os.mkdir(output_dir_name)
except FileExistsError:
    print("Directory already exists, inserting files in existing directory")
for epoch in epoch_numbers_to_try:
    for temp in temperatures_to_try:
        fname = output_dir_name+"/epoch_"+str(epoch)+"length_"+str(output_length)+"temp_"+str(temp)+".txt"
        # Generate before opening the file so that a failure during
        # generation does not leave an empty output file behind.
        generated = gen_text(output_length,epoch,temp,debug_messages=True)
        with open(fname,"w") as f:
            f.write(generated)
    

Directory already exists, inserting files in existing directory
