In [1]:
import numpy as np
import pandas as pd
import random
import sys

import tensorflow
import keras

from keras import layers

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [14]:
# Stochastic Sampling: introducing randomness into the generative process

# Temperature range: 0.01 - 1.0
# Higher temp result in higher entropy sampling distributions = more surprising and unstructured
# Lower temp result in less randomness and more predictable generated text

def reweight_distribution(original_distribution, temperature=0.5):
    distribution = np.log(original_distribution) / temperature
    distribution = np.exp(distribution)
    
    return distribution / np.sum(distribution)

In [9]:
text = (open('lyrics_aggregated.txt').read().lower())
print(f'Corpus length: {len(text)}')

Corpus length: 49563


In [11]:
%%time
# Vectorizing sequences of characters

maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
    
print(f'Number of sequences: {len(sentences)}')

chars = sorted(list(set(text)))
print(f'Unique chracters: {len(chars)}')
char_indices = dict((char, chars.index(char)) for char in chars)

print('Vectorization...')

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 16501
Unique chracters: 57
Vectorization...
CPU times: user 323 ms, sys: 26.2 ms, total: 349 ms
Wall time: 368 ms


In [12]:
# Small baseline model

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Instructions for updating:
Colocations handled automatically by placer.


In [18]:
# Reweighting the original probability distribution returned from model and draw a character from it

def sample(preds, temperature=0.5):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [16]:
%%time
# Training and generating text

for epoch in range(1, 60):
    print(f'epoch {epoch}')
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')
    
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print(f'------ temperature: {temperature}')
        sys.stdout.write(generated_text)
        
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1
                
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            
            generated_text += next_char
            generated_text = generated_text[1:]
            
            sys.stdout.write(next_char)

epoch 1
Epoch 1/1
--- Generating with seed: "  "you can whip your porsche",
            "i been in the va"
------ temperature: 0.2
  "you can whip your porsche",
            "i been in the vars in that in the ine thit the wind the thit in thit ine thin the thit in that the whing thing the care the wind in thit in the wind in thit ind thit ing thi thithing that ind in in thit thing the will thin't in that ind thin the ming the thit ing thin thing the thit the bill thind in ine that ind the thit ind thand thin the will the that ine thind in in the wirle thit ind thit ind in thind thing ------ temperature: 0.5
 ine thind in in the wirle thit ind thit ind in thind thing thill the hame it ine hin youthi thinl is burte",
               "don on b and lilit tring thind sillithics itlild mad ars don iili don taad int and ive tore that inigi biee the yourin thine yor b ind the miche bure inin ing",
                   "in't no onicame thind ing mind bul trad thi tort the lithe ine thare thind you

In [17]:
model.save('billie3_128_60epochs.h5')