## Lab 7: LSTM in Keras

This lab walks you through a toy example of using an LSTM in Keras to generate music in ABC notation.

Let's first deal with data preprocessing

In [1]:
import numpy as np
import random

# Read in the text file
with open('data/abc.txt') as f:
    text = f.read()

# Take first 50000 characters (the txt file is too long, but you can experiment with the whole thing)
text = text[:50000]
print('corpus length:', len(text))

# Character mapping: each distinct character gets an integer id (sorted, so the
# mapping is the same on every run), plus the inverse lookup used for decoding.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Cut the text into overlapping windows of maxlen characters; the character
# that immediately follows each window is its prediction target.
maxlen = 40
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encoding:
#   x[i, t, c] is set when window i has character id c at position t
#   y[i, c]    is set when character id c follows window i
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

('corpus length:', 50000)
('total chars:', 75)
('nb sequences:', 49960)
Vectorization...


In [2]:
# cut the text in semi-redundant sequences of maxlen characters
# Only every `step`-th window is kept, so consecutive training windows overlap
# by maxlen - step characters. This rebuilds sentences, next_chars, x and y,
# replacing any values those names held before this cell ran.
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encoding:
#   x[i, t, c] is set when window i has character id c at position t
#   y[i, c]    is set when character id c follows window i
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

('nb sequences:', 16654)
Vectorization...


## Now, let's build the model

In [3]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop


# build the model: a single LSTM
# The network consumes one one-hot window of shape (maxlen, len(chars)) and
# outputs a softmax distribution over the len(chars) possible next characters.
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# The learning rate is passed positionally on purpose: it is the first RMSprop
# argument, but its keyword was renamed from `lr` to `learning_rate` in later
# Keras releases, so the positional form works with either spelling.
optimizer = RMSprop(0.01)
# Targets are one-hot rows, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

model.summary()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               104448    
_________________________________________________________________
dense_1 (Dense)              (None, 75)                9675      
_________________________________________________________________
activation_1 (Activation)    (None, 75)                0         
Total params: 114,123
Trainable params: 114,123
Non-trainable params: 0
_________________________________________________________________


Run the model!

In [17]:
# Training hyper-parameters: number of passes over the data and mini-batch size
batch_size = 64
num_epoch = 2

# Fit on the one-hot windows (x) and their next-character targets (y).
# The call is left as the cell's last expression so its return value is shown.
model.fit(x, y, epochs=num_epoch, batch_size=batch_size)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f2180538990>

Next, let's create a sampling function that draws the next character from the model's predicted probabilities, so that we can generate new text

In [18]:
def sample(preds, temperature=1.0):
    """Draw one index at random from a probability array.

    Args:
        preds: 1-D array-like of non-negative probabilities (for example one
            row of softmax output).
        temperature: positive scaling factor applied to the log-probabilities
            before re-normalising. 1.0 (the default) samples from `preds`
            unchanged; values below 1.0 sharpen the distribution towards its
            most likely entry; values above 1.0 flatten it.

    Returns:
        The sampled index (the result of `np.argmax` over a one-hot draw).

    Raises:
        ValueError: if `temperature` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be > 0, got %r' % (temperature,))

    # float64 keeps the re-normalised probabilities accurate enough for
    # np.random.multinomial, which rejects pvals that sum to more than 1.
    preds = np.asarray(preds).astype('float64')

    # Zero probabilities become -inf here and end up as exactly 0 again after
    # the exp below, so the divide-by-zero warning from log(0) is silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0: leaves the softmax result unchanged but
    # prevents every exp() from underflowing to 0 at small temperatures.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)        # softmax over scaled logits

    probas = np.random.multinomial(1, preds, 1)  # draw one example (one-hot row)
    return np.argmax(probas)


In [23]:
# Choose where in the corpus the seed text starts. It is pinned to the very
# beginning here; the commented-out line picks a random position instead.
#start_index = random.randint(0, len(text) - maxlen - 1)
start_index = 0
# NOTE: diversity is set here but is not passed to sample() in the loop below,
# so changing its value has no effect on the text generated by this cell.
diversity = 1.0

# The seed is one maxlen-character window of the corpus; the generated text
# starts out as a copy of it.
sentence = text[start_index: start_index + maxlen]
generated = sentence
print('----- Generating with seed: \n"' + sentence + '"\n')


# generate 400 characters, one per iteration
for i in range(400):
    # one-hot encode the current window as a batch containing a single example
    x_pred = np.zeros((1, maxlen, len(chars)))
    positions = np.arange(len(sentence))
    char_ids = [char_indices[c] for c in sentence]
    x_pred[0, positions, char_ids] = 1.

    # predicted distribution over the next character, then draw one from it
    preds = model.predict(x_pred, verbose=0)[0]
    next_index = sample(preds)
    next_char = indices_char[next_index]

    # extend the output and slide the window forward by one character
    generated = generated + next_char
    sentence = sentence[1:] + next_char

print('----- Created: \n"' + generated + '"\n')

----- Generating with seed: 
"T:'G Iomain Nan Gamhna
M:9/8
K:Cmaj
G E "

----- Created: 
"T:'G Iomain Nan Gamhna
M:9/8
K:Cmaj
G E G E C D E | C E G E D E A | G F D E /2 C /2 C 2 D | E D E C 2 C D E | G, G c d /2 B /2 /2 F /2 G 2 | G 2 G F D D 2 | C 2 G E E 2 C B, | C A c d d c e e | d c B c d e f e /2 e /2 | e /2 d  B c f e f g b | c' a (3 a g c c c c B e f | e 2 d e d c =B /2 A /2 | G c d e f d B c | c c d d e f f g /2 a /2 | f g 2 b E c' c G /2 A /2 c a g /2 g /2 e /2 f /2 a /2 a /2 a /2 a /2 c /2 c /2 e /2 g c' | b /2 a /"



You can listen to how it sounds by copying and pasting the generated text above into http://www.mandolintab.net/abcconverter.php

Try tuning the parameters or building your own model. Have fun!