In [1]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.utils import np_utils
import random
import sys

Using Theano backend.
Using gpu device 0: GeForce GT 820M (CNMeM is enabled with initial size: 75.0% of memory, cuDNN not available)


In [2]:
data = open('tunes.txt','r').read()

In [13]:
# list of unique characters in the data
chars=list(set(data))
# length of the character vocabulary we have
vocabulary_size = len(chars)
# no of neurons in the hidden layer
hidden_dim = 100
# no. of characters in one sequence.. one sequence is an input to one neural network
sequence_len = 10
# number of epochs
no_epoch = 2
# len of characters to be predicted and printed
len_to_print = 500

In [4]:
# mapping from char to int
char_to_index = dict((c, i) for i, c in enumerate(chars))
# mapping from int to char
index_to_char = dict((i, c) for i, c in enumerate(chars))

In [5]:
# X_train is a 3D np array
X_train = np.zeros((len(data),sequence_len,vocabulary_size))
# for a sequence, y_train stores the next character index
y_train = np.zeros((len(data),vocabulary_size))

# stores different sequences possible
time_seq = []
# stores their corresponding next characters
next_char = []

for i in range(0, len(data)-sequence_len):
    time_seq.append(data[i:i+sequence_len])
    next_char.append(data[i+sequence_len])
    
# making X_train and y_train one hot vectors
for i, sequence in enumerate(time_seq):
    for t, c in enumerate(sequence):
        X_train[i, t, char_to_index[c]] = 1
    y_train[i, char_to_index[next_char[i]]] = 1
    
print X_train.shape
print y_train.shape

(129993, 10, 87)
(129993, 87)


In [6]:
model = Sequential()
model.add(LSTM(100,input_shape=(sequence_len, vocabulary_size)))
model.add(Dense(vocabulary_size))
model.add(Activation('softmax'))
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lstm_1 (LSTM)                    (None, 100)           75200       lstm_input_1[0][0]               
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 87)            8787        lstm_1[0][0]                     
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 87)            0           dense_1[0][0]                    
Total params: 83,987
Trainable params: 83,987
Non-trainable params: 0
____________________________________________________________________________________________________


In [7]:
model.compile(loss='categorical_crossentropy',optimizer='adagrad',metrics=['accuracy'])

In [17]:
# iterating for number of epochs specified
text = "X: 94\nT:Fi"
for i in range(1, no_epoch):
    
    print ('-'*50)
    print ("EPOCH: ", i)
    
    model.fit(X_train,y_train,batch_size=128,nb_epoch=1)
    
    # randomly taking a starting index.. it acts as a starting point for predicted text
    #start_index = random.randint(0, 1000)
    #text=data[start_index:start_index+sequence_len]
    print "Seed: ", text
    
    sys.stdout.write(text)
    
    for i in range(len_to_print):
        
        x = np.zeros((1, sequence_len, vocabulary_size))
        
        for t, c in enumerate(text):
            x[0, t, char_to_index[c]]=1
        
        #predictions
        pred_index = model.predict(x)[0]
        prob = np.random.multinomial(1, np.asarray(pred_index).astype('float64'), 1)
        next_index = np.argmax(prob)
        next_char = index_to_char[next_index]
        text = text[1:] + next_char
        
        sys.stdout.write(next_char)
    print ()

--------------------------------------------------
('EPOCH: ', 1)
Epoch 1/1
Seed:  X: 94
T:Fi
X: 94
T:FiStinayt, ,aar PhihPCoRof
6:6/2
K:B
/2G/2|"G"b2E "E7"c3|"D7"A2D "F"D2A|"A7"GGc A2||"A"AGc B3|"Em"BGB "G7"GBG|"C"c3 ezf:|
Tg fe aaAneeSa% NFottngham Music Databaseie:DcP
% NoBtgngha  QrscereJ\
% Notingaaaa ui'a
PPFCNt/de
/NBoo D
 Dhi6lelatBe Riga% NttbtngaamMMuuic DDbbaafeA| "G"b2B e3|"Am"eBc A^GB|"A"GBF "D7"AFF|"A"E2A "C"cGG|
"C"cBc "G"GB:|


X: 091
T:Aa l3 RhenRHo
% No
tingham Pssic lheks oit, saa  hPi Aiilee lcnsre
% Nottiggham Musc DDatabae

P::A3

::3
':3/2ee|
^G "B7degf|"Em"aeg "A7"a2-|
3|"G()
