In [0]:

import numpy as np
import glob
from tqdm import tqdm
import sys
import os



In [0]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint

Using TensorFlow backend.


In [0]:

# Loading the data.
paths = glob.glob('data/midi_abc/*.txt')

In [0]:
paths

['data/midi_abc/reelsa-c.txt',
 'data/midi_abc/reelsu-z.txt',
 'data/midi_abc/slip.txt',
 'data/midi_abc/ashover.txt',
 'data/midi_abc/playford.txt',
 'data/midi_abc/xmas.txt',
 'data/midi_abc/reelsm-q.txt',
 'data/midi_abc/hpps.txt',
 'data/midi_abc/morris.txt',
 'data/midi_abc/waltzes.txt',
 'data/midi_abc/jigs.txt',
 'data/midi_abc/reelsh-l.txt',
 'data/midi_abc/reelsr-t.txt',
 'data/midi_abc/reelsd-g.txt']

In [0]:

# add them into one big string
data = ""
for path in paths:
    data += (open(path).read())

data_split = []
tmp = ""

In [0]:
# Split the data at every 'X' so that it is easier to train is small pieces. Also remove newlines.
for i in data:
    if i=='X' and tmp!="":
        tmp = tmp.rstrip()
        data_split.append(tmp)
        tmp=""
tmp += i

In [0]:

del data_split[0]

In [0]:
for i in data_split:
    data += i

In [0]:

#Proceed to creating a dictionary with which  can convert characters to integers and vice versa (basically vectorization)
chars = sorted(list(set(data)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [0]:
n_chars = len(data)
n_vocab = len(chars)

In [0]:
n_chars 


452499

In [0]:
# The sequences have a length of 100 characters
seq_length = 100

dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
	seq_in = data[i:i + seq_length]
	seq_out = data[i + seq_length]
	dataX.append([char_to_int[char] for char in seq_in])
	dataY.append(char_to_int[seq_out])


In [0]:
#to find rows and columns
rows  = len(dataX)
cols = len(dataX[0])
print(rows,cols)

452399 100


In [0]:
n_samples = len(dataX)
X = np.reshape(dataX, (n_samples, seq_length, 1))
X.shape

(452399, 100, 1)

In [0]:
X = X / float(n_vocab)

y = np_utils.to_categorical(dataY)  #np.utils.to_categorical is used to convert array of labeled data(from 0 to nb_classes-1) to one-hot vector.




In [0]:
X.shape

(452399, 100, 1)

In [0]:
# Building the Keras model
model = Sequential()
model.add(LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True)) # Since there is more than one LSTM layer we need to return the sequences
model.add(Dropout(0.2)) 
model.add(LSTM(512))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

In [0]:
import os

In [0]:
if os.path.isfile('best_weights.hdf5'):
	model.load_weights('best_weights.hdf5')
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [0]:

filepath = "/output/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5" # Saving the model each time it improves
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [0]:
# Fitting the model to the data.
model.fit(X, y, epochs=50, batch_size=1024, callbacks=callbacks_list)

Epoch 1/50
 15360/452399 [>.............................] - ETA: 12:45 - loss: 1.2492

KeyboardInterrupt: ignored

In [0]:
for k in range(5):
	start = np.random.randint(0, len(dataX)-1)
	count = 0
	pattern = dataX[start] # Pick up a random character from the dataset to start from.
	
	prediction_string = ""

	# Start predicting a sequence of characters.
	for i in tqdm(range(1000)):
		x = np.reshape(pattern, (1, len(pattern), 1)) # Resize the data to fit to the model.
		x = x/float(n_vocab)

		pred = model.predict(x, verbose=0) # Make prediction
		index = np.argmax(pred) # Get the character with the highest probability
		result = int_to_char[index] # Convert the one index of the character to a char.
		#seq_in = [int_to_char[value] for value in pattern] 
		prediction_string += result # add the result to the prediction string
		#sys.stdout.write(result)
		pattern.append(index) # Add the predicted character to the pattern that will be fed next time the model.
						 	  # For example if the pattern is 'abb' and the model predicts 'a'. make sure to change the pattern that
						 	  # will be fed next to 'abba' and shorten it to fit so it becomes 'bba'.
		pattern = pattern[1:len(pattern)]
		# Due to the small size of the dataset the model will probably slightly overfit. To counter that and add a bit 
		# of variation I added something like noise to it's predictions. So I add random paths of the data to force
		# the neural network to change the pattern and choose a different not to make the music a bit more interesting.
		start+=np.random.randint(0, 1)
		count+=1
		if count%250==0:
			start += np.random.randint(-100*i, 100*2*i)
			while start+np.random.randint(0, 200) > len(dataX):
				start = start-np.random.randint(0, 500)
			pattern.extend(dataX[start][:50])
			pattern = pattern[50:len(pattern)]
	print('Done')

	# Write the predictions to a .txt file.
	fl = open('big_pred_'+ str(k) +'.txt', 'w')
	fl.write(prediction_string)
print(prediction_string)

100%|██████████| 1000/1000 [01:46<00:00,  9.35it/s]
  0%|          | 1/1000 [00:00<01:45,  9.47it/s]

Done


100%|██████████| 1000/1000 [01:46<00:00,  9.43it/s]
  0%|          | 1/1000 [00:00<01:45,  9.46it/s]

Done


100%|██████████| 1000/1000 [01:45<00:00,  9.49it/s]
  0%|          | 1/1000 [00:00<01:46,  9.34it/s]

Done


100%|██████████| 1000/1000 [01:45<00:00,  9.47it/s]
  0%|          | 1/1000 [00:00<01:44,  9.60it/s]

Done


100%|██████████| 1000/1000 [01:45<00:00,  9.49it/s]

Done
/2c/2|"D"d/2d/2d/2d/2 "A"e/2e/2e/2e/2|"D"f/2d/2d/2f/2 "A"e/2d/2e/2e/2|\
"D"f/2d/2d/2f/2 "A7"e/2d/2e/2e/2|"D"d/2d/2d/2d/2 "A"e/2d/2d/2f/2|
"D"d/2d/2d/2f/2 "A"e/2f/2e/2e/2|"D"f/2f/2f/2f/2 "A"e/2e/2e/2e/2|\
"D"f/2d/2d/2f/2 "A7"e/2g/2e/2e/2|"D"d/2d/2d/2d"d/2d/2d/2d/2 "A"e/2e/2e/2e/2|"D"d/2d/2d/2f/2 "A"e/2e/2e/2e/2|\
"D"d/2d/2d/2f/2 "A7"e/2d/2e/2e/2|"D"d/2d/2d/2d/2 "A"e/2d/2d/2f/2|
"D"d/2d/2d/2f/2 "A"e/2f/2e/2e/2|"D"f/2f/2f/2f/2 "A"e/2e/2e/2e/2|\
"D"f/2d/2d/2f/2 "A7"e/2g/2e/2e/2|"D"d/2d/2d/2d/2 "A"e/d/2 "D7"d/2d/2d/2d/2|
"G"g/2d/2g/2d/2 "D"g/2f/2e/2d/2|"G"g/2d/2d/2d/2 "D"e/2d/2e/2e/2|\
"G"g/2d/2d/2d/2 "A7"e/2d/2e/2e/2|"D"d/2d/2d/2d/2 "A7"e/2d/2g/2e/2|
"D"d/2d/2d/2f/2 "A"e/2e/2e/2e/2|"D"d/2d/2d/2f/2 "A"e/2e/2e/2e/2|\
"D"f/2d/2d/2f/2 "A7"e/2g/2e/2A/2 AF|"D"F/2F/2A/2F/2 F/2F/2F/2F/2|"D"F/2F/2A/2F/2 "D"A/2A/2F/2F/2|\
"G"G/2G/2G/2F/2 "A7"A/2F/2F/2F/2|"D"D/2D/2D/2F/2 "A7"E/2F/2E/2F/2|
"D"F/2F/2F/2F/2 "D"A/2F/2F/2F/2|"D"F/2F/2A/2F/2 "D"A/2d/2d/2d/2|\
"G"B/2d/2d/2d/2 "A7"e/2d/2e/2c/2|"D"d/2d/2




# Convert the .txt files to midi 