# Generating music using LSTM Networks and Keras Functional API.

# Introduction

# Preprocessing

In [0]:
import keras
from keras.layers import Embedding, Dense, CuDNNLSTM, Input, Concatenate, Dropout, BatchNormalization
from keras.optimizers import RMSprop
from keras.models import Model

import numpy as np

from google.colab import files

Using TensorFlow backend.


In [0]:
# Upload the dataset files through google.colab's `files.upload()`.
# The following cell reads "training_data.npy" and "midi_starts.npy" by
# relative path, so both files need to be provided here.
f = files.upload()

In [0]:
# Load the event table and the MIDI start indices saved as .npy files.
array = np.load("training_data.npy")
midi_starts = np.load("midi_starts.npy")

# Rows 0..3 of the table are, in order: notes, times, velocities, durations.
notes, times, velocities, durations = (array[row] for row in range(4))

# Vocabulary size of each feature: largest id seen plus one (ids start at 0).
note_range = np.max(notes) + 1
velocity_range = np.max(velocities) + 1
time_range = np.max(times) + 1
duration_range = np.max(durations) + 1

print("Data length:", len(notes), "\n")
for label, size in (("Note range:", note_range),
                    ("Time range:", time_range),
                    ("Velocity range:", velocity_range),
                    ("Duration range:", duration_range)):
    print(label, size)

Data length: 116004 

Note range: 88
Time range: 230
Velocity range: 30
Duration range: 40


In [0]:
from sklearn.utils import class_weight

def get_class_weight(x):
    """Return 'balanced' class weights for the labels in ``x``, keyed by label.

    Each distinct label ``c`` gets the weight
    ``n_samples / (n_classes * count(c))``, which is the formula
    scikit-learn documents for ``class_weight='balanced'``.

    The result is keyed by the label value itself, not by its position among
    the sorted unique labels. The two only coincide when every value in
    ``0..max(x)`` occurs in ``x``; with gaps, positional keys would attach
    weights to the wrong classes.

    Args:
        x: 1-D array-like of integer class labels.

    Returns:
        dict mapping each distinct label (native Python scalar) to its float
        weight. Empty input yields an empty dict.
    """
    labels, counts = np.unique(x, return_counts=True)
    # counts.sum() is the number of samples; computed with NumPy so the helper
    # does not depend on scikit-learn's (version-dependent) call signature.
    weights = counts.sum() / (len(labels) * counts)
    return dict(zip(labels.tolist(), weights.tolist()))

# Per-class weights for each of the four event features, computed in the
# order note, velocity, time, duration.
note_weight, velocity_weight, time_weight, duration_weight = [
    get_class_weight(feature)
    for feature in (notes, velocities, times, durations)
]

In [0]:
data_length = 100000
sequence_length = 512

# Every boundary that is hit skips `sequence_length` start positions, so
# reserve that many positions per entry of `midi_starts` up front.
training_data_length = data_length - len(midi_starts) * sequence_length

if training_data_length <= 0:
    raise ValueError(
        "data_length is too small for %d boundaries with sequence_length=%d"
        % (len(midi_starts), sequence_length)
    )


def _window_starts(n_windows, window, boundaries):
    """Return the start index of each of ``n_windows`` training windows.

    Start positions advance one event at a time. When the target event
    (index ``start + window``) lands on a boundary, the start jumps forward by
    ``window`` so the next window begins exactly on that boundary instead of
    straddling it.

    The boundary test uses the real position (counter + accumulated offset).
    Testing the bare counter only works until the first jump and misplaces
    every later one.

    NOTE(review): assumes ``boundaries`` holds raw indices into the event
    arrays (presumably the first event of each MIDI file) and that boundaries
    are more than ``window`` events apart - confirm against the preprocessing
    script.
    """
    boundary_set = {int(b) for b in boundaries}  # O(1) membership tests
    starts = np.empty(n_windows, dtype=np.int64)
    offset = 0
    for counter in range(n_windows):
        if counter + offset + window in boundary_set:
            offset += window
        starts[counter] = counter + offset
    return starts


def _slice_windows(feature, starts, window):
    """Cut one feature array into (inputs, outputs).

    ``inputs[k]`` is ``feature[starts[k]:starts[k] + window]`` and
    ``outputs[k]`` is the event right after it, ``feature[starts[k] + window]``.
    Shapes are ``(len(starts), window)`` and ``(len(starts),)``.
    """
    inputs = np.stack([feature[s:s + window] for s in starts])
    outputs = feature[starts + window]
    return inputs, outputs


window_starts = _window_starts(training_data_length, sequence_length, midi_starts)

# The last window also needs its target event, one past the window's end.
if window_starts[-1] + sequence_length >= len(notes):
    raise ValueError(
        "not enough events: need index %d but only %d events are loaded"
        % (window_starts[-1] + sequence_length, len(notes))
    )

note_inputs, note_outputs = _slice_windows(notes, window_starts, sequence_length)
velocity_inputs, velocity_outputs = _slice_windows(velocities, window_starts, sequence_length)
time_inputs, time_outputs = _slice_windows(times, window_starts, sequence_length)
duration_inputs, duration_outputs = _slice_windows(durations, window_starts, sequence_length)

# Model

In [0]:
def _sequence_input(name):
    """Create an int32 input layer holding `sequence_length` ids per sample."""
    return Input(shape=(sequence_length,), dtype='int32', name=name)


def _embed(tokens, vocabulary_size, width):
    """Embed a sequence of ids into `width`-dimensional vectors."""
    return Embedding(vocabulary_size, width, input_length=sequence_length)(tokens)


def _softmax_head(features, vocabulary_size, name):
    """Attach a named softmax layer with one unit per id of a feature."""
    return Dense(vocabulary_size, activation="softmax", name=name)(features)


# One input per event feature.
note_input = _sequence_input('note_input')
velocity_input = _sequence_input('velocity_input')
time_input = _sequence_input('time_input')
duration_input = _sequence_input('duration_input')

# Each feature gets its own embedding table with its own width.
note_embedding = _embed(note_input, note_range, 16)
velocity_embedding = _embed(velocity_input, velocity_range, 8)
time_embedding = _embed(time_input, time_range, 32)
duration_embedding = _embed(duration_input, duration_range, 8)

# Join the four embeddings into a single vector per time step.
concatinate_inputs = Concatenate()(
    [note_embedding, velocity_embedding, time_embedding, duration_embedding]
)

# Shared trunk: one LSTM over the sequence, then a ReLU dense layer.
lstm_1 = CuDNNLSTM(512)(concatinate_inputs)
final = Dense(512, activation="relu")(lstm_1)

# One softmax head per feature, all fed by the shared trunk.
note_output = _softmax_head(final, note_range, 'note_output')
velocity_output = _softmax_head(final, velocity_range, 'velocity_output')
time_output = _softmax_head(final, time_range, 'time_output')
duration_output = _softmax_head(final, duration_range, 'duration_output')

model = Model(
    inputs=[note_input, velocity_input, time_input, duration_input],
    outputs=[note_output, velocity_output, time_output, duration_output],
)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
note_input (InputLayer)         (None, 512)          0                                            
__________________________________________________________________________________________________
velocity_input (InputLayer)     (None, 512)          0                                            
__________________________________________________________________________________________________
time_input (InputLayer)         (None, 512)          0                                            
__________________________________________________________________________________________________
duration_input (InputLayer)     (None, 512)          0                                            
__________________________________________________________________________________________________
embedding_

In [0]:
# Output-layer names, in the order the model's outputs were declared.
output_names = ('note_output', 'velocity_output', 'time_output', 'duration_output')

# Every head uses the same loss on integer class targets.
losses = dict.fromkeys(output_names, 'sparse_categorical_crossentropy')

# Relative contribution of each head to the total loss.
loss_weights = dict(zip(output_names, (0.3, 0.2, 0.3, 0.2)))

model.compile(optimizer=RMSprop(), loss=losses, loss_weights=loss_weights)

# Training

In [0]:
# Input windows keyed by the model's input-layer names.
training_inputs = {
    'note_input': note_inputs,
    'velocity_input': velocity_inputs,
    'time_input': time_inputs,
    'duration_input': duration_inputs,
}

# Next-event targets keyed by the model's output-layer names.
training_targets = {
    'note_output': note_outputs,
    'velocity_output': velocity_outputs,
    'time_output': time_outputs,
    'duration_output': duration_outputs,
}

history = model.fit(training_inputs, training_targets, epochs=64, batch_size=64)

# Evaluation

In [0]:
generation_length = 512

# Pick a random training window as the seed. np.random.randint excludes its
# upper bound, so pass the window count itself; subtracting one from it would
# make the last windows unreachable.
random_input = np.random.randint(0, training_data_length)

# Rolling context windows, one per feature, shaped (1, sequence_length) so
# they can be fed to the model as a batch of one. Copied so the training
# arrays are not modified while generating.
note_predictions = note_inputs[random_input].copy().reshape((1, sequence_length))
velocity_predictions = velocity_inputs[random_input].copy().reshape((1, sequence_length))
time_predictions = time_inputs[random_input].copy().reshape((1, sequence_length))
duration_predictions = duration_inputs[random_input].copy().reshape((1, sequence_length))

# Generated events, one array per feature.
note_final = np.zeros((generation_length,), dtype=np.int32)
velocity_final = np.zeros((generation_length,), dtype=np.int32)
time_final = np.zeros((generation_length,), dtype=np.int32)
duration_final = np.zeros((generation_length,), dtype=np.int32)

# (context window, generated sequence) pairs in the same order as the model's
# outputs: note, velocity, time, duration.
streams = (
    (note_predictions, note_final),
    (velocity_predictions, velocity_final),
    (time_predictions, time_final),
    (duration_predictions, duration_final),
)

for step in range(generation_length):

    predictions = model.predict({"note_input": note_predictions,
                                 "velocity_input": velocity_predictions,
                                 "time_input": time_predictions,
                                 "duration_input": duration_predictions})

    for (window, generated), probabilities in zip(streams, predictions):
        # Greedy decoding: take the most probable class of the single sample.
        token = np.argmax(probabilities[0])

        # Drop the oldest event and append the new one at the end.
        window[0] = np.roll(window[0], -1)
        window[0, -1] = token

        generated[step] = token


# Saved in the row order the loading cell reads from "training_data.npy":
# notes, times, velocities, durations.
np.save("generated_data.npy", [note_final, time_final, velocity_final, duration_final])