# Multi-layer LSTM for Music Generation (with Magenta)

In [9]:
import os
import pickle
import numpy as np
import tensorflow as tf
import note_seq
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, BatchNormalization, Dropout, Dense, Activation, Lambda, Softmax
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
from magenta.scripts.convert_dir_to_note_sequences import convert_directory
from note_seq.midi_io import midi_file_to_note_sequence

# Time-grid resolution used when sampling note sequences: step k of the
# sampling loops corresponds to time k / note_divisions.
note_divisions = 16

In [81]:
def create_network(network_input, n_vocab):
    """Build and compile the stacked bidirectional-LSTM classifier.

    Args:
        network_input: Array-like with a ``shape`` attribute; only
            ``shape[1]`` and ``shape[2]`` are read, to set the input shape
            of the first recurrent layer.
        n_vocab: Number of units in the final Dense layer (one per class).

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer and
        categorical cross-entropy loss. The final Dense output is divided
        by a fixed temperature (0.6) before the softmax.
    """
    units = 256
    drop_rate = 0.4
    temperature = 0.6
    timesteps, features = network_input.shape[1], network_input.shape[2]

    model = Sequential()

    # Layers are constructed in the order they are stacked.
    stack = [
        # Three bidirectional LSTM layers; only the last one collapses the
        # time axis (return_sequences left at its default).
        Bidirectional(
            LSTM(units, dropout=drop_rate, return_sequences=True),
            input_shape=(timesteps, features),
        ),
        Bidirectional(LSTM(units, dropout=drop_rate, return_sequences=True)),
        Bidirectional(LSTM(units, dropout=drop_rate)),
        # Dense head with normalization and dropout around a ReLU layer.
        BatchNormalization(),
        Dropout(drop_rate),
        Dense(units // 2),
        Activation('relu'),
        BatchNormalization(),
        Dropout(drop_rate),
        Dense(n_vocab),
        # Temperature scaling of the logits, then softmax.
        Lambda(lambda logits: logits / temperature),
        Softmax(),
    ]
    for layer in stack:
        model.add(layer)

    model.compile(optimizer='adam', loss='categorical_crossentropy')

    return model
    

In [6]:
# Vocab key format: "<flag> <pitch>".
# As emitted by get_notes: flag 0 marks the next time step, flag 1 a new note,
# flag 2 the next channel (instrument change), flag 3 a continued note.

def is_start(target, start):
    """Return True when ``start`` lies within +/-0.02 (exclusive) of ``target``."""
    return target - 0.02 < start < target + 0.02

def is_note_valid(target, note, instrument_infos):
    """Tell whether ``note`` is sounding at ``target`` and should be kept.

    A note is kept when its [start_time, end_time] span, widened by 0.02 on
    each side, contains ``target`` and the entry of ``instrument_infos``
    indexed by ``note.instrument`` is not named 'no'.

    Args:
        target: Time being sampled.
        note: Object exposing ``start_time``, ``end_time`` and ``instrument``.
        instrument_infos: Container indexed by ``note.instrument`` whose
            items expose ``name``.

    Returns:
        True if the note passes all three checks, otherwise False.
    """
    # The checks run in the same order as a chained ``and``: the instrument
    # lookup only happens once both timing checks have passed.
    if not target - 0.02 < note.end_time:
        return False
    if not target + 0.02 > note.start_time:
        return False
    return instrument_infos[note.instrument].name != 'no'

def get_vocab():
    """Build the sorted token vocabulary from the MIDI files in samples/small.

    Every file is sampled on a grid of ``1 / note_divisions`` time units.
    At each grid point, a note that starts there contributes the token
    ``'1 <pitch>'``; a note that does not start there but is valid at that
    time (per ``is_note_valid``) contributes ``'3 <pitch>'``. The marker
    tokens ``'0 0'`` and ``'2 0'`` are always present.

    Side effects:
        Writes the vocabulary list to ``vocab.p`` with pickle.

    Returns:
        The vocabulary as a sorted list of strings.
    """
    sample_dir = 'samples/small'
    vocab = {'0 0', '2 0'}
    for file in os.listdir(sample_dir):
        data = midi_file_to_note_sequence(os.path.join(sample_dir, file))
        # The time of the second time-signature event is used as the end of
        # the sampled range. NOTE(review): presumably the sample files carry
        # such an event as an end marker - confirm. Files with fewer than
        # two time signatures fall back to the sequence's total_time instead
        # of raising IndexError.
        if len(data.time_signatures) > 1:
            total_time = data.time_signatures[1].time
        else:
            total_time = data.total_time
        iteration = 0
        while iteration / note_divisions < total_time:
            now = iteration / note_divisions
            # Single pass over the notes: a note is either a new note at
            # this step, a continued note, or irrelevant.
            for note in data.notes:
                if is_start(now, note.start_time):
                    vocab.add('1 ' + str(note.pitch))
                elif is_note_valid(now, note, data.instrument_infos):
                    vocab.add('3 ' + str(note.pitch))
            iteration += 1
    vocab = sorted(vocab)
    # Context manager so the file handle is flushed and closed promptly.
    with open('vocab.p', 'wb') as handle:
        pickle.dump(vocab, handle)
    return vocab

def get_notes(vocab):
    """Encode the MIDI files in samples/small as a flat list of vocab indices.

    For each grid step (``1 / note_divisions`` time units apart) the output
    receives the index of ``'0 0'``, followed by the notes valid at that
    step ordered by instrument and then by descending pitch. The index of
    ``'2 0'`` is inserted whenever the instrument differs from the previous
    one (starting from instrument 0 at every step). A note contributes
    ``'1 <pitch>'`` if it starts at the step, otherwise ``'3 <pitch>'``.

    Args:
        vocab: Sequence of token strings; a token's position is its index.

    Side effects:
        Writes the resulting list to ``notes.p`` with pickle.

    Returns:
        List of integer vocabulary indices.

    Raises:
        KeyError: If a required token is missing from ``vocab``.
    """
    sample_dir = 'samples/small'
    vocab_dict = {token: index for index, token in enumerate(vocab)}
    notes = []
    for file in os.listdir(sample_dir):
        data = midi_file_to_note_sequence(os.path.join(sample_dir, file))
        # The time of the second time-signature event bounds the sampled
        # range; files with fewer than two time signatures fall back to the
        # sequence's total_time instead of raising IndexError.
        if len(data.time_signatures) > 1:
            total_time = data.time_signatures[1].time
        else:
            total_time = data.total_time
        iteration = 0
        while iteration / note_divisions < total_time:
            now = iteration / note_divisions
            notes.append(vocab_dict['0 0'])
            active = [
                note for note in data.notes
                if is_note_valid(now, note, data.instrument_infos)
            ]
            # One stable sort: instrument ascending, then pitch descending.
            active.sort(key=lambda note: (note.instrument, -note.pitch))
            prev_instrument = 0
            for note in active:
                if note.instrument != prev_instrument:
                    # NOTE(review): an earlier version special-cased
                    # prev_instrument == 'p1' / note.instrument == 'tr' to
                    # emit a second marker. note.instrument is used as an
                    # index into instrument_infos, so those string
                    # comparisons presumably never matched and exactly one
                    # marker was always emitted. If a double marker for a
                    # skipped channel is wanted, compare
                    # instrument_infos[...].name instead - that changes the
                    # encoding stored in notes.p.
                    notes.append(vocab_dict['2 0'])
                    prev_instrument = note.instrument
                flag = '1' if is_start(now, note.start_time) else '3'
                notes.append(vocab_dict[flag + ' ' + str(note.pitch)])
            iteration += 1
    # Context manager so the file handle is flushed and closed promptly.
    with open('notes.p', 'wb') as handle:
        pickle.dump(notes, handle)
    return notes

def prepare_sequences(notes, n_vocab):
    """Prepare the training sequences used by the neural network.

    Slides a window of 16 tokens over ``notes``; each window is one input
    sample and the token that follows it is the target.

    Args:
        notes: List of integer vocabulary indices.
        n_vocab: Vocabulary size; used to scale the inputs and as the
            one-hot width of the targets.

    Returns:
        Tuple ``(network_input, network_output)`` where ``network_input``
        has shape ``(n_patterns, 16, 1)`` with values divided by
        ``n_vocab`` and ``network_output`` is the one-hot encoding of the
        targets with ``n_vocab`` columns.
    """
    sequence_length = 16
    n_patterns = max(len(notes) - sequence_length, 0)

    windows = [notes[i:i + sequence_length] for i in range(n_patterns)]
    targets = [notes[i + sequence_length] for i in range(n_patterns)]

    # Reshape into (samples, timesteps, features) as expected by LSTM
    # layers, then normalize the indices by the vocabulary size.
    network_input = np.reshape(windows, (n_patterns, sequence_length, 1))
    network_input = network_input / float(n_vocab)

    # Pin the one-hot width to the vocabulary size. Without num_classes the
    # width is inferred from the largest target present, which can be
    # smaller than n_vocab and then mismatches a model output of n_vocab.
    network_output = to_categorical(targets, num_classes=n_vocab)

    return (network_input, network_output)

In [85]:
def train_network():
    """Train the music-generation network end to end.

    Builds the vocabulary and the encoded note list, turns them into
    training sequences, creates the model, prints its summary, and fits it
    for 2 epochs. A checkpoint callback monitoring the training loss saves
    to ``weights.hdf5`` only when the loss improves.
    """
    vocab = get_vocab()
    n_vocab = len(vocab)
    notes = get_notes(vocab)
    network_input, network_output = prepare_sequences(notes, n_vocab)

    print('Done preparing sequences!')

    model = create_network(network_input, n_vocab)
    model.summary()

    best_loss_checkpoint = ModelCheckpoint(
        "weights.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min',
    )

    model.fit(
        x=network_input,
        y=network_output,
        epochs=2,
        callbacks=[best_loss_checkpoint],
    )

In [86]:
# Run the training pipeline; the model summary printed by train_network follows.
train_network()

Done preparing sequences!
Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_39 (Bidirectio (None, 16, 512)           528384    
_________________________________________________________________
bidirectional_40 (Bidirectio (None, 16, 512)           1574912   
_________________________________________________________________
bidirectional_41 (Bidirectio (None, 512)               1574912   
_________________________________________________________________
batch_normalization_26 (Batc (None, 512)               2048      
_________________________________________________________________
dropout_26 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_26 (Dense)             (None, 128)               65664     
_________________________________________________________________
activation_13 (Activation) 

In [87]:
def prepare_sequences_prediction(notes, n_vocab):
    """Cut ``notes`` into 16-token windows for seeding generation.

    Args:
        notes: List of integer vocabulary indices.
        n_vocab: Vocabulary size used to scale the indices.

    Returns:
        Tuple ``(network_input, normalized_input)``: the raw windows as a
        list of lists, and the same windows as an array of shape
        ``(n_patterns, 16, 1)`` divided by ``n_vocab``.
    """
    sequence_length = 16
    window_starts = range(0, len(notes) - sequence_length, 1)
    network_input = [
        notes[begin:begin + sequence_length] for begin in window_starts
    ]

    # (samples, timesteps, features) layout for the LSTM layers, scaled by
    # the vocabulary size.
    as_column = np.reshape(
        network_input, (len(network_input), sequence_length, 1)
    )
    normalized_input = as_column / float(n_vocab)

    return (network_input, normalized_input)

def generate_notes(model, network_input, vocab, n_vocab, n_notes=200):
    """Generate tokens from the network, seeded by a random input window.

    A window is picked at random from ``network_input``. At every step the
    window (divided by ``n_vocab``) is fed to ``model.predict``, the next
    token index is sampled from the predicted distribution, and the window
    slides forward by one token.

    Args:
        model: Object with a ``predict`` method returning an array of shape
            ``(1, n_classes)`` of non-negative scores.
        network_input: Sequence of integer windows (lists of vocab indices).
            It is not modified.
        vocab: Sequence of token strings; position is the token's index.
        n_vocab: Vocabulary size used to normalize the model input.
        n_notes: Number of tokens to generate (default 200).

    Returns:
        List of ``n_notes`` generated token strings.

    Raises:
        ValueError: If ``network_input`` is empty.
    """
    if len(network_input) == 0:
        raise ValueError('network_input must contain at least one sequence')

    # randint's upper bound is exclusive, so this covers every window,
    # including the last one, and works when there is only one window.
    start = np.random.randint(0, len(network_input))

    int_to_note = dict(enumerate(vocab))

    # Copy the seed so the caller's window is not mutated while sliding.
    pattern = list(network_input[start])
    prediction_output = []

    for _ in range(n_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input)

        # Renormalize in float64: np.random.choice checks that p sums to 1
        # at double precision, which float32 outputs may narrowly miss.
        probabilities = np.asarray(prediction[0], dtype=np.float64)
        probabilities = probabilities / probabilities.sum()

        index = int(np.random.choice(len(probabilities), p=probabilities))
        prediction_output.append(int_to_note[index])

        # Slide the window: drop the oldest token, append the new one.
        pattern = pattern[1:] + [index]

    return prediction_output

In [88]:
def generate():
    """Generate tokens with the trained network and return the last 100.

    Loads the vocabulary and encoded notes from ``vocab.p`` and
    ``notes.p``, rebuilds the network, loads its weights from
    ``weights.hdf5``, generates tokens with ``generate_notes`` and returns
    elements 100 to 199 of the result.

    Returns:
        List of generated token strings (``prediction_output[100:200]``).
    """
    # NOTE: pickle must only be used on trusted files; these are expected to
    # be the ones written by get_vocab / get_notes.
    # Context managers ensure both file handles are closed after loading.
    with open('vocab.p', 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)
    with open('notes.p', 'rb') as notes_file:
        notes = pickle.load(notes_file)
    n_vocab = len(vocab)

    network_input, normalized_input = prepare_sequences_prediction(notes, n_vocab)
    model = create_network(normalized_input, n_vocab)

    # Restore the trained parameters into the freshly built model.
    model.load_weights('weights.hdf5')

    prediction_output = generate_notes(model, network_input, vocab, n_vocab)
    return prediction_output[100:200]

In [10]:
# Generation is left disabled in this cell:
# generate()

# Load every MIDI file in samples/small as a NoteSequence.
datas = []

for file in os.listdir('samples/small'):
    data = midi_file_to_note_sequence('samples/small/' + file)
    datas.append(data)
# Plot the first loaded sequence. os.listdir returns entries in arbitrary
# order, so which file comes first is not fixed.
note_seq.plot_sequence(datas[0])