# Multi-layer LSTM for Music Generation (with Magenta)

In [1]:
import os
import pickle
import numpy as np
import tensorflow as tf
import note_seq
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, BatchNormalization, Dropout, Dense, Activation, Softmax
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
from note_seq.midi_io import midi_file_to_note_sequence

# Time-grid resolution: step i is sampled at time i / note_divisions, and
# notes shorter than 1 / note_divisions are rejected by is_note_valid.
note_divisions = 16
# Number of consecutive tokens in each input window fed to the network.
sequence_length = 16

In [2]:
def create_network(network_input, n_vocab):
    """Build and compile the stacked bidirectional-LSTM token classifier.

    Args:
        network_input: Array whose second and third dimensions supply the
            input shape of the first recurrent layer.
        n_vocab: Width of the final Dense layer (one unit per vocab entry).

    Returns:
        A Sequential model compiled with the Adam optimizer and categorical
        cross-entropy loss.
    """
    units = 256
    drop_rate = 0.4
    first_layer_shape = (network_input.shape[1], network_input.shape[2])

    def recurrent_block(return_sequences, **wrapper_kwargs):
        # One bidirectional LSTM layer sharing the unit count and dropout rate.
        return Bidirectional(
            LSTM(units, dropout=drop_rate, return_sequences=return_sequences),
            **wrapper_kwargs
        )

    model = Sequential()

    # Recurrent stack: the first two layers emit full sequences so the next
    # LSTM can consume them; the last one emits only its final output.
    model.add(recurrent_block(True, input_shape=first_layer_shape))
    model.add(recurrent_block(True))
    model.add(recurrent_block(False))

    # Classification head: normalise, regularise, project to half the hidden
    # width, then map onto the vocabulary with a softmax.
    model.add(BatchNormalization())
    model.add(Dropout(drop_rate))
    model.add(Dense(units // 2))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(drop_rate))
    model.add(Dense(n_vocab))
    model.add(Softmax())

    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model
    

In [3]:
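# The vocabulary is sorted, so the four control tokens come last: vocab[-4] is 'CNT_NOTE' (continue note), vocab[-3] is 'NEW_NOTE' (new note), vocab[-2] is 'NXT_CHNL' (next channel), vocab[-1] is 'NXT_STEP' (next step); every other entry is a pitch.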

def is_start(target, start):
    """Return True when `start` lies strictly within half a grid step of `target`."""
    half_step = 0.5 / note_divisions
    return target - half_step < start < target + half_step

def is_note_valid(target, note, instrument_infos):
    """Decide whether `note` should be reported at grid time `target`.

    A note qualifies when it lasts at least one grid step, its
    [start_time, end_time] span reaches into the half-step window around
    `target`, and its instrument is not named 'no'.
    """
    grid_step = 1 / note_divisions
    half_step = 0.5 / note_divisions

    long_enough = note.end_time - note.start_time >= grid_step
    if not long_enough:
        return False

    still_sounding = target - half_step < note.end_time
    already_started = target + half_step > note.start_time
    if not (still_sounding and already_started):
        return False

    # The instrument lookup happens last, only for notes that passed the
    # timing checks.
    return instrument_infos[note.instrument].name != 'no'

def get_vocab():
    """Collect the token vocabulary from every MIDI file in samples/small.

    The vocabulary holds the four control tokens plus the string form of
    every pitch belonging to a note that is_note_valid accepts at some grid
    step of some file.

    Returns:
        The vocabulary as a sorted list of strings.

    Side effects:
        Writes the sorted vocabulary to 'vocab.p' with pickle.
    """
    vocab = {'NXT_CHNL', 'NEW_NOTE', 'NXT_STEP', 'CNT_NOTE'}
    for file in os.listdir('samples/small'):
        data = midi_file_to_note_sequence('samples/small/' + file)
        # NOTE(review): the scan stops at the time of the *second*
        # time-signature entry; a file with fewer than two entries raises
        # IndexError here. Confirm this matches the dataset's layout.
        total_time = data.time_signatures[1].time
        iteration = 0
        while iteration / note_divisions < total_time:
            target = iteration / note_divisions
            vocab.update(
                str(note.pitch)
                for note in data.notes
                if is_note_valid(target, note, data.instrument_infos)
            )
            iteration += 1
    vocab = sorted(vocab)
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open('vocab.p', 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)
    return vocab

def get_notes(vocab):
    """Encode every MIDI file in samples/small as a flat list of vocab indices.

    For each grid step the output contains one 'NXT_CHNL' token followed by
    three instrument slots separated by two 'NXT_STEP' tokens. Inside a slot
    each sounding note contributes a 'NEW_NOTE' or 'CNT_NOTE' marker followed
    by its pitch token.

    NOTE(review): despite the token names, 'NXT_CHNL' is emitted once per
    grid step and 'NXT_STEP' separates the instrument slots.
    NOTE(review): the separator bookkeeping assumes the only instrument names
    reaching this point are 'p1', 'p2' and 'tr' — confirm against the data.

    Args:
        vocab: Sequence of token strings; a token's position is its index.

    Returns:
        List of integer token indices for all files, concatenated.

    Side effects:
        Writes the index list to 'notes.p' with pickle.
    """
    vocab_dict = {token: index for index, token in enumerate(vocab)}
    notes = []
    for file in os.listdir('samples/small'):
        data = midi_file_to_note_sequence('samples/small/' + file)
        # NOTE(review): the scan stops at the time of the second
        # time-signature entry; confirm this matches the dataset's layout.
        total_time = data.time_signatures[1].time
        iteration = 0
        while iteration / note_divisions < total_time:
            target = iteration / note_divisions
            notes.append(vocab_dict['NXT_CHNL'])
            all_notes = [
                note for note in data.notes
                if is_note_valid(target, note, data.instrument_infos)
            ]
            # Two stable sorts: the result is grouped by instrument name,
            # highest pitch first within each instrument.
            all_notes.sort(key=lambda note: note.pitch, reverse=True)
            all_notes.sort(key=lambda note: data.instrument_infos[note.instrument].name)
            prev_instrument = None
            for note in all_notes:
                curr_instrument = data.instrument_infos[note.instrument].name
                if curr_instrument != prev_instrument:
                    # Emit one separator per slot boundary crossed, including
                    # boundaries of slots that had no notes at this step.
                    if not prev_instrument or prev_instrument == 'p1':
                        if curr_instrument == 'tr':
                            notes.append(vocab_dict['NXT_STEP'])
                        if curr_instrument != 'p1':
                            notes.append(vocab_dict['NXT_STEP'])
                    else:
                        notes.append(vocab_dict['NXT_STEP'])
                    prev_instrument = curr_instrument
                if is_start(target, note.start_time):
                    marker = 'NEW_NOTE'
                else:
                    marker = 'CNT_NOTE'
                notes.append(vocab_dict[marker])
                notes.append(vocab_dict[str(note.pitch)])
            # Pad with the separators of any trailing slots that had no notes.
            if not prev_instrument or prev_instrument == 'p1':
                notes.append(vocab_dict['NXT_STEP'])
                notes.append(vocab_dict['NXT_STEP'])
            elif prev_instrument == 'p2':
                notes.append(vocab_dict['NXT_STEP'])
            iteration += 1
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open('notes.p', 'wb') as notes_file:
        pickle.dump(notes, notes_file)
    return notes

def prepare_sequences(notes, n_vocab):
    """Turn the token stream into supervised (window, next-token) pairs.

    Args:
        notes: Sequence of integer token indices.
        n_vocab: Vocabulary size; used to scale the inputs and to fix the
            width of the one-hot targets.

    Returns:
        Tuple (network_input, network_output): inputs of shape
        (n_patterns, sequence_length, 1) scaled by 1 / n_vocab, and one-hot
        targets with n_vocab columns.
    """
    network_input = []
    network_output = []

    # Slide a window of sequence_length tokens; the token right after the
    # window is the prediction target.
    for i in range(len(notes) - sequence_length):
        network_input.append(notes[i:i + sequence_length])
        network_output.append(notes[i + sequence_length])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    # normalize input
    network_input = network_input / float(n_vocab)

    # Pin the class count: otherwise the width is inferred from the largest
    # label present and can disagree with a model whose output layer has
    # n_vocab units.
    network_output = to_categorical(network_output, num_classes=n_vocab)

    return (network_input, network_output)

In [4]:
def train_network():
    """Build the dataset from the sample files and fit the model for 20 epochs.

    The checkpoint callback monitors the training loss and keeps only the
    best weights in 'weights.hdf5'.
    """
    vocab = get_vocab()
    n_vocab = len(vocab)
    network_input, network_output = prepare_sequences(get_notes(vocab), n_vocab)

    print('Done preparing sequences!')

    model = create_network(network_input, n_vocab)
    model.summary()

    best_loss_checkpoint = ModelCheckpoint(
        "weights.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )

    model.fit(
        x=network_input,
        y=network_output,
        epochs=20,
        callbacks=[best_loss_checkpoint]
    )

In [5]:
# Build the training data and fit the model; the cell output below is the
# captured log of this call.
train_network()

Done preparing sequences!
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 16, 512)           528384    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 16, 512)           1574912   
_________________________________________________________________
bidirectional_2 (Bidirection (None, 512)               1574912   
_________________________________________________________________
batch_normalization (BatchNo (None, 512)               2048      
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               65664     
_________________________________________________________________
activation (Activation)      (

In [6]:
def prepare_sequences_prediction(notes, n_vocab):
    """Slice the token stream into seed windows for generation.

    Returns:
        Tuple (windows, scaled): the raw windows as a list of
        sequence_length-long slices of `notes`, and the same data reshaped to
        (n_patterns, sequence_length, 1) and divided by n_vocab.
    """
    window_count = len(notes) - sequence_length
    windows = [
        notes[begin:begin + sequence_length]
        for begin in range(window_count)
    ]

    # Reshape to the layout the LSTM layers expect, then normalise.
    as_tensor = np.reshape(windows, (len(windows), sequence_length, 1))
    scaled = as_tensor / float(n_vocab)

    return (windows, scaled)

def generate_notes(model, network_input, vocab, n_vocab, num_notes=1000):
    """Sample a token sequence from the model, seeded by a random input window.

    Args:
        model: Object exposing predict(batch), returning an array of shape
            (1, n_classes) with one probability per vocabulary index.
        network_input: Sequence of seed windows (each a sequence of token
            indices). It is not modified.
        vocab: Sequence of token strings; a token's position is its index.
        n_vocab: Value the window is divided by before prediction.
        num_notes: Number of tokens to generate.

    Returns:
        List of `num_notes` generated token strings.

    Raises:
        ValueError: If `network_input` is empty.
    """
    if len(network_input) == 0:
        raise ValueError('network_input must contain at least one sequence')

    # randint's upper bound is exclusive, so this covers every seed window.
    start = np.random.randint(0, len(network_input))

    int_to_note = dict(enumerate(vocab))

    # Copy the seed so the sliding window never mutates the caller's data.
    pattern = list(network_input[start])
    prediction_output = []

    for _ in range(num_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input)

        # Renormalise in float64 so the probabilities sum to 1 within the
        # tolerance np.random.choice enforces.
        probabilities = np.asarray(prediction[0], dtype=np.float64)
        probabilities = probabilities / probabilities.sum()

        index = int(np.random.choice(prediction.shape[1], p=probabilities))
        prediction_output.append(int_to_note[index])

        # Slide the window: drop the oldest token, append the sampled one.
        pattern = pattern[1:] + [index]

    return prediction_output

In [7]:
def generate():
    """Load the saved vocabulary, tokens and weights, then sample new tokens.

    Returns:
        Elements 400 to 499 of the generated token list.
    """
    # NOTE: pickle executes arbitrary code on load; only read files written
    # by get_vocab / get_notes, never untrusted ones.
    with open('vocab.p', 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)
    with open('notes.p', 'rb') as notes_file:
        notes = pickle.load(notes_file)
    n_vocab = len(vocab)

    network_input, normalized_input = prepare_sequences_prediction(notes, n_vocab)

    # Rebuild the architecture from the input shape, then restore the weights.
    model = create_network(normalized_input, n_vocab)
    model.load_weights('weights.hdf5')

    prediction_output = generate_notes(model, network_input, vocab, n_vocab)
    return prediction_output[400:500]

In [8]:
# Sample from the trained model; the list below is the returned slice of
# generated tokens as displayed by the notebook.
generate()

['NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'CNT_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NEW_NOTE',
 'NEW_NOTE',
 'NXT_STEP',
 'NEW_NOTE',
 'NEW_NOTE',
 'NEW_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NEW_NOTE',
 'NXT_STEP',
 'NEW_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'CNT_NOTE',
 'NXT_STEP',
 'NEW_NOTE',
 'NXT_STEP',
 'NEW_NOTE',
 'NEW_NOTE',
 'NEW_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'CNT_NOTE',
 'NEW_NOTE',
 'CNT_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NEW_NOTE',
 'NEW_NOTE',
 'CNT_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NEW_NOTE',
 'NEW_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'CNT_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NEW_NOTE',
 'NEW_NOTE',
 'NXT_STEP',
 'NEW_NOTE',
 'NEW_NOTE',
 'NXT_STEP',
 'NEW_NOTE',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_STEP',
 'NEW_NOTE',
 'NXT_STEP',
 'NEW_NOTE',

In [10]:
# Rebuild the vocabulary and the encoded token stream, then decode the stream
# back into token strings to inspect the first 100 entries.
vocab = get_vocab()
notes = get_notes(vocab)
vocab_dict = dict(enumerate(vocab))  # index -> token string
notes_list = [vocab_dict[token_index] for token_index in notes]
notes_list[:100]

['NXT_CHNL',
 'NXT_STEP',
 'NXT_STEP',
 'NXT_CHNL',
 'NXT_STEP',
 'NEW_NOTE',
 '62',
 'NXT_STEP',
 'NXT_CHNL',
 'NEW_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'NEW_NOTE',
 '50',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '50',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '50',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '50',
 'NXT_CHNL',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '62',
 'NXT_STEP',
 'CNT_NOTE',
 '50',
 'NXT_CHNL',