<a href="https://colab.research.google.com/github/juyee1698/Electra/blob/main/Music_Generation(Bi_LSTM).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing libraries

In [None]:
import glob
import os
import pickle
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from music21 import converter, instrument, stream, note, chord

#Run version 2.1.6
from keras import utils
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, LSTM, Activation, Bidirectional, Flatten
from keras.layers import GlobalMaxPooling1D, MaxPooling1D
from keras.models import Sequential
from keras_self_attention import SeqSelfAttention


In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# Installs the package that provides SeqSelfAttention.
# NOTE(review): the import cell at the top of the notebook already imports
# keras_self_attention, so on a fresh runtime this install has to run before it.
!pip install keras_self_attention

# Data Preparation & Model training

In [None]:
def train_network(notes, n_vocab):
    """Run the whole training pipeline: encode the notes, build the model, fit it.

    Args:
        notes: sequence of note/chord/rest tokens to learn from.
        n_vocab: vocabulary size, forwarded to prepare_sequences and
            create_network.
    """
    inputs, targets = prepare_sequences(notes, n_vocab)
    train(create_network(inputs, n_vocab), inputs, targets)

In [None]:
# NOTE(review): this cell uses `prepare_sequences`, `notes` and `n_vocab`, which are
# only defined in cells further down; on a fresh kernel it raises NameError unless
# those cells have been run first.
network_input, network_output = prepare_sequences(notes, n_vocab)

## Build the notes and chords object for each instrument

In [None]:
def get_notes(pattern="/content/park/*.mid"):
    """Parse MIDI files into a flat list of "<name> <quarterLength>" tokens.

    For each file the piano part is used when the score can be partitioned by
    instrument; if partitioning fails (or its result cannot be iterated) the
    flattened notes of the whole score are used instead. Files that end up
    with nothing to parse are skipped.

    Args:
        pattern: glob pattern selecting the MIDI files to read. Defaults to
            the original hard-coded location.

    Returns:
        list[str]: one token per note, chord or rest, in file order. Notes are
        encoded as their pitch, chords as their dot-joined normal order, rests
        as their name; each is followed by a space and the quarter length.
    """
    notes = []

    for file in glob.glob(pattern):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try:  # file has instrument parts
            parts = instrument.partitionByInstrument(midi)  # Currently we are only working with piano
            for part in parts:
                if isinstance(part.getInstrument(), instrument.Piano):
                    print("True")
                    # If several parts are piano, the last one wins.
                    notes_to_parse = part.recurse()
                    print(len(notes_to_parse))
        except Exception:  # file has notes in a flat structure
            # `Exception` rather than a bare except so Ctrl-C still interrupts.
            notes_to_parse = midi.flat.notes
            print(len(notes_to_parse))

        if notes_to_parse is None:
            # Partitioning worked but no part was a piano: nothing to read.
            print("Skipping %s: no piano part found" % file)
            continue

        try:
            for element in notes_to_parse:
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch) + " " + str(element.quarterLength))
                elif isinstance(element, chord.Chord):
                    notes.append('.'.join(str(n) for n in element.normalOrder) + " " + str(element.quarterLength))
                elif isinstance(element, note.Rest):
                    notes.append(str(element.name) + " " + str(element.quarterLength))
        except Exception as exc:
            # Best effort: keep what was read so far and move to the next file,
            # but say so instead of failing silently.
            print("Stopped reading %s early: %s" % (file, exc))
            continue

    # Report the size only; printing every token floods the notebook output.
    print("Notes", len(notes))

    return notes

## Prepare input and output sequences for the model to learn

In [None]:
def prepare_sequences(notes, n_vocab, sequence_length=100):
    """Turn the token list into training inputs and one-hot targets.

    Every window of `sequence_length` consecutive tokens becomes one input
    sample and the token that follows it becomes the target.

    Args:
        notes: list of note/chord/rest tokens.
        n_vocab: vocabulary size; used to scale the inputs and to fix the
            width of the one-hot targets. Expected to be the number of
            distinct tokens in `notes`.
        sequence_length: number of tokens per input window (default 100).

    Returns:
        tuple: (network_input, network_output) where network_input has shape
        (n_patterns, sequence_length, 1) with values divided by n_vocab, and
        network_output is the one-hot encoding of the target tokens.

    Raises:
        ValueError: if `notes` has no more than `sequence_length` tokens, so
            no training window can be formed.
    """
    n_patterns = len(notes) - sequence_length
    if n_patterns <= 0:
        raise ValueError(
            "need more than %d notes to build training sequences, got %d"
            % (sequence_length, len(notes)))

    # get all pitch names
    pitchnames = sorted(set(notes))

    # create a dictionary to map pitches to integers
    note_to_int = {name: number for number, name in enumerate(pitchnames)}

    # Encode once, then slice windows out of the encoded list.
    encoded = [note_to_int[item] for item in notes]
    network_input = [encoded[i:i + sequence_length] for i in range(n_patterns)]
    network_output = encoded[sequence_length:]

    print(n_patterns)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    # normalize input
    network_input = network_input / float(n_vocab)

    print(len(set(network_output)))
    # Pin the one-hot width to the vocabulary size. Without num_classes the
    # width is inferred from the largest target index, which is too narrow
    # whenever the highest-indexed token never occurs as a target.
    network_output = to_categorical(
        network_output, num_classes=max(n_vocab, len(pitchnames)))
    print(len(network_output))
    print(network_input.shape[1], network_input.shape[2])
    print(network_output.shape)

    return (network_input, network_output)

## Build and configure the ML model

In [None]:
def create_network(network_input, n_vocab):
    """Build and compile the model used for training.

    Layer stack: Bidirectional LSTM(512) -> self-attention -> dropout ->
    LSTM(512) -> dropout -> global max pooling over time -> Dense(n_vocab)
    -> softmax. Compiled with categorical cross-entropy and Adam.

    Args:
        network_input: array whose shape[1] and shape[2] give the number of
            time steps and features per step of one input sample.
        n_vocab: number of output classes (width of the final Dense layer).

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    # input_shape belongs on the Bidirectional wrapper, because the wrapper (not
    # the LSTM inside it) is the first layer of the Sequential model. This is
    # the same placement create_network_add_weights uses.
    model.add(Bidirectional(
        LSTM(512, return_sequences=True),
        input_shape=(network_input.shape[1], network_input.shape[2])))  # (time steps, features)
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))

    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))

    # Collapse the time axis before the classifier (used instead of Flatten).
    model.add(GlobalMaxPooling1D())
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    learning_rate = 0.005
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model

def train(model, network_input, network_output):
    """Fit the model, saving its weights to disk periodically.

    Args:
        model: compiled model to train.
        network_input: training inputs.
        network_output: training targets.

    Weights are written to "weights-3LSTMAttLayer-final.hdf5", resolved
    against the current working directory.
    """
    filepath = os.path.abspath("weights-3LSTMAttLayer-final.hdf5")
    # `patience` and `restore_best_weights` were removed from this call: they
    # are EarlyStopping options, not ModelCheckpoint options, so they either
    # raise TypeError or are ignored depending on the Keras version.
    checkpoint = ModelCheckpoint(
        filepath,
        period=10,  # Every 10 epochs
        monitor='loss',
        verbose=1,
        save_weights_only=True,
        mode='min'
    )
    callbacks_list = [checkpoint]

    model.fit(network_input, network_output, epochs=200, batch_size=32, callbacks=callbacks_list)

In [None]:
# Parse the MIDI files read by get_notes() into a list of note tokens
notes = get_notes()

# Vocabulary size: number of distinct tokens in `notes`
n_vocab = len(set(notes))

In [None]:
# Display the vocabulary size computed in the previous cell
n_vocab

693

## Train the Model

In [None]:
# Encode the notes, build the model and fit it (see train_network)
train_network(notes, n_vocab)

In [None]:
# Disabled snippet: rebuilds the training architecture by hand, loads the saved
# weights and calls train() again. It relies on the globals `network_input` and
# `network_output`, and compiles with 'rmsprop' rather than the Adam optimizer
# that create_network uses.
# model = Sequential()
# model.add(Bidirectional(LSTM(512,return_sequences=True),input_shape=(network_input.shape[1], network_input.shape[2]))) #n_time_steps, n_features? Needed input_shape in first layer, which is Bid not LSTM
# model.add(SeqSelfAttention(attention_activation='sigmoid'))
# model.add(Dropout(0.3))

# model.add(LSTM(512,return_sequences=True))
# model.add(Dropout(0.3))

# #model.add(Flatten()) #Supposedly needed to fix stuff before dense layer
# model.add(GlobalMaxPooling1D())
# model.add(Dense(n_vocab))
# model.add(Activation('softmax'))
# model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
# model.load_weights('/content/weights-3LSTMAttLayer-final.hdf5')
# train(model, network_input, network_output)

# Generate output sequence

In [None]:
def generate():
    """Generate a piano MIDI file from the module-level `notes` list.

    The tokens come from the global `notes` (an earlier version loaded them
    from a pickled 'data/notes' file instead).
    """
    # Sorted distinct tokens define the token <-> integer encoding
    pitchnames = sorted(set(notes))
    # Vocabulary size: number of distinct tokens
    n_vocab = len(pitchnames)

    network_input, normalized_input = prepare_sequences_output(notes, pitchnames, n_vocab)
    model = create_network_add_weights(normalized_input, n_vocab)
    create_midi(generate_notes(model, network_input, pitchnames, n_vocab))

In [None]:
def prepare_sequences_output(notes, pitchnames, n_vocab, sequence_length=25):
    """Build the seed sequences used to start generation.

    Args:
        notes: list of note/chord/rest tokens.
        pitchnames: ordered distinct tokens; a token's position is its
            integer code.
        n_vocab: vocabulary size used to scale the normalized copy.
        sequence_length: number of tokens per seed window (default 25).

    Returns:
        tuple: (network_input, normalized_input). network_input is a list of
        integer-encoded windows (plain Python lists); normalized_input is the
        same data as an array of shape (n_patterns, sequence_length, 1)
        divided by n_vocab.
    """
    # map between notes and integers
    note_to_int = {name: number for number, name in enumerate(pitchnames)}

    # Only the input windows are needed here; the follow-up token of each
    # window was previously collected into a list that nothing read.
    n_patterns = max(len(notes) - sequence_length, 0)
    network_input = [
        [note_to_int[item] for item in notes[i:i + sequence_length]]
        for i in range(n_patterns)
    ]

    # reshape the input into a format compatible with LSTM layers
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

In [None]:
def create_network_add_weights(network_input, n_vocab,
                               weights_path='/content/weights-3LSTMAttLayer-final.hdf5'):
    """Build the generation model and load trained weights into it.

    Layer stack: Bidirectional LSTM(512) -> self-attention -> dropout ->
    global max pooling over time -> Dense(n_vocab) -> softmax.

    NOTE(review): create_network additionally has an LSTM(512) + Dropout pair
    after the attention block; that pair is commented out here. Confirm the
    weights file was produced by a model with this same layer stack.

    Args:
        network_input: array whose shape[1] and shape[2] give the number of
            time steps and features per step of one input sample.
        n_vocab: number of output classes (width of the final Dense layer).
        weights_path: weights file to load. Defaults to the original
            hard-coded location.

    Returns:
        The compiled Sequential model with the weights loaded.
    """
    model = Sequential()

    # input_shape is given to the Bidirectional wrapper, which is the first layer.
    model.add(Bidirectional(
        LSTM(512, return_sequences=True),
        input_shape=(network_input.shape[1], network_input.shape[2])))  # (time steps, features)
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))

    #model.add(LSTM(512,return_sequences=True))
    #model.add(Dropout(0.3))

    # Collapse the time axis before the classifier (used instead of Flatten).
    model.add(GlobalMaxPooling1D())
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # Load the trained weights
    model.load_weights(weights_path)

    return model


The model uses its trained weights to predict the next note from the sequence of notes that precedes it.

In [None]:
def generate_notes(model, network_input, pitchnames, n_vocab, num_notes=150):
    """Generate tokens by repeatedly predicting the next one from a sliding window.

    A random seed window is picked from `network_input`. At each step the
    window is scaled by n_vocab, fed to `model.predict`, the arg-max class is
    appended to the output and to the window, and the oldest entry is dropped.

    Args:
        model: object with a `predict(batch, verbose=0)` method returning class
            scores for a batch of shape (1, window_length, 1).
        network_input: list of integer-encoded seed windows. Not modified.
        pitchnames: ordered distinct tokens; index i decodes class i.
        n_vocab: vocabulary size used to scale the model input.
        num_notes: number of tokens to generate (default 150).

    Returns:
        list[str]: the generated tokens, in order.

    Raises:
        ValueError: if `network_input` is empty.
    """
    if len(network_input) == 0:
        raise ValueError("network_input is empty; no seed sequence to start from")

    # pick a random sequence from the input as a starting point for the prediction.
    # randint's upper bound is exclusive, so this covers every seed (and works
    # when there is only one).
    start = np.random.randint(0, len(network_input))

    int_to_note = dict(enumerate(pitchnames))

    # Work on a copy so the caller's seed list is not modified.
    pattern = list(network_input[start])
    prediction_output = []

    for _ in range(num_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)

        index = int(np.argmax(prediction))
        prediction_output.append(int_to_note[index])

        # Slide the window: drop the oldest entry, add the new prediction.
        pattern = pattern[1:] + [index]

    print(prediction_output)

    return prediction_output

# Create MIDI file

In [None]:
def create_midi(prediction_output, fp='piano_comp_29092021.mid'):
    """Convert generated tokens to music21 objects and write a MIDI file.

    Each token is "<name> <duration>". The name selects the element type:
    a dot-separated or all-digit name becomes a chord, a name containing
    'rest' becomes a rest, anything else becomes a single note. The duration
    is parsed with convert_to_float and applied as the element's quarter
    length; previously it was parsed and then dropped, so every element kept
    the default length. Elements are placed 0.5 apart.

    Args:
        prediction_output: iterable of token strings.
        fp: path of the MIDI file to write. Defaults to the original
            hard-coded file name.
    """
    offset = 0
    output_notes = [instrument.Piano()]

    # create note, chord and rest objects based on the values generated by the model
    for token in prediction_output:
        fields = token.split()
        name = fields[0]
        duration = fields[1]

        if ('.' in name) or name.isdigit():
            # chord: the name holds dot-separated integers
            chord_notes = []
            for current_note in name.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            element = chord.Chord(chord_notes)
        elif 'rest' in name:
            element = note.Rest(name)
            element.storedInstrument = instrument.Piano()
        else:
            element = note.Note(name)
            element.storedInstrument = instrument.Piano()

        element.offset = offset

        # Apply the duration carried by the token. Durations may be written as
        # fractions such as '7/3'. An unparsable or non-positive value leaves
        # the element's default length in place.
        try:
            quarter_length = convert_to_float(duration)
        except (ValueError, ZeroDivisionError):
            quarter_length = 0
        if quarter_length > 0:
            element.quarterLength = quarter_length

        output_notes.append(element)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=fp)

#From: https://stackoverflow.com/questions/1806278/convert-fraction-to-float
def convert_to_float(frac_str):
    """Parse a number written as a decimal, a fraction or a mixed fraction.

    Accepts forms such as '0.5', '7/3' and '1 1/2'. For a mixed fraction the
    fractional part takes the sign of the whole part.

    Adapted from:
    https://stackoverflow.com/questions/1806278/convert-fraction-to-float
    """
    try:
        return float(frac_str)
    except ValueError:
        pass  # not a plain decimal; treat it as a fraction below

    numerator, denominator = frac_str.split('/')

    whole = 0
    pieces = numerator.split(' ')
    if len(pieces) == 2:
        # "<whole> <numerator>" form
        numerator = pieces[1]
        try:
            whole = float(pieces[0])
        except ValueError:
            whole = 0

    frac = float(numerator) / float(denominator)
    if whole < 0:
        return whole - frac
    return whole + frac

In [None]:
# Run the generation pipeline; relies on the global `notes` built by get_notes()
generate()

['9.1 0.5', 'B5 7/3', 'F6 0.75', 'E3 2.0', 'B-5 4/3', '2.6.9 2.5', 'G#5 1.5', 'B-4 1/3', 'B4 8.25', 'C#5 0.75', '7.11 5/3', '8.11 2.0', '0.2 2.0', '6.8 1/3', 'E-4 4.0', 'F#2 3.0', '6.9.1 1.0', 'C4 1.75', '2.6 2.0', 'F#5 3.0', '8.11 2.0', '4.7 4.0', 'C5 4.0', '1 1.0', 'E4 4.0', '9.0 6.0', 'B-2 0.75', 'E3 7.0', 'B-3 1.5', '7.11 4/3', 'G#5 4.0', 'E4 4.0', '4.9 1.5', '2.7 1.0', '2.7 1.25', '7.11 0.25', 'B2 0.75', '8.10 1.0', 'G2 2.75', '4.8 2.0', '2 7.75', '11.1 5.0', 'F#2 3.0', '5.7 2.0', '1.4.8 3.0', 'B2 0.75', '0.2.7 4.0', '2.7 1.25', '7.0 0.25', '2.7 5/3', 'E4 4.0', '0.4 1.0', '5.10 2.0', 'B2 0.75', '0.4.7 4.0', 'E4 4.0', '1.6 3.0', '6.11 4.0', '6.11 4.0', 'C3 3.0', '9.0 6.0', '11.1.6 3.0', 'E2 5.0', '6.11 2.75', '0.2.7 4.0', '1 0.5', '4.9 4/3', '2.7 5/3', '11 7.0', '7.0 0.25', '8.10 1.0', '8.10 1.0', 'E3 3.75', '4.8 2.0', 'C4 1.75', '1 1.0', '7.0 0.5', 'E3 3.75', 'G5 3.5', 'C#6 2.0', '10 5.0', '7.10 1.0', '5.10 1.5', '4.9 4/3', 'E5 7/3', 'B2 0.75', 'C4 1.75', 'G#5 1.5', 'E5 3.25', '6.

In [None]:
def plot_piano_roll(notes: pd.DataFrame, count: Optional[int] = None):
    """Plot notes as horizontal segments of pitch against time.

    Args:
        notes: frame with 'pitch', 'start' and 'end' columns; each row is
            drawn as a segment from start to end at its pitch.
        count: number of leading rows to plot. When omitted (or falsy) the
            whole frame is plotted.
    """
    if count:
        title = f'First {count} notes'
    else:
        title = 'Whole track'
        count = len(notes['pitch'])
    plt.figure(figsize=(20, 4))
    # Two stacked rows give one (start, end) point pair per note, so each
    # column is drawn as its own line segment.
    plot_pitch = np.stack([notes['pitch'], notes['pitch']], axis=0)
    plot_start_stop = np.stack([notes['start'], notes['end']], axis=0)
    plt.plot(
        plot_start_stop[:, :count], plot_pitch[:, :count], color="b", marker=".")
    plt.xlabel('Time [s]')
    plt.ylabel('Pitch')
    _ = plt.title(title)