# Music Generator

In [115]:
import torch
from torch import nn
import numpy as np
import scipy.signal
# from keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Activation
import pickle
import glob 
%matplotlib inline
import matplotlib.pyplot as plt
%config InlineBackend.figure_format='retina'
import music21

# for making midi file
import mido
from mido import MidiFile, MidiTrack, Message

In [116]:
from music21 import converter, instrument, note, chord
notes = []
for file in glob.glob("midi_songs/*.mid"):
    midi = converter.parse(file)
    notes_to_parse = None
    parts = instrument.partitionByInstrument(midi)
    if parts: # file has instrument parts
        notes_to_parse = parts.parts[0].recurse()
    else: # file has notes in a flat structure
        notes_to_parse = midi.flat.notes
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            notes.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            notes.append('.'.join(str(n) for n in element.normalOrder))

print("len:", len(notes))
print(notes[:50])

len: 45976
['F3', 'F3', 'B-4', 'F3', 'G4', 'G#4', 'F4', 'F3', 'G4', 'G3', 'E-4', 'G#3', 'F3', 'F5', 'G5', 'G#5', 'F3', 'B-5', 'G5', 'G#5', 'F3', 'B-5', 'C6', 'B-5', 'F3', 'E-6', 'C#4', 'F6', 'C4', 'F3', 'F3', 'B-4', 'F3', 'G4', 'G#4', 'F4', 'F3', 'G4', 'G3', 'E-4', 'G#3', 'F3', 'F5', 'G5', 'G#5', 'F3', 'B-5', 'G5', 'G#5', 'F3']


In [117]:
sequence_length = 50
# get all pitch names
pitchnames = sorted(set(item for item in notes))
# create a dictionary to map pitches to integers
note_to_int = dict((note, number) for number, note in enumerate(pitchnames))
network_input = []
network_output = []
# create input sequences and the corresponding outputs
for i in range(0, len(notes) - sequence_length, 1):
    sequence_in = notes[i:i + sequence_length]
    # print("in: ", sequence_in)
    sequence_out = notes[i + sequence_length]
    # print("out: ", sequence_out)
    network_input.append([note_to_int[char] for char in sequence_in])
    network_output.append(note_to_int[sequence_out])
    print("network_output: ", network_output)
n_patterns = len(network_input)
# reshape the input into a format compatible with LSTM layers
network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
# normalize input
network_input = network_input / float(len(pitchnames))
network_output = to_categorical(network_output, num_classes=len(pitchnames))


print("network_input: ", network_input, "shape: ", network_input.shape)

print("network_output: ", network_output, "shape: ", network_output.shape)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



network_input:  [[[0.95092025]
  [0.95092025]
  [0.8006135 ]
  ...
  [0.99386503]
  [0.97546012]
  [0.95092025]]

 [[0.95092025]
  [0.8006135 ]
  [0.95092025]
  ...
  [0.97546012]
  [0.95092025]
  [0.80368098]]

 [[0.8006135 ]
  [0.95092025]
  [0.99079755]
  ...
  [0.95092025]
  [0.80368098]
  [0.86196319]]

 ...

 [[0.47546012]
  [0.79447853]
  [0.89570552]
  ...
  [0.33128834]
  [0.86196319]
  [0.94785276]]

 [[0.79447853]
  [0.89570552]
  [0.85276074]
  ...
  [0.86196319]
  [0.94785276]
  [0.33128834]]

 [[0.89570552]
  [0.85276074]
  [0.94785276]
  ...
  [0.94785276]
  [0.33128834]
  [0.86196319]]] shape:  (45926, 50, 1)
network_output:  [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] shape:  (45926, 326)


In [118]:
def create_network(network_input, n_vocab):
    """ create the structure of the neural network """
    model = Sequential()
    model.add(LSTM(
        512,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        recurrent_dropout=0.3,
        return_sequences=True
    ))
    model.add(LSTM(512, return_sequences=True, recurrent_dropout=0.3,))
    model.add(LSTM(512))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab, activation='softmax'))  # Ensure this matches the number of unique notes
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # Load the weights to each node
    # model.load_weights('weights.hdf5')

    return model

n_vocab = len(network_input)
model = create_network(network_input, n_vocab)
print("model: ", model)

model:  <Sequential name=sequential_14, built=True>


In [125]:
import numpy as np

def generate_notes(model, network_input, pitchnames, n_vocab):
    """Generate notes from the neural network based on a sequence of notes"""
    start = np.random.randint(0, len(network_input)-1)
    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

    # Make sure to copy the initial pattern to avoid altering the original input
    pattern = list(network_input[start].flatten())  # Flatten and convert to list to ensure it is mutable and 1D
    prediction_output = []

    # Generate 500 notes
    for note_index in range(500):
        # Ensure the pattern is a 2D array with shape (1, length of pattern, 1)
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)
        index = np.argmax(prediction)
        result = int_to_note[index]
        prediction_output.append(result)

        # Append the predicted index and ensure the list is kept to the same length
        pattern.append(index)
        pattern = pattern[1:]  # Slide the window of the pattern

    return prediction_output

# Example usage, assuming model, network_input, pitchnames, n_vocab are defined and initialized correctly
prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
print(len(prediction_output))
print(prediction_output)


500
['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 

In [126]:
def prepare_sequences(notes, pitchnames, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    # map between notes and integers and back
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    sequence_length = 100
    network_input = []
    output = []
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    normalized_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)


In [127]:
from music21 import stream, note


def create_midi(prediction_output):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)

    midi_stream.write('midi', fp='test_output.mid')

In [128]:
  create_midi(prediction_output)