In [42]:
# Misc
import pickle

# Data manipulation
import numpy as np
import pandas as pd

# Music
import music21 as m21
from music21 import converter, instrument, note, chord, stream

# Data Visualiation
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns

# System
import os
import random
import shutil

# Performance metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization as BatchNorm
from tensorflow.keras import utils as np_utils
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.regularizers as regularizers
from tensorflow.keras.utils import to_categorical


In [None]:
## Create a train set ##

In [2]:
source_dir = "../../data_raw/"

# Define the percentage of data to use to training the modle 
train_percentage = 90  


train_dir = "../../data_split/train/"
test_dir = "../../data_split/test/"


os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


all_files = os.listdir(source_dir)

num_train_files = int(len(all_files) * (train_percentage / 100))


random.shuffle(all_files)

# Split the files into training and testing sets
train_files = all_files[:num_train_files]
test_files = all_files[num_train_files:]


for file in train_files:
    source_file = os.path.join(source_dir, file)
    destination_file = os.path.join(train_dir, file)
    shutil.copy(source_file, destination_file)


for file in test_files:
    source_file = os.path.join(source_dir, file)
    destination_file = os.path.join(test_dir, file)
    shutil.copy(source_file, destination_file)

In [6]:
## Create the vocabulary ##

In [4]:
def load_midi_into_notes():
    # Extracted notes for each file
    notes = []

    # Save the indexes in notes in which a new composition started to create consistent sequences
    new_composition_indexes = []

    # Load THE !!TRAINING!! MIDI files to create the vocabulary
    input_path = "../../data_split/test"
    for file in os.listdir(input_path):
        file_path = os.path.join(input_path, file)
        
        # Add new index to alert that it's a new composition
        if new_composition_indexes and new_composition_indexes[-1] != len(notes):
            new_composition_indexes.append(len(notes))

        try:
            # Convert the music into a score object
            score = converter.parse(file_path)

            print("Parsing %s" % file)

            elements_in_part = None

            try:  # File has instrument parts
                # Given a score that represents the MIDI, partition it into parts for each unique instrument found
                partitions_by_instrument = instrument.partitionByInstrument(score)
                # Visit all the elements (notes, chords, rests, and more) of each of its internal "measures."
                elements_in_part = partitions_by_instrument.parts[0].recurse()

            except:  # File has notes in a flat structure
                elements_in_part = score.flat.notes

            # Scroll through all the elements (notes or chords) picked up
            for element in elements_in_part:
                # If the element is a note...
                if isinstance(element, note.Note):
                    # Add note to array
                    notes.append(str(element.pitch))
                # If the element is a chord (a set of notes --> e.g., C4 F4)
                elif isinstance(element, chord.Chord):
                    # Extract each note from the chord and insert it into the array in the format Note1.Note2.Note3
                    notes.append('.'.join(str(n) for n in element.normalOrder))

        except Exception as e:
            print(f"Error parsing {file}: {str(e)}")
            continue

    print("✅Loading training done")

    # Save the 'notes' list to a pickle file
    os.makedirs("../../data_vocab/")
    with open("../../data_vocab/notes.pkl", "wb") as filepath:
        pickle.dump(notes, filepath)

    # Return notes and new composition indexes
    return notes, new_composition_indexes


In [5]:
notes, new_composition_indexes = load_midi_into_notes()



Parsing mazrka11.mid
Parsing mazrka41.mid
Parsing mazrka03.mid
Parsing mazrka07.mid




Parsing ch_wal03.mid
Error parsing lechi4.mid: badly formatted midi bytes, got: b'RIFF*\xa4\x00\x00RMIDdata\x1e\xa4\x00\x00'




Parsing chonoc11.mid
Parsing ballade2.mid
Parsing scherzo1.mid
Parsing chop23b.mid
Parsing mazrka18.mid
Parsing algrcrt3.mid
Parsing mazrka02.mid
✅Loading training done


In [6]:
len(notes)

18497

In [7]:
def prepare_sequences(notes, n_vocab, new_composition_indexes):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

    # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    wait = 0
    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):

        # if the ground truth index is a note/chord that belongs to a new composition
        if (i + sequence_length) in new_composition_indexes:
            wait = sequence_length - 1
            continue
        if wait != 0:
            wait = wait - 1
            continue

        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]

        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input_training = np.reshape(network_input, (n_patterns, sequence_length, 1))

    # normalize input
    network_input_training = network_input_training / float(n_vocab)
    
    # one-hot encoding of the output
    network_output_training = to_categorical(network_output, num_classes=n_vocab)

    return network_input_training, network_output_training, network_output

In [8]:
notes

['B4',
 '4.9',
 '7',
 'F#4',
 'B3',
 '4.7.11',
 'A2',
 'D5',
 '9.0.4',
 'D5',
 'C5',
 '9.0.4',
 'D3',
 '11.0.4',
 'A4',
 '11.2.4',
 'F#4',
 '9.0.2',
 'G4',
 'G4',
 'G2',
 'C5',
 '2.7',
 'B4',
 'B2',
 '6.9.11.0',
 'B2',
 '11.1',
 'D5',
 'B4',
 '3.6.9.11',
 'E5',
 'E3',
 'F#5',
 'G5',
 '7.11',
 'C6',
 'B5',
 '7.11',
 'B5',
 '7.11',
 'B-5',
 'E3',
 '6.10.1',
 'B-3',
 'A5',
 '6.9.0',
 'A5',
 'E3',
 '3.6.9',
 'E3',
 'A5',
 '4.7',
 'B4',
 'B2',
 '11.0',
 '6.9.11.1',
 '11.2',
 '3.6.9.11',
 '3.6.9.11',
 'E5',
 'E3',
 'G6',
 '6.7',
 '6.7',
 'B3',
 'E6',
 'C6',
 'B5',
 'F#2',
 'B-5',
 'C#6',
 'F#2',
 '11.1.3.4.6',
 'G#5',
 '1.4.6',
 '6.10',
 '6.10',
 'B2',
 '11.3',
 '6.11',
 '6.11',
 'B4',
 'E2',
 'A4',
 '7.11',
 'G3',
 'F#4',
 '4.7.11',
 'A2',
 '4.7.11',
 'D5',
 '9.0.4',
 'C5',
 '9.0.4',
 'D3',
 '11.0.4',
 'A4',
 '11.2.4',
 'D3',
 'F#4',
 'D4',
 '9.0',
 'G4',
 'G4',
 'C5',
 'G2',
 '2.7',
 'B4',
 'B4',
 'B2',
 'C5',
 '6.9.11.1',
 '11.2',
 '3.6.9.11',
 'E5',
 'E3',
 'F#5',
 '7.11',
 'G5',
 'E3',


In [9]:
# get the vocabulary (unique notes)
n_vocab = len(set(notes))
n_vocab 

569

In [12]:
network_input_training, network_output_training, network_output_training_to_plot = prepare_sequences(notes, n_vocab, new_composition_indexes)

In [13]:
print(len(network_input_training))

18397


In [14]:
model = Sequential()
model.add(LSTM(
    int(n_vocab),
    input_shape=(network_input_training.shape[1], network_input_training.shape[2]),
    return_sequences=True,
))


model.add(LSTM(
        n_vocab,
        return_sequences=True,
        recurrent_dropout=0.3,
    ))

model.add(LSTM(int(n_vocab/2)))


model.add(BatchNorm())
model.add(Dropout(0.3))


model.add(Dense(n_vocab))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 569)          1299596   
                                                                 
 lstm_1 (LSTM)               (None, 100, 569)          2592364   
                                                                 
 lstm_2 (LSTM)               (None, 284)               970144    
                                                                 
 batch_normalization (Batch  (None, 284)               1136      
 Normalization)                                                  
                                                                 
 dropout (Dropout)           (None, 284)               0         
                                                                 
 dense (Dense)               (None, 569)               162165    
                                                        

In [40]:
os.makedirs("../../checkpoint_lstm", exist_ok=True)
file_path = "../../checkpoint_lstm/best_weigths.h5"

checkpoint = ModelCheckpoint(
    file_path,
    monitor='accuracy',
    verbose=0,
    save_best_only=True,
    mode='max'
)

es = EarlyStopping(patience=5, restore_best_weights=True)

In [41]:
callbacks_list = [checkpoint,es]

history = model.fit(network_input_training, network_output_training, epochs=200, batch_size=64 ,callbacks=callbacks_list, validation_split=0.2)

Epoch 1/200
 15/230 [>.............................] - ETA: 4:12 - loss: 5.8924 - accuracy: 0.0312

KeyboardInterrupt: 

In [19]:
#load the notes used to train the model
with open('../../data_vocab/notes.pkl', 'rb') as filepath:
    notes = pickle.load(filepath)

In [20]:
 # Get all pitch names
pitchnames = sorted(set(item for item in notes))
# Get all pitch names
n_vocab = len(set(notes))

In [21]:
#prepare the possible sequences to start from

# map between notes and integers and back
note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

sequence_length = 100
network_input = []
output = []
for i in range(0, len(notes) - sequence_length, 1):
    sequence_in = notes[i:i + sequence_length]
    sequence_out = notes[i + sequence_length]
    network_input.append([note_to_int[char] for char in sequence_in])
    output.append(note_to_int[sequence_out])

n_patterns = len(network_input)

# reshape the input into a format compatible with LSTM layers
normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
# normalize input
normalized_input = normalized_input / float(n_vocab)

In [35]:
model.load_weights("../../checkpoint_lstm/best_weigths.h5")

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = '../../checkpoint_lstm/best_weigths.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

Generate notes

In [28]:
  # pick a random sequence from the input as a starting point for the prediction
  start = np.random.randint(0, len(network_input)-1)

  int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

  pattern = network_input[25345]
  prediction_output = []

  #save a probability here, useful just for the documentation
  probability_distribution = None

  id_notes = np.arange(0, n_vocab)
  # generate 500 notes
  for note_index in range(500):
      prediction_input = np.reshape(pattern, (1, len(pattern), 1))
      prediction_input = prediction_input / float(n_vocab)

      prediction_probabilities = model.predict(prediction_input, verbose=0)

      #for the documentation
      probability_distribution = prediction_probabilities[0]

      index = np.random.choice(id_notes,1, p = prediction_probabilities[0])
      #print(index)
      result = int_to_note[index[0]]
      prediction_output.append(result)

      pattern.append(index[0])
      pattern = pattern[1:len(pattern)]

IndexError: list index out of range

In [30]:
## Create midi files

In [37]:
offset = 0
output_notes = []

# create note and chord objects based on the values generated by the model
for pattern in prediction_output:
    # pattern is a chord
    if ('.' in pattern) or pattern.isdigit():
        notes_in_chord = pattern.split('.')
        notes = []
        for current_note in notes_in_chord:
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
            notes.append(new_note)
        new_chord = chord.Chord(notes)
        new_chord.offset = offset
        output_notes.append(new_chord)
    # pattern is a note
    else:
        new_note = note.Note(pattern)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
        output_notes.append(new_note)

    # increase offset each iteration so that notes do not stack
    offset += 0.5

midi_stream = stream.Stream(output_notes)

os.makedirs("../../generated_music_from_lstm", exist_ok=True)

midi_stream.write('midi', fp='../../generated_music_from_lstm')

NameError: name 'prediction_output' is not defined

In [32]:
np.max(probability_distribution)

NameError: name 'probability_distribution' is not defined

In [43]:
figure(figsize=(15, 10), dpi=80)

plt.plot(probability_distribution)  
plt.ylabel('Probability')
plt.xlabel('Data')

NameError: name 'probability_distribution' is not defined

<Figure size 1200x800 with 0 Axes>