In [42]:
# Misc
import pickle

# Data manipulation
import numpy
import pandas as pd

# Music
import music21 as m21
from music21 import converter, instrument, note, chord

# Data Visualiation
import matplotlib.pyplot as plt
import seaborn as sns

# System
import os
import random
import shutil

# Performance metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization as BatchNorm
from tensorflow.keras import utils as np_utils
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.regularizers as regularizers
from tensorflow.keras.utils import to_categorical


In [None]:
## Create a train set ##

In [43]:
source_dir = "../../data_raw/"

# Define the percentage of data to use to training the modle 
train_percentage = 80  


train_dir = "../../data_split/train/"
test_dir = "../../data_split/test/"


os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


all_files = os.listdir(source_dir)

num_train_files = int(len(all_files) * (train_percentage / 100))


random.shuffle(all_files)

# Split the files into training and testing sets
train_files = all_files[:num_train_files]
test_files = all_files[num_train_files:]


for file in train_files:
    source_file = os.path.join(source_dir, file)
    destination_file = os.path.join(train_dir, file)
    shutil.copy(source_file, destination_file)


for file in test_files:
    source_file = os.path.join(source_dir, file)
    destination_file = os.path.join(test_dir, file)
    shutil.copy(source_file, destination_file)

In [6]:
## Create the vocabulary ##

In [44]:
def load_midi_into_notes():
    # Extracted notes for each file
    notes = []

    # Save the indexes in notes in which a new composition started to create consistent sequences
    new_composition_indexes = []

    # Load THE !!TRAINING!! MIDI files to create the vocabulary
    input_path = "../../data_split/test"
    for file in os.listdir(input_path):
        file_path = os.path.join(input_path, file)
        
        # Add new index to alert that it's a new composition
        if new_composition_indexes and new_composition_indexes[-1] != len(notes):
            new_composition_indexes.append(len(notes))

        try:
            # Convert the music into a score object
            score = converter.parse(file_path)

            print("Parsing %s" % file)

            elements_in_part = None

            try:  # File has instrument parts
                # Given a score that represents the MIDI, partition it into parts for each unique instrument found
                partitions_by_instrument = instrument.partitionByInstrument(score)
                # Visit all the elements (notes, chords, rests, and more) of each of its internal "measures."
                elements_in_part = partitions_by_instrument.parts[0].recurse()

            except:  # File has notes in a flat structure
                elements_in_part = score.flat.notes

            # Scroll through all the elements (notes or chords) picked up
            for element in elements_in_part:
                # If the element is a note...
                if isinstance(element, note.Note):
                    # Add note to array
                    notes.append(str(element.pitch))
                # If the element is a chord (a set of notes --> e.g., C4 F4)
                elif isinstance(element, chord.Chord):
                    # Extract each note from the chord and insert it into the array in the format Note1.Note2.Note3
                    notes.append('.'.join(str(n) for n in element.normalOrder))

        except Exception as e:
            print(f"Error parsing {file}: {str(e)}")
            continue

    print("✅Loading training done")

    # Save the 'notes' list to a pickle file
    os.makedirs("../../data_vocab/", exist_ok=True)
    with open("../../data_vocab/notes.pkl", "wb") as filepath:
        pickle.dump(notes, filepath)

    # Return notes and new composition indexes
    return notes, new_composition_indexes


In [19]:
notes, new_composition_indexes = load_midi_into_notes()

Parsing mazrka35.mid
Parsing dgffc49.mid
Parsing chopol14.mid
Parsing rondo73.mid
Parsing contreda.mid
Parsing mk_chim2.mid
Parsing chonoc14.mid
Parsing choval12.mid
Parsing chonoc17.mid
Parsing mazrka11.mid
Parsing mazrka04.mid
Parsing mazrka06.mid
Parsing chpnimpu.mid
Parsing chopineb.mid
Parsing waltz_am.mid
Parsing chetude1.mid
Parsing mazrka32.mid
Parsing mazrka22.mid
Parsing chopol12.mid
Parsing mazrka27.mid
Parsing mazrka09.mid
Parsing mazrka44.mid
Parsing mazrka41.mid
Parsing chopol15.mid
Parsing mazrka43.mid
Error parsing lecpsc3.mid: badly formatted midi bytes, got: b'RIFF\x0eN\x00\x00RMIDdata\x01N\x00\x00'
Parsing chlargo.mid
Parsing mazrka12.mid
Parsing choval13.mid
Parsing lecpsb3.mid
Parsing choval05.mid
Parsing chopol05.mid
Parsing pologm.mid
Parsing fugaam.mid
Parsing chopol08.mid
Parsing prelop45.mid
Parsing nocturne.mid
Parsing mazrka28.mid
Parsing chopnpol.mid
Parsing op72.mid
Parsing chpolfnt.mid
Parsing mazrka03.mid
Parsing varigerm.mid
Parsing chonoc08.mid
Parsing

In [22]:
len(notes)

143807

In [40]:
def prepare_sequences(notes, n_vocab, new_composition_indexes):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

    # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    wait = 0
    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):

        # if the ground truth index is a note/chord that belongs to a new composition
        if (i + sequence_length) in new_composition_indexes:
            wait = sequence_length - 1
            continue
        if wait != 0:
            wait = wait - 1
            continue

        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]

        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input_training = np.reshape(network_input, (n_patterns, sequence_length, 1))

    # normalize input
    network_input_training = network_input_training / float(n_vocab)
    
    # one-hot encoding of the output
    network_output_training = to_categorical(network_output, num_classes=n_vocab)

    return network_input_training, network_output_training, network_output



In [24]:
notes

['G3',
 'G4',
 'F4',
 '0.3',
 '11.2',
 '0.3',
 '11.2.5',
 'G4',
 '7',
 '11.2.5',
 '0.3',
 '11.2.5',
 '7',
 '11.2.5',
 '0.3',
 'B3',
 'C4',
 'D4',
 'E-4',
 'D4',
 'D4',
 'C4',
 '7.10',
 '6.9',
 '7.10',
 '6.9.0',
 '2',
 '6.9.0',
 '7.10',
 '6.9.0',
 '2',
 '6.9.0',
 '7.10',
 'G3',
 '3.8',
 'C4',
 '3.8',
 'G3',
 'C4',
 'F#4',
 '2.7',
 '11.2',
 'F3',
 '0.3',
 '11.2',
 '0.3',
 '11.2.5',
 '7',
 '11.2.5',
 '0.3',
 '11.2.5',
 '7',
 '11.2.5',
 '0.3',
 'C3',
 '0.2',
 'C3',
 'B2',
 'C3',
 'D3',
 'E-3',
 'E-3',
 'D3',
 '1.4',
 '0.6',
 '7.10',
 '6.9',
 '7.10',
 '5.9',
 '9.1.4',
 '2',
 '5.9',
 'D3',
 '1.4.7',
 '2.5',
 '2.5',
 '5.9',
 '5.9',
 'D3',
 '1.4.7',
 '2.5',
 '9',
 'C5',
 'C4',
 '10.0',
 'B-3',
 'C5',
 'B-4',
 '9',
 'B-3',
 '7',
 'A4',
 'A3',
 '4',
 '5',
 'D3',
 'D4',
 '9',
 'C5',
 'C4',
 '10.0',
 'B-3',
 'D3',
 '10.0',
 '9',
 '7',
 'A4',
 'A3',
 '4',
 '5',
 'D3',
 'D4',
 'A4',
 'A3',
 'D3',
 'A4',
 'A3',
 '4',
 '5',
 'D3',
 'D4',
 'A3',
 'A4',
 '4',
 '5',
 'D3',
 'A3',
 '5',
 'D4',
 'D3',
 '7.

In [26]:
# get the vocabulary (unique notes)
n_vocab = len(set(notes))
n_vocab 


930

In [27]:
network_input_training, network_output_training, network_output_training_to_plot = prepare_sequences(notes, n_vocab, new_composition_indexes)

In [28]:
print(len(network_input_training))

143707


In [None]:
## Creat the model ##

In [30]:
model = Sequential()
model.add(LSTM(
    int(n_vocab),
    input_shape=(network_input_training.shape[1], network_input_training.shape[2]),
    return_sequences=True,
))

model.add(LSTM(int(n_vocab/2)))

model.add(Dense(n_vocab))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [31]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 930)          3467040   
                                                                 
 lstm_1 (LSTM)               (None, 465)               2596560   
                                                                 
 dense (Dense)               (None, 930)               433380    
                                                                 
 activation (Activation)     (None, 930)               0         
                                                                 
Total params: 6496980 (24.78 MB)
Trainable params: 6496980 (24.78 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [32]:
file_path = os.makedirs("../../checklist_lstm/", exist_ok=True)

checkpoint = ModelCheckpoint(
    file_path,
    monitor='accuracy',
    verbose=0,
    save_best_only=True,
    mode='max'
)

callbacks_list = [checkpoint]

history = model.fit(network_input_training, network_output_training, epochs=200, batch_size=64 ,callbacks=callbacks_list, shuffle=True)

Epoch 1/200
 179/2246 [=>............................] - ETA: 43:54 - loss: 5.4085 - accuracy: 0.0175

KeyboardInterrupt: 