# import the libraries

In [1]:
import os
import sys

import numpy as np
from tqdm import tqdm

# import IPython
from IPython.display import Audio
from pretty_midi import PrettyMIDI
# import fluidsynth

from music21 import midi, converter, instrument, note, chord, stream
from midi2audio import FluidSynth



# file path
##### *file_path = './maestro-v3.0.0-midi/maestro-v3.0.0/2004/'* (only the 2004 recordings are used)

In [2]:
file_path = './maestro-v3.0.0-midi/maestro-v3.0.0/2004/'

# list midi
##### print all the midi files

In [3]:
def list_midi(file_path):
    """Print the path of every ``.midi`` file found anywhere under ``file_path``."""
    for folder, _subfolders, names in os.walk(file_path):
        # keep only the MIDI files of the current folder
        midi_names = (name for name in names if name.endswith(".midi"))
        for name in midi_names:
            print(os.path.join(folder, name))

list_midi(file_path)

# import glob
# def list_midi_glob(file_path):
#     for file in glob.glob(os.path.join(file_path, '**/*.midi'), recursive=True):
#         print(file)

# list_midi_glob(file_path)

./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi
./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.midi
./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav.midi
./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_10_Track10_wav.midi
./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_02_Track02_wav.midi
./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_03_Track03_wav.midi
./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_05_R1_2004_02-03_ORIG_MID--AUDIO_05_R1_2004_06_Track06_wav.midi
./maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_02_Track02_wa

# play a midi file
##### render a single MIDI file with music21's inline MIDI player

In [None]:
# play midi by reading it with music21 and showing the resulting stream as 'midi'

def play_music(m):
    """Read the MIDI file at path ``m`` and display it as a playable stream.

    Args:
        m: Path of the MIDI file to open.

    The file is read through music21's ``midi.MidiFile``, translated to a
    stream with ``midi.translate.midiFileToStream`` and shown with
    ``show('midi')``.
    """
    midi_file = midi.MidiFile()
    midi_file.open(m)
    try:
        midi_file.read()
    finally:
        # always release the file handle, even when the file cannot be read
        midi_file.close()
    s = midi.translate.midiFileToStream(midi_file)
    s.show('midi')
    
# join the folder and file name instead of concatenating with a stray './'
play_music(os.path.join(file_path, 'MIDI-Unprocessed_XP_16_R2_2004_01_ORIG_MID--AUDIO_16_R2_2004_02_Track02_wav.midi'))

# converting all the MIDI files to stream objects
- We start by loading each file into a Music21 stream object using the converter.parse(file) function.

- Using this stream object we get a list of all the notes and chords in the file.


In [4]:
# parsed scores, filled in by load_midi()
all_midis = []

# TODO: load_midi only scans the top level of file_path; reuse the os.walk
# loop from list_midi to also load MIDI files in nested folders.

def load_midi(file_path):
    """Parse every ``.midi`` file directly inside ``file_path``.

    Args:
        file_path: Directory to scan (not recursive). A trailing path
            separator is optional.

    Each file is parsed with ``converter.parse`` and the result is appended
    to the module-level ``all_midis`` list. Nothing is returned.
    """
    for name in tqdm(os.listdir(file_path)):
        if name.endswith(".midi"):
            # os.path.join works whether or not file_path ends with a separator
            full_path = os.path.join(file_path, name)
            # named `score` so the imported music21 `midi` module is not shadowed
            score = converter.parse(full_path)
            all_midis.append(score)

    print("Done loading midi files")

load_midi(file_path)

100%|██████████| 132/132 [02:11<00:00,  1.00it/s]

Done loading midi files





# get the notes and durations
- We append the pitch of every note object using its string notation since the most significant parts of the note can be recreated using the string notation of the pitch.

- And we append every chord by joining the pitch classes of its normal order into a single string, with each value separated by a dot.

- These encodings allow us to easily decode the output generated by the network into the correct notes and chords.


In [5]:
def get_notes(file):
    """Turn a collection of parsed scores into one flat list of string tokens.

    Args:
        file: Iterable of parsed scores (for example ``all_midis``).

    Returns:
        list[str]: One token per note or chord, in the order encountered.
        A note becomes ``str(element.pitch)``; a chord becomes the values of
        ``element.normalOrder`` joined with ``"."``. Any other element is
        skipped.
    """
    notes = []

    for song in tqdm(file):

        # group by instrument parts
        parts = instrument.partitionByInstrument(song)

        if parts:  # the score has instrument parts
            notes_to_parse = parts.parts[0].recurse()  # use the first instrument part
        else:
            # No instrument parts: read the notes of the score itself.
            # (The previous code read `midi.flat.notes`, i.e. an attribute of
            # the music21 `midi` module, which fails on this branch.)
            notes_to_parse = song.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    return notes

In [6]:
notes  = get_notes(all_midis)
n_vocab = len(set(notes))
print("Total notes in all the 2004 midis in the dataset:", len(notes))
# print("Total durations in all the 2004 midis in the dataset:", len(DURATION))

100%|██████████| 132/132 [01:28<00:00,  1.49it/s]

Total notes in all the 2004 midis in the dataset: 420460





In [7]:
# one constant drives both the label and the slice so they cannot disagree
PREVIEW_COUNT = 30
print(f"First {PREVIEW_COUNT} values in the notes:\n", notes[:PREVIEW_COUNT])
# print("First fifty values in the Corpus:\n", DURATION[:30])

# print('pitch')
# idx = 0
# for n in zip(notes):
#     if idx < 50:
#         print(n)
#     idx = idx + 1

First fifty values in the notes:
 ['B4', 'G3', 'B4', 'B3', 'D4', 'C5', 'G4', 'A3', '0.2', 'C5', 'G4', 'F#4', '11.0', '0.4', '11.2', 'D4', 'F#4', 'G4', 'B4', 'D5', '6.11', 'G5', 'G4', '0.4', 'G5', '11.2', 'G5', 'C5', 'A3', 'B4']


In [8]:
import tensorflow as tf
from tensorflow import keras as k
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Dropout, Activation

In [9]:
def prepare_sequences(notes, n_vocab, sequence_length=100):
    """Prepare the sequences used by the Neural Network.

    Every run of ``sequence_length`` consecutive tokens becomes one input
    window, and the token that follows it becomes the target.

    Args:
        notes: Sequence of note/chord tokens.
        n_vocab: Number of distinct tokens. Used to scale the inputs and as
            the width of the one-hot targets, so it must be at least the
            number of distinct values in ``notes``.
        sequence_length: Number of tokens per input window (default 100).

    Returns:
        tuple: ``(network_input, network_output)`` where ``network_input`` has
        shape ``(n_patterns, sequence_length, 1)`` holding token indices
        divided by ``n_vocab``, and ``network_output`` is the one-hot encoding
        of the target indices with ``n_vocab`` columns.

    Raises:
        ValueError: If ``notes`` has no more than ``sequence_length`` tokens,
            so that not a single (window, target) pair can be built.
    """
    if len(notes) <= sequence_length:
        raise ValueError(
            "need more than %d notes to build a training sequence, got %d"
            % (sequence_length, len(notes))
        )

    # map every distinct token to an integer, in sorted order
    pitchnames = sorted(set(notes))
    note_to_int = {name: index for index, name in enumerate(pitchnames)}

    # encode the corpus once instead of once per window
    encoded = [note_to_int[name] for name in notes]

    # sliding windows and the token that follows each of them
    n_patterns = len(encoded) - sequence_length
    network_input = [encoded[i:i + sequence_length] for i in range(n_patterns)]
    network_output = encoded[sequence_length:]

    # reshape the input into the (samples, timesteps, features) layout used by recurrent layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)
    # Fix the one-hot width explicitly: without num_classes the width is
    # inferred from the largest target index and can end up below n_vocab.
    network_output = tf.keras.utils.to_categorical(network_output, num_classes=n_vocab)

    return (network_input, network_output)


In [10]:
network_input, network_output = prepare_sequences(notes, n_vocab)

In [11]:
# simple RNN model

def create_network(network_input, n_vocab):
    """Build and compile the stacked SimpleRNN model.

    Args:
        network_input: Array whose second and third dimensions give the
            input shape of the first recurrent layer.
        n_vocab: Number of units of the final dense layer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer).
    """
    window_shape = (network_input.shape[1], network_input.shape[2])

    model = Sequential()
    # layers in the order they are stacked
    layer_stack = (
        SimpleRNN(512, input_shape=window_shape, return_sequences=True),
        Dropout(0.3),
        SimpleRNN(512, return_sequences=True),
        Dropout(0.3),
        SimpleRNN(512),
        Dense(256),
        Dropout(0.3),
        Dense(n_vocab),
        Activation('softmax'),
    )
    for layer in layer_stack:
        model.add(layer)

    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model

In [12]:
model = create_network(network_input, n_vocab)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 100, 512)          263168    
                                                                 
 dropout (Dropout)           (None, 100, 512)          0         
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 100, 512)          524800    
                                                                 
 dropout_1 (Dropout)         (None, 100, 512)          0         
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 512)               524800    
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dropout_2 (Dropout)         (None, 256)               0