## Importing Libraries

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle 
import json
import os
from pathlib import Path
from IPython.display import Image, Audio
from music21 import note , chord , stream , instrument , converter   
# from midi2audio import FluidSynth          # to convert midi to wav file

## Loading Data

For this project we use MIDI files of classical piano music, specifically <ins>**Beethoven's compositions**</ins>.

- Initially, we will generate a list comprising every song in the **Beethoven** folder, parsed as a *music21* stream.

- Later, we will create a function to extract both chords and notes from the data, transforming it into a **corpus**.

### Loading and parsing the midi files as stream

In [18]:
# Parse a single MIDI file from the dataset into a music21 stream object
cwd = os.getcwd()
filepath = f"{cwd}/input/beeth/elise.mid"
midi = converter.parse(filepath)
type(midi)

music21.stream.base.Score

In [19]:
print("Now playing a sample audio file from dataset....")
# Pass the path explicitly as `filename=`: given positionally it is the `data`
# argument and is only treated as a file when it happens to exist, so a wrong
# path would otherwise be handled as raw audio data instead of raising a clear
# file error. `rate` is dropped because it describes raw sample arrays, not an
# already-encoded audio file.
Audio(filename=os.getcwd() + '/input/samples/elise.mp3')

Now playing a sample audio file from dataset....


In [20]:
# Print the parsed stream as an indented text tree (parts, measures and their elements)
midi.show('text')

{0.0} <music21.metadata.Metadata object at 0x16029eb90>
{0.0} <music21.stream.Part 0x1602d6450>
    {0.0} <music21.stream.Measure 1 offset=0.0>
        {0.0} <music21.instrument.Piano 'Piano right: Piano right'>
        {0.0} <music21.instrument.Piano 'Piano'>
        {0.0} <music21.clef.TrebleClef>
        {0.0} <music21.tempo.MetronomeMark Quarter=69.18>
        {0.0} <music21.key.Key of C major>
        {0.0} <music21.meter.TimeSignature 3/8>
        {0.0} <music21.note.Rest quarter>
        {1.0} <music21.tempo.MetronomeMark Quarter=69.67>
        {1.0} <music21.tempo.MetronomeMark adagietto Quarter=67.6>
        {1.0} <music21.tempo.MetronomeMark Quarter=68.09>
        {1.0} <music21.note.Note E>
        {1.25} <music21.tempo.MetronomeMark Quarter=68.59>
        {1.25} <music21.tempo.MetronomeMark Quarter=69.08>
        {1.25} <music21.note.Note E->
        {1.3333} <music21.tempo.MetronomeMark Quarter=69.57>
    {1.5} <music21.stream.Measure 2 offset=1.5>
        {0.0} <music21.t

In [21]:
# Flatten the nested Score -> Part -> Measure hierarchy and keep only the notes/chords.
# `.flatten()` is used instead of the deprecated `.flat` property.
notes_to_parse = midi.flatten().notes
print(len(notes_to_parse))

876


In [22]:
# Show the first 100 notes/chords next to their offset,
# i.e. where each element is located in the piece
first_hundred = notes_to_parse[:100]
for item in first_hundred:
    print(item, item.offset)

notes_to_parse[0]

<music21.note.Note E> 1.0
<music21.note.Note E-> 1.25
<music21.note.Note E> 1.5
<music21.note.Note E-> 1.75
<music21.note.Note E> 2.0
<music21.note.Note B> 2.25
<music21.note.Note D> 2.5
<music21.note.Note C> 2.75
<music21.note.Note A> 3.0
<music21.note.Note A> 3.0
<music21.note.Note E> 3.25
<music21.note.Note A> 3.5
<music21.note.Note C> 3.75
<music21.note.Note E> 4.0
<music21.note.Note A> 4.25
<music21.note.Note B> 4.5
<music21.note.Note E> 4.5
<music21.note.Note E> 4.75
<music21.note.Note G#> 5.0
<music21.note.Note E> 5.25
<music21.note.Note G#> 5.5
<music21.note.Note B> 5.75
<music21.note.Note C> 6.0
<music21.note.Note A> 6.0
<music21.note.Note E> 6.25
<music21.note.Note A> 6.5
<music21.note.Note E> 6.75
<music21.note.Note E> 7.0
<music21.note.Note E-> 7.25
<music21.note.Note E> 7.5
<music21.note.Note E-> 7.75
<music21.note.Note E> 8.0
<music21.note.Note B> 8.25
<music21.note.Note D> 8.5
<music21.note.Note C> 8.75
<music21.note.Note A> 9.0
<music21.note.Note A> 9.0
<music21.note.No

<music21.note.Note E>

In [23]:
# Pitch describes how high or low a note sounds (its frequency).
# music21 exposes it as a Pitch object; its string form is the note letter (A-G,
# plus any accidental) followed by the octave number, e.g. 'E5'.
# The letter alone does not order notes from high to low - the octave number is needed too.
notes_to_parse[0].pitch , str(notes_to_parse[0].pitch)

(<music21.pitch.Pitch E5>, 'E5')

In [24]:
# Inspect one more element further into the piece
notes_to_parse[50]

<music21.note.Note A>

In [26]:
# Encode every element of the sample piece as a string token:
#   - a Note becomes the string form of its pitch (e.g. 'E5')
#   - a Chord becomes the values of its normalOrder joined with '+'
# Anything that is neither a Note nor a Chord is skipped.
notes_demo = [
    str(el.pitch) if isinstance(el, note.Note)
    else '+'.join(map(str, el.normalOrder))
    for el in notes_to_parse
    if isinstance(el, (note.Note, chord.Chord))
]

len(notes_demo)

876

In [27]:
print(notes_demo[32:50])

['B4', 'D5', 'C5', 'A4', 'A2', 'E3', 'A3', 'C4', 'E4', 'A4', 'B4', 'E2', 'E3', 'G#3', 'E4', 'C5', 'B4', 'A4']


In [29]:
# Collect the notes and chords of every MIDI file in ./input/beeth into one corpus
notes = []
p = Path(os.getcwd() + "/input/beeth")

# glob() yields files in arbitrary filesystem order; sorting keeps the corpus
# (and therefore every training sequence built from it) identical across runs and machines
for file in sorted(p.glob("*.mid")):
    midi = converter.parse(file)

    # flatten() replaces the deprecated `.flat` property
    elements_to_parse = midi.flatten().notes

    for element in elements_to_parse:

        # if the element is a Note, store the string form of its pitch
        if isinstance(element , note.Note):
            notes.append(str(element.pitch))

        # if the element is a Chord, store the values of its normalOrder joined with +
        elif isinstance(element , chord.Chord):
            notes.append("+".join(str(n) for n in element.normalOrder))

In [31]:
# Persist the parsed corpus as a pickle file named "notes" in the working directory
notes_path = os.getcwd() + "/notes"
with open(notes_path, "wb") as notes_file:
    pickle.dump(notes, notes_file)

In [32]:
# Reload the corpus from the "notes" pickle in the working directory.
# NOTE(review): unpickling can execute arbitrary code - only load files written by this notebook.
notes = pickle.loads(Path(os.getcwd() + "/notes").read_bytes())

In [33]:
# Corpus size versus number of distinct note/chord tokens
total_count, unique_count = len(notes), len(set(notes))
print("Total notes: ", total_count)
print("Unique notes: ", unique_count)

Total notes:  81312
Unique notes:  349


In [34]:
# Vocabulary size: the number of distinct note/chord tokens in the corpus
n_vocab = len(set(notes))

In [35]:
# Sorted list of the unique tokens; the position in this list is the class id
pitchnames = sorted(set(notes))

# token -> integer id
note_to_int = {token: class_id for class_id, token in enumerate(pitchnames)}

# integer id -> token (inverse of the mapping above)
int_to_note = dict(enumerate(pitchnames))

# every unique token must have received exactly one id
assert len(note_to_int) == n_vocab

In [36]:
# Number of consecutive tokens in each LSTM input sequence;
# the token that follows each such window is used as the prediction target
sequence_len = 100

In [37]:
# Translate the whole corpus to integer ids once, then slide a window over it
encoded_notes = [note_to_int[n] for n in notes]
window_starts = range(len(encoded_notes) - sequence_len)

# input: sequence_len consecutive ids starting at each position
network_input = [encoded_notes[start : start + sequence_len] for start in window_starts]

# output: the id that immediately follows each input window
network_output = [encoded_notes[start + sequence_len] for start in window_starts]

In [38]:
# Sanity check: there must be exactly one target per input sequence
len(network_input) , len(network_output)

(81212, 81212)

In [39]:
# Shape of the inputs as an array: (number of sequences, sequence_len)
np.asarray(network_input).shape

(81212, 100)

In [40]:
# Append a trailing feature axis so the data is (input_samples, sequence_len, 1),
# the 3-D layout the LSTM layers are given as input
input_array = np.asarray(network_input)
normalised_network_input = input_array.reshape(*input_array.shape, 1)
print(normalised_network_input.shape)

(81212, 100, 1)


In [41]:
# Scale the integer ids (0 .. n_vocab-1) into [0, 1).
# The array is rebuilt from `network_input` instead of dividing
# `normalised_network_input` by itself, so re-running this cell always yields
# the same values rather than shrinking them a second time.
normalised_network_input = np.asarray(network_input, dtype=np.float64)[..., np.newaxis] / float(n_vocab)

In [42]:
# Network output are the classes, so encode them as one-hot vectors
from tensorflow.keras.utils import to_categorical

# Only encode while the targets are still a flat sequence of class ids, so
# re-running this cell does not one-hot an already one-hot matrix.
if np.ndim(network_output) == 1:
    # Fix the width to n_vocab: without num_classes it is inferred as (largest id + 1),
    # which would be narrower than the model's n_vocab-wide output layer whenever
    # the highest id never occurs as a target.
    network_output = to_categorical(network_output, num_classes = n_vocab)

In [43]:
# Final training tensors: inputs are (samples, sequence_len, 1), targets are (samples, classes)
print(normalised_network_input.shape)
print(network_output.shape)

(81212, 100, 1)
(81212, 349)


In [44]:
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [45]:
# Per-sample input shape: (timesteps, features)
timesteps, n_features = normalised_network_input.shape[1:]

# Three stacked LSTM layers followed by a dense head that outputs one
# probability per token in the vocabulary
model = Sequential([
    # the first two LSTMs return the full sequence so the next LSTM can consume it
    LSTM(units = 512, input_shape = (timesteps, n_features), return_sequences = True),
    Dropout(0.3),
    LSTM(units = 512, return_sequences = True),
    Dropout(0.3),
    # the last LSTM returns only its final output
    LSTM(units = 512),
    # no activation is specified for this intermediate dense layer
    Dense(256),
    Dropout(0.3),
    # softmax over the n_vocab classes
    Dense(n_vocab, activation = 'softmax'),
])

In [46]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax output layer
model.compile(loss = "categorical_crossentropy", optimizer = "adam")

In [47]:
# Print the layer-by-layer output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 512)          1052672   
                                                                 
 dropout (Dropout)           (None, 100, 512)          0         
                                                                 
 lstm_1 (LSTM)               (None, 100, 512)          2099200   
                                                                 
 dropout_1 (Dropout)         (None, 100, 512)          0         
                                                                 
 lstm_2 (LSTM)               (None, 512)               2099200   
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dropout_2 (Dropout)         (None, 256)               0

In [48]:
# Checkpoint to weights.h5 whenever the training loss reaches a new minimum
checkpoint = ModelCheckpoint(
    filepath = "weights.h5",
    monitor = 'loss',
    mode = 'min',
    save_best_only = True,
)

# Train on the full dataset; `hist` keeps the returned training history
hist = model.fit(
    x = normalised_network_input,
    y = network_output,
    batch_size = 64,
    epochs = 100,
    callbacks = [checkpoint],
)

Epoch 1/100