# Import necessary libraries
- The `music21` library is the main library used to parse the MIDI files, and extract the notes and chords from them.
- Alternatives to `music21` are `pretty_midi` and `miditoolkit`, but `music21` is the most popular and widely used library for MIDI file processing.

In [1]:
import json
from pathlib import Path
from music21 import converter, instrument, note, chord

# Get file paths
- The dataset resides in a local directory; the paths of all `.mid` files under it are collected recursively.

In [2]:
# Build the dataset folder from separate components so the path is portable:
# a literal backslash is only treated as a separator on Windows.
folder = Path('Data') / 'Random_Archive'

# Recursively collect every .mid file under the folder. Sorting makes the
# processing order (and therefore the order of the extracted notes)
# reproducible across runs and platforms.
songs = sorted(folder.rglob('*.mid'))

# Display the list of songs
songs

[WindowsPath('Data/Random_Archive/1. MLP - Sweet - C#min9-BMaj9-C#min9-BMaj9-G#7b9.mid'),
 WindowsPath('Data/Random_Archive/1.mid'),
 WindowsPath('Data/Random_Archive/10. MLP - Old Timer - Em9-A9-DMaj7-B7.mid'),
 WindowsPath('Data/Random_Archive/10.mid'),
 WindowsPath('Data/Random_Archive/11.mid'),
 WindowsPath('Data/Random_Archive/12.mid'),
 WindowsPath('Data/Random_Archive/13.mid'),
 WindowsPath('Data/Random_Archive/14.mid'),
 WindowsPath('Data/Random_Archive/15.mid'),
 WindowsPath('Data/Random_Archive/16.mid'),
 WindowsPath('Data/Random_Archive/17.mid'),
 WindowsPath('Data/Random_Archive/18.mid'),
 WindowsPath('Data/Random_Archive/19.mid'),
 WindowsPath('Data/Random_Archive/2. MLP - Jazz - Cmin11-F13-Dmin7-G13.mid'),
 WindowsPath('Data/Random_Archive/2.mid'),
 WindowsPath('Data/Random_Archive/20.mid'),
 WindowsPath('Data/Random_Archive/3. MLP - Sentimental Melody - GbMaj7-Ebm9.mid'),
 WindowsPath('Data/Random_Archive/3.mid'),
 WindowsPath('Data/Random_Archive/4. MLP - Crunchy - Em7-

# Feature Extraction
- The `music21` library is used to parse the MIDI files.
- MIDI files are partitioned by instrument, which in this case is mainly a piano. 
- Notes and chords are extracted from the MIDI files, and appended to a list.
- The list is subsequently dumped into a JSON file, which effectively acts as a cache for the extracted features.
- Due to the memory intensive nature of this process, it is separated from the main notebook for better memory management.
- Caching the features this way means they only need to be re-extracted when the dataset changes, rather than on every run of the main notebook.

In [3]:
# Create an empty list to store all the notes and chords.
# Entries are strings appended by the extraction loop: a single note is stored
# as str(pitch), a chord as its normalOrder values joined by ".".
notes = []

In [4]:
# Extract notes and chords by instrument
for i, file in enumerate(songs):
    print(f"{i+1}: {file}")
    # Buffer this file's features separately so that a file which fails
    # part-way through contributes nothing to `notes` instead of leaving a
    # truncated sequence behind.
    file_notes = []
    try:
        midi = converter.parse(file)
        parts = instrument.partitionByInstrument(midi)
        if parts:  # file has instrument parts
            # Only the first partitioned part is used
            notes_to_parse = parts.parts[0].recurse()
        else:  # file has notes in a flat structure
            # NOTE(review): `.flat` is reported as deprecated in newer music21
            # releases in favour of `.flatten()` — confirm against the
            # installed version before switching.
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                # Single note: store the string form of its pitch
                file_notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                # Chord: store its normalOrder values joined by "."
                file_notes.append(".".join(str(n) for n in element.normalOrder))
    except Exception as exc:
        # Catch Exception (not a bare `except:`) so that KeyboardInterrupt /
        # SystemExit still stop the loop, and report why the file failed.
        print(f"FAILED: {i+1}: {file} ({type(exc).__name__}: {exc})")
    else:
        # Commit the file's features only after it was processed completely
        notes.extend(file_notes)

1: Data\Random_Archive\1. MLP - Sweet - C#min9-BMaj9-C#min9-BMaj9-G#7b9.mid
2: Data\Random_Archive\1.mid
3: Data\Random_Archive\10. MLP - Old Timer - Em9-A9-DMaj7-B7.mid
4: Data\Random_Archive\10.mid
5: Data\Random_Archive\11.mid
6: Data\Random_Archive\12.mid
7: Data\Random_Archive\13.mid
8: Data\Random_Archive\14.mid
9: Data\Random_Archive\15.mid
10: Data\Random_Archive\16.mid
11: Data\Random_Archive\17.mid
12: Data\Random_Archive\18.mid
13: Data\Random_Archive\19.mid
14: Data\Random_Archive\2. MLP - Jazz - Cmin11-F13-Dmin7-G13.mid
15: Data\Random_Archive\2.mid
16: Data\Random_Archive\20.mid
17: Data\Random_Archive\3. MLP - Sentimental Melody - GbMaj7-Ebm9.mid
18: Data\Random_Archive\3.mid
19: Data\Random_Archive\4. MLP - Crunchy - Em7-Ebdim-Dmin11-G7b9.mid
20: Data\Random_Archive\4.mid
21: Data\Random_Archive\5. MLP - Confident - Ebmin7-Fmin7-DbMaj7.mid
22: Data\Random_Archive\5.mid
23: Data\Random_Archive\6. MLP - Bright - F#min9-EMaj7-C#7.mid
24: Data\Random_Archive\6.mid
25: Data\

In [5]:
# Reference the list of notes.
# NOTE(review): Jupyter only echoes a bare expression when it is the last
# statement of a cell, so this line likely displays nothing here — confirm.
notes

# Persist the extracted features as JSON. Mode 'x' is exclusive creation:
# opening raises FileExistsError if notes.json already exists, so an existing
# cache is never overwritten.
with Path('notes.json').open('x') as file:
    file.write(json.dumps(notes))