# Data Processing

## Imports

In [1]:
from music21 import converter, instrument, note, chord
import sys
import numpy as np
from imageio import imwrite
import os

## Selecting piano-only pieces (files with a single instrument)

In [2]:
# Every entry of the 'Data' folder is a candidate input file.
# os.listdir already returns a fresh list, so no copy is needed.
files_raw = os.listdir('Data')
len(files_raw)

125

In [3]:
# Keep only the files whose instrument partition contains exactly one part.
files = []

for file in files_raw:
    try:
        mid = converter.parse(f'Data/{file}')
        file_instruments = instrument.partitionByInstrument(mid)
    except Exception as exc:
        # Best effort: an unreadable file is skipped, but the reason is
        # reported instead of being silently discarded.  Catching Exception
        # (not a bare except) lets Ctrl-C still interrupt the loop.
        print(f"Skipping {file}: {exc!r}", file=sys.stderr)
        continue

    # NOTE(review): some music21 versions appear to return None when no
    # instrument partition is available -- treat that as "not a match"
    # rather than relying on len(None) raising.
    if file_instruments is not None and len(file_instruments) == 1:
        files.append(file)

In [4]:
# Number of files that passed the single-instrument filter.
len(files)

88

In [6]:
# Files that did not pass the filter, in their original listing order.
# The kept names are put in a set once so each membership test is O(1)
# instead of a linear scan of the `files` list.
kept_files = set(files)
files_removed = [file for file in files_raw if file not in kept_files]
len(files_removed)

37

## Converting MIDI files to images

In [7]:
def extractNote(element):
    """Return ``element.pitch.ps`` converted to ``int`` (fraction truncated)."""
    pitch_space = element.pitch.ps
    return int(pitch_space)

In [8]:
def extractDuration(element):
    """Return the ``quarterLength`` of the element's ``duration``, unchanged."""
    element_duration = element.duration
    return element_duration.quarterLength

In [9]:
def get_notes(notes_to_parse):
    """Collect start, pitch and duration for every note in an iterable.

    A ``note.Note`` contributes one entry.  A ``chord.Chord`` contributes one
    entry per member note; all of them share the chord's offset and duration.
    Any other element type is ignored.

    Args:
        notes_to_parse: Iterable of music21 elements (for example the result
            of ``recurse()`` on a part, or ``.notes`` of a flat stream).

    Returns:
        dict with three parallel lists of equal length:
        ``"start"`` (each element's ``offset``), ``"pitch"`` (results of
        ``extractNote``) and ``"dur"`` (results of ``extractDuration``).
    """
    starts = []
    pitches = []
    durations = []

    # No ``isRest`` checks are needed here: rests are ``note.Rest`` objects,
    # which match neither isinstance test below, so they are skipped already.
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            starts.append(element.offset)
            pitches.append(extractNote(element))
            durations.append(extractDuration(element))

        elif isinstance(element, chord.Chord):
            # Offset and duration belong to the chord as a whole; read them
            # once rather than once per member note.
            chord_start = element.offset
            chord_duration = extractDuration(element)
            for chord_note in element:
                starts.append(chord_start)
                pitches.append(extractNote(chord_note))
                durations.append(chord_duration)

    return {"start": starts, "pitch": pitches, "dur": durations}

In [10]:
def midi2image(
    midi_path,
    max_repetitions=float("inf"),
    resolution=0.25,
    lowerBoundNote=21,
    upperBoundNote=127,
    maxSongLength=100,
):
    """Render a MIDI file as a sequence of piano-roll PNG images.

    The file is parsed with music21 and split by instrument.  For every
    instrument the notes are quantised to ``resolution`` and drawn into
    consecutive windows of ``maxSongLength`` time steps.  Each non-empty
    window is written to
    ``Data_image/<song>/<song>_<instrument>_<window index>.png`` where
    ``<song>`` is the file name of ``midi_path`` without its extension.

    Args:
        midi_path: Path of the MIDI file to convert.
        max_repetitions: Upper bound on the number of windows examined per
            instrument (window indices ``0 .. max_repetitions - 1``).
        resolution: Length of one image column, in the same unit as the
            note offsets and durations returned by ``get_notes``.
        lowerBoundNote: Pitch number drawn in image row 0.
        upperBoundNote: Exclusive upper pitch bound; the image has
            ``upperBoundNote - lowerBoundNote`` rows.
        maxSongLength: Number of columns (time steps) per image.

    Returns:
        None.  The images are written to disk as a side effect.
    """
    # basename/splitext instead of str.replace('.mid', ...): replace() also
    # rewrites '.mid' inside the name and turns 'x.midi' into 'xi', which
    # produced output files with an unusable extension.
    song_name = os.path.splitext(os.path.basename(midi_path))[0]
    output_folder = os.path.join("Data_image", song_name)
    os.makedirs(output_folder, exist_ok=True)

    mid = converter.parse(midi_path)
    instruments = instrument.partitionByInstrument(mid)

    data = {}
    try:
        unnamed_count = 0
        for part in instruments.parts:
            notes_data = get_notes(part.recurse())
            if not notes_data["start"]:
                continue

            # NOTE(review): two parts with the same partName overwrite each
            # other here (behaviour kept from the original).
            if part.partName is None:
                data["instrument_{}".format(unnamed_count)] = notes_data
                unnamed_count += 1
            else:
                data[part.partName] = notes_data
    except Exception:
        # Best-effort fallback, presumably for files where no instrument
        # partition is available (e.g. `instruments` is None): treat the
        # whole score as a single instrument.
        data["instrument_0"] = get_notes(mid.flat.notes)

    if maxSongLength <= 0:
        # A window without columns can never contain a note.
        return

    row_count = upperBoundNote - lowerBoundNote

    for instrument_name, values in data.items():
        # https://en.wikipedia.org/wiki/Scientific_pitch_notation#Similar_systems

        # Quantise every note once into (row, first_step, end_step).
        events = []
        for dur, start, pitch in zip(values["dur"], values["start"], values["pitch"]):
            # Pitches outside the image would either wrap around to the
            # wrong rows (negative index) or raise IndexError; skip them.
            if not lowerBoundNote <= pitch < upperBoundNote:
                continue
            first_step = int(start / resolution)
            end_step = first_step + int(dur / resolution)
            if end_step > first_step:
                events.append((pitch - lowerBoundNote, first_step, end_step))

        if not events:
            continue

        # Walk every window up to the last sounding step.  An empty window
        # is skipped instead of ending the loop, so music that follows a
        # long silence is still rendered.
        total_steps = max(end_step for _, _, end_step in events)
        window_count = (total_steps + maxSongLength - 1) // maxSongLength

        for index in range(window_count):
            if index >= max_repetitions:
                break

            window_start = index * maxSongLength
            window_end = window_start + maxSongLength
            matrix = np.zeros((row_count, maxSongLength), dtype=np.uint8)

            for row, first_step, end_step in events:
                # Clip the note to the current window and paint it as one slice.
                lo = max(first_step, window_start)
                hi = min(end_step, window_end)
                if lo < hi:
                    matrix[row, lo - window_start:hi - window_start] = 255

            if not matrix.any():  # window contains no notes: don't save it
                continue

            output_filename = os.path.join(
                output_folder, f"{song_name}_{instrument_name}_{index}.png"
            )
            imwrite(output_filename, matrix)

In [11]:
# Convert every selected file found under Data/ into piano-roll images.

for file in files:
    midi2image(f"Data/{file}")