# MIDI DATA PROCESSING

Import MIDI processing libraries

In [1]:
# File reading
import os
# Pausing between MIDI messages
import time

# Midi reading
import mido
# Data analysis
import pandas as pd
# Progress bar
import tqdm

Try sending a test note to a MIDI output

In [None]:
# Create the note-on message and the note-off that ends the same note
msg = mido.Message('note_on', note=60)
msg_off = mido.Message('note_off', note=60)
# Open the default output port; the with-block closes it again even if
# sending fails, so the port is not left held open by the kernel
with mido.open_output() as outport:
    # Start the note
    outport.send(msg)
    # Let it sound briefly, then stop it so the synth is not left with a
    # note that never ends
    time.sleep(0.5)
    outport.send(msg_off)

If you heard a note, congratulations! Things are working!
If not, there is probably a MIDI configuration issue on your machine that you should look into.

Moving on, try to load in a file

In [None]:
# Read the MIDI file from disk
mfile = mido.MidiFile('Aha_-_Take_On_Me.mid')

# Show the index and name of every track in the file
for i, track in enumerate(mfile.tracks):
    label = 'Track {}: {}'.format(i, track.name)
    print(label)
    # To dump every message of the track as well, uncomment:
    #for msg in track:
    #    print(msg)

Now moving into drum tracks

All these drum tracks are downloaded from [this link](https://mega.nz/file/ZxgAAIZB#oMYIyy7iLYtnpnwRsKOuVRttOVrAHdQ2-DqPil2s7Lc)

Try to get all of the drum tracks imported into a class struct

In [2]:
# Root folder of the drum archive that gets scanned
mypath = 'MIDI/800000_Drum_Percussion_MIDI_Archive[6_19_15]'

# One list per CSV column; analyzeMID appends one entry to each list per file.
# Built with a comprehension so every column gets its own, independent list.
csvdict = {
    column: []
    for column in (
        'path',
        'miditype',
        'tempo',
        'length',
        'trackcount',
        'trackname',
        'timesignature',
        'messagecount',
    )
}

# Analysis function for a MIDI
def analyzeMID(mpath):
    """Collect metadata for one MIDI file and append it as a row to ``csvdict``.

    Files that mido cannot load are skipped without adding a row, so a single
    bad file does not abort a long scan.

    Parameters
    ----------
    mpath : str
        Path to the MIDI file. It is stored in the ``path`` column relative
        to the module-level ``mypath``.

    Returns
    -------
    None
        The result is recorded by appending one entry to every list in the
        module-level ``csvdict``. Per-track values (track name, time
        signature, message count) are joined with ``;``, one item per track.
    """
    # Load in the .MID file
    try:
        mid = mido.MidiFile(mpath)
    except Exception:
        # Something went wrong with reading, ignore the file and move on.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel
        # interrupt working during the scan.
        return

    # File path without the starting archive folder
    filepath = os.path.relpath(mpath, mypath)

    # File-level metadata
    mtype = mid.type
    trackcount = len(mid.tracks)
    # NOTE: the 'tempo' column holds ticks_per_beat (the file's timing
    # resolution), not a tempo taken from set_tempo messages. Kept as-is so
    # the CSV layout does not change; see the notebook TODO.
    tempo = mid.ticks_per_beat
    try:
        mlen = mid.length
    except ValueError:
        # mido cannot compute a playback length for type 2 (asynchronous)
        # files; record a missing value instead of aborting the whole scan
        mlen = None

    # Extract per-track metadata from the meta messages
    msgcnt = []
    tname = []
    tsig = []
    for track in mid.tracks:
        msgcnt.append(str(len(track)))
        temptname = None
        temptsig = None

        for msg in track:
            if msg.type == 'track_name':
                temptname = msg.name
            elif msg.type == 'time_signature':
                # If a track carries several time signatures, the last one wins
                temptsig = "{}.{}".format(msg.numerator, msg.denominator)

        # Use a placeholder when the track had no such meta message
        tname.append("N/A" if temptname is None else temptname)
        tsig.append("N/A" if temptsig is None else temptsig)

    # All values loaded, add into dictionary
    csvdict['path'].append(filepath)
    csvdict['miditype'].append(mtype)
    csvdict['tempo'].append(tempo)
    csvdict['length'].append(mlen)
    csvdict['trackcount'].append(trackcount)
    csvdict['trackname'].append(";".join(tname))
    csvdict['timesignature'].append(";".join(tsig))
    csvdict['messagecount'].append(";".join(msgcnt))

    return

# Progress-bar total for the scan; this is the file count shown as 100% in
# this cell's recorded output
EXPECTED_MIDI_FILE_COUNT = 774268
pbar = tqdm.tqdm(total=EXPECTED_MIDI_FILE_COUNT)

# Loop through the path to get all midi files analyzed
def getFiles(basepath):
    """Recursively analyze every ``.mid`` file found below ``basepath``.

    Each MIDI file is passed to ``analyzeMID`` and the module-level progress
    bar ``pbar`` is advanced by one. ``.DS_Store`` entries, non-MIDI files and
    entries that are neither a directory nor a regular file are skipped.

    Parameters
    ----------
    basepath : str
        Directory to scan.

    Returns
    -------
    int
        Always ``0``.
    """
    # Sorted so the order of the resulting rows is reproducible instead of
    # depending on the filesystem's listing order
    for f in sorted(os.listdir(basepath)):
        # Ignore any mac zip artifacts
        if f == '.DS_Store':
            continue
        newpath = os.path.join(basepath, f)
        if os.path.isdir(newpath):
            # Only real directories are descended into; treating every
            # "not a regular file" entry as a folder would make os.listdir
            # raise on e.g. a dangling symlink
            getFiles(newpath)
        elif os.path.isfile(newpath) and f.lower().endswith('.mid'):
            # Analyze the data of the midi file
            analyzeMID(newpath)
            pbar.update(1)
        # Anything else is not a midi file, ignore
    return 0

# Scan the whole archive, then release the progress bar
getFiles(mypath)
pbar.close()

# Turn the collected columns into a table and write it out as CSV
df = pd.DataFrame(csvdict)
df.to_csv("Dumdata.csv")

# TODO
# > Add length
# > Make sense of tempo
# > Fix time signature output (4/4 vs 4.4 or whatever)
# >

100%|██████████| 774268/774268 [2:25:29<00:00, 88.70it/s]     


In [None]:
# Load the MIDI
testmid = mido.MidiFile("MIDI/800000_Drum_Percussion_MIDI_Archive[6_19_15]/Superior Drummer 2 Drum Midi [425,000 files]/00071@FullKit/0991@4#4_STRAIGHT_HiHat/EighthNote/Theme-98/Variation-0/Beat-72-3.mid")

print(testmid.length)

# tempo2bpm expects a tempo in microseconds per beat, which lives in the
# file's set_tempo meta messages; ticks_per_beat is only the timing
# resolution. Use the first set_tempo found, falling back to the MIDI default
# of 500000 us/beat (120 BPM) when the file has none.
testtempo = next(
    (msg.tempo
     for track in testmid.tracks
     for msg in track
     if msg.type == 'set_tempo'),
    500000,
)
print(mido.tempo2bpm(testtempo))



In [None]:
# Small scratch table to check how to_csv writes values that contain commas
a = {
    "Hello": ["a", "b", "c"],
    "There": [1, 2, 3],
    "Person": ["a,a", "b,b", "c,c"],
}
df = pd.DataFrame(a)
df.to_csv("Dumdata1.csv")