In [68]:
from mido import MidiFile
import numpy as np
import os

In [70]:
dirlist = os.listdir("data")
files = []
for f in dirlist:
    if f.endswith(".mid"):
        files.append(f)
    elif f != ".ipynb_checkpoints":
#         Then it is a subdirectory containing midi files
        subdirlist = os.listdir("data/{}".format(f))
        for subf in subdirlist:
            if f.endswith(".mid"):
                files.append(f)

In [71]:
# possible MIDI notes are 0-127; len=128
count = np.zeros((128, 128))

for f in files:
    fname = "data/" + f
    try:
        mid = MidiFile(fname, clip=False)
        for track in mid.tracks:
            msgs = [msg for msg in track if msg.type == 'note_on']
            for i in range(len(msgs)-1):
                note1 = msgs[i].note
                note2 = msgs[i+1].note
                count[note1,note2] += 1
    except OSError:
        print("Could not read file: {}".format(fname))

Could not read file: data/BuxWV69.mid
Could not read file: data/Sonataduodecima.mid
Could not read file: data/aurore.mid
Could not read file: data/fleur_jetee.mid


In [72]:
%store count

Stored 'count' (ndarray)


In [41]:
# %store -r count

In [117]:
# not checking for outside the keyboard range; expecting the high frequency bigrams to be in range
nonzero_count = {}
for i in range(len(count)):
    for j in range(len(count[i])):
        if count[i,j] > 0: # and i != j:
            ind1 = i
            ind2 = j
#             check for instances where one note is sharp of the other
            if ind1 % 2 > 0:
                ind1 -= 1
            if ind2 % 2 > 0:
                ind2 -= 1
#             check if both notes use same key
            if not ind1 == ind2:
#                 consolidate pairs regardless of order
                if ind2 < ind1:
                    temp = ind1
                    ind1 = ind2
                    ind2 = temp
#                 check if nonzero_count already contains an entry for this
                if (ind1, ind2) in nonzero_count:
                    nonzero_count[(ind1, ind2)] += count[i,j]
                else:
                    nonzero_count[(ind1, ind2)] = count[i,j]

In [118]:
%store nonzero_count

Stored 'nonzero_count' (dict)


In [109]:
# %store -r nonzero_count

In [119]:
nonzero_count = [[x, nonzero_count[x]] for x in nonzero_count]

In [120]:
nonzero_count

[[(32, 54), 108.0],
 [(98, 102), 87.0],
 [(62, 64), 40973.0],
 [(100, 106), 4.0],
 [(36, 68), 60.0],
 [(34, 52), 319.0],
 [(32, 44), 4382.0],
 [(48, 86), 7.0],
 [(76, 100), 130.0],
 [(24, 66), 1.0],
 [(38, 86), 4.0],
 [(54, 92), 16.0],
 [(52, 68), 1620.0],
 [(50, 84), 22.0],
 [(78, 86), 2353.0],
 [(42, 62), 1193.0],
 [(38, 40), 2534.0],
 [(30, 66), 67.0],
 [(44, 56), 12021.0],
 [(42, 88), 1.0],
 [(58, 94), 16.0],
 [(88, 98), 54.0],
 [(36, 40), 1094.0],
 [(78, 98), 17.0],
 [(44, 86), 14.0],
 [(24, 52), 9.0],
 [(54, 58), 29172.0],
 [(80, 94), 100.0],
 [(26, 54), 14.0],
 [(56, 58), 30765.0],
 [(70, 94), 336.0],
 [(82, 92), 312.0],
 [(28, 32), 205.0],
 [(22, 42), 1.0],
 [(64, 90), 268.0],
 [(30, 58), 80.0],
 [(60, 62), 37265.0],
 [(76, 90), 327.0],
 [(94, 102), 84.0],
 [(48, 50), 14137.0],
 [(56, 82), 487.0],
 [(22, 52), 2.0],
 [(50, 56), 14407.0],
 [(40, 60), 974.0],
 [(60, 104), 1.0],
 [(40, 42), 3618.0],
 [(84, 102), 1.0],
 [(32, 68), 127.0],
 [(46, 52), 6219.0],
 [(96, 104), 48.0],
 [(

In [121]:
ordered = sorted(nonzero_count, key=lambda x: x[1], reverse=True)

In [123]:
len(ordered)

729

In [94]:
def numToName(num):
    ones = "C C# D D# E F F# G G# A A# B".split()
    octave = num//12
    letters = num%12
    note = list(ones[letters])
    note.insert(1, str(octave))
    note = ''.join(note)
    return note

In [124]:
for note in ordered[0:100]:
    print("{}-{}: count {}".format(numToName(note[0][0]), numToName(note[0][1]), note[1]))

D5-E5: count 40973.0
E5-F5#: count 40167.0
F5#-G5#: count 38314.0
C6-D6: count 37453.0
C5-E5: count 37442.0
C5-D5: count 37265.0
A5#-C6: count 36979.0
G5#-A5#: count 36825.0
G4#-C5: count 33931.0
A4#-D5: count 33731.0
D6-E6: count 33684.0
D5-F5#: count 31336.0
G4#-A4#: count 30765.0
A4#-C5: count 30069.0
F4#-A4#: count 29172.0
F4#-G4#: count 27661.0
E5-G5#: count 27245.0
E4-F4#: count 26300.0
E4-G4#: count 25350.0
F5#-A5#: count 24951.0
G5#-C6: count 24791.0
E6-F6#: count 24494.0
A5#-D6: count 22339.0
D4-F4#: count 19839.0
C6-E6: count 19493.0
D4-E4: count 18768.0
G4#-D5: count 18055.0
F6#-G6#: count 17273.0
F4#-C5: count 17200.0
D6-F6#: count 16291.0
F4#-D5: count 16238.0
G4#-E5: count 15973.0
A4#-E5: count 15525.0
F5#-C6: count 14414.0
D4-G4#: count 14407.0
F5#-D6: count 14356.0
C5-F5#: count 14337.0
D5-G5#: count 14276.0
C4-D4: count 14137.0
E4-A4#: count 13437.0
E5-C6: count 12966.0
C4-E4: count 12741.0
G5#-D6: count 12229.0
E5-A5#: count 12202.0
G3#-G4#: count 12021.0
D5-A5#: coun