In [1]:
import json

In [8]:
data_dir = "C:/Users/Kshitij Alwadhi/Documents/GitHub/Clarinet/Data/Json/2018_clipped/"+'notes.json'
queries = "C:/Users/Kshitij Alwadhi/Documents/GitHub/Clarinet/Data/Json/2018_queries/"+'notes.json'

In [3]:
def splitNotes(notes):
    new_notes = []
    for note in notes:
        start = note[1]/1000.
        end = note[2]/1000.
        startSec = int(start)
        endSec = int(end)

        if startSec == endSec:
            new_notes.append(note)
        else:
            new_notes.append([note[0], note[1], (startSec+1)*1000, note[3]])
            for i in range(startSec+1, endSec-1):
                new_notes.append([note[0], int(i*1000), int(i+1)*1000, note[3]])
            new_notes.append([note[0], int(endSec*1000), note[2], note[3]])
    return new_notes


In [4]:
def getStrides(notes, size):
    notes = sorted(notes, key=lambda x: x[1])
    chunks = []
    for _ in range(len(notes)):
        chunks.append([])

    curT = 0
    nextidx = 0
    flag = False

    i = 0
    while(i < len(notes)):
        st = notes[i][1]
        maxT = curT + size
        if st >= maxT:
            curT += 1000
            i = nextidx
            flag = False
            continue

        if st % 1000 == 0 and flag == False and st != curT:
            nextidx = i
            flag = True

        pos = int(curT/1000)
        chunks[pos].append(notes[i])
        i += 1
    ans = []
    for c in chunks:
        if len(c) > 0:
            ans.append(c)
    return ans

In [5]:
with open(data_dir, 'r') as f:
    fname_to_notes = json.load(f)


In [6]:
# a function which calculates the edit distance between two strings
def edit_distance(s1, s2):
    m = len(s1)
    n = len(s2)
    dp = [[0 for x in range(n+1)] for x in range(m+1)]
    for i in range(m+1):
        for j in range(n+1):
            if i == 0:
                dp[i][j] = j
            elif j == 0:
                dp[i][j] = i
            elif s1[i-1] == s2[j-1]:
                dp[i][j] = dp[i-1][j-1]
            else:
                dp[i][j] = 1 + min(dp[i][j-1], dp[i-1][j], dp[i-1][j-1])
    return dp[m][n]


# a function which calculates the similarity between two strings
def similarity(s1, s2):
    return 1 - edit_distance(s1, s2) / max(len(s1), len(s2))


midiEt_to_note = {
    12: "C",
    13: "C#",
    14: "D",
    15: "D#",
    16: "E",
    17: "F",
    18: "F#",
    19: "G",
    20: "G#",
    21: "A",
    22: "A#",
    23: "B"
}


# a function which converts a sequence of midiEt to a sequence of notes
def midiEt_to_note_sequence(midiEt_sequence):
    note_sequence = []
    for midiEt in midiEt_sequence:
        num = midiEt % 12
        note_sequence.append(midiEt_to_note[num + 12])
    return "".join(note_sequence)

In [7]:
def compute_similarities(query):
    fname_to_similarity = {}
    for fname in fname_to_notes:
        sim = 0
        notes = fname_to_notes[fname]
        notes = splitNotes(notes)
        strides = getStrides(notes, 5000)

        for stride in strides:
            stride_notes = [note[0] for note in stride]
            note_sequence = midiEt_to_note_sequence(stride_notes)
            sim = max(sim, similarity(query, note_sequence))
        fname_to_similarity[fname] = sim
    return fname_to_similarity

In [24]:
query = "AAF#DAF#AADADF#AAF#F#DAF#AF#DAAF#AF#DAF#ADF#F#AF#EGC#F#EAF#DDF#"

In [26]:
fname = 'MIDI-Unprocessed_Chamber2_MID--AUDIO_09_R3_2018_wav--1_melody.mid'
notes = fname_to_notes[fname]  # (pitch,start,end,velocity)
updatedNotes = splitNotes(notes)
strides = getStrides(updatedNotes, 5000)
sim = 0
for stride in strides:
    stride_notes = [note[0] for note in stride]
    note_sequence = midiEt_to_note_sequence(stride_notes)
    #print(note_sequence)
    sim = max(sim, similarity(query, note_sequence))


In [25]:
sim

0.1578947368421053

In [27]:
sim

0.8095238095238095

In [9]:
with open(queries, 'r') as f:
    queries_to_notes = json.load(f)

In [12]:
qname = "MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--4_query.mid"


In [13]:
qnotes = queries_to_notes[qname]

In [14]:
qnotes = [note[0] for note in qnotes]
query = midiEt_to_note_sequence(qnotes)


In [16]:
fname_to_similarity = compute_similarities(query)

In [19]:
dict(sorted(fname_to_similarity.items(), key=lambda item: item[1],reverse=True))


{'MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--4.midi': 0.7735849056603774,
 'MIDI-Unprocessed_Recital17-19_MID--AUDIO_18_R1_2018_wav--2.midi': 0.6346153846153846,
 'MIDI-Unprocessed_Recital9-11_MID--AUDIO_10_R1_2018_wav--2.midi': 0.6226415094339622,
 'MIDI-Unprocessed_Recital13-15_MID--AUDIO_14_R1_2018_wav--4.midi': 0.5869565217391304,
 'MIDI-Unprocessed_Recital17-19_MID--AUDIO_19_R1_2018_wav--2.midi': 0.5769230769230769,
 'MIDI-Unprocessed_Recital4_MID--AUDIO_04_R1_2018_wav--5.midi': 0.5609756097560976,
 'MIDI-Unprocessed_Recital20_MID--AUDIO_20_R1_2018_wav--4.midi': 0.5476190476190477,
 'MIDI-Unprocessed_Recital12_MID--AUDIO_12_R1_2018_wav--2.midi': 0.5454545454545454,
 'MIDI-Unprocessed_Recital13-15_MID--AUDIO_13_R1_2018_wav--1.midi': 0.5254237288135593,
 'MIDI-Unprocessed_Schubert1-3_MID--AUDIO_05_R2_2018_wav.midi': 0.5121951219512195,
 'MIDI-Unprocessed_Schubert7-9_MID--AUDIO_16_R2_2018_wav.midi': 0.5121951219512195,
 'MIDI-Unprocessed_Recital9-11_MID--AUDIO_10_R1_2018_