In [78]:
import json

In [79]:
data_dir = "C:/Users/Kshitij Alwadhi/Documents/GitHub/Clarinet/Data/Json/2018_clipped/"+'melody.json'
queries = "C:/Users/Kshitij Alwadhi/Documents/GitHub/Clarinet/Data/Json/2018_queries/"+'melody.json'

In [80]:
def splitNotes(notes):
    new_notes = []
    for note in notes:
        start = note[1]/1000.
        end = note[2]/1000.
        startSec = int(start)
        endSec = int(end)

        if startSec == endSec:
            new_notes.append(note)
        else:
            new_notes.append([note[0], note[1], (startSec+1)*1000, note[3]])
            for i in range(startSec+1, endSec-1):
                new_notes.append([note[0], int(i*1000), int(i+1)*1000, note[3]])
            new_notes.append([note[0], int(endSec*1000), note[2], note[3]])
    return new_notes


In [81]:
def getStrides(notes, size):
    notes = sorted(notes, key=lambda x: x[1])
    chunks = []
    for _ in range(len(notes)+1000):
        chunks.append([])

    curT = 0
    nextidx = 0
    flag = False

    i = 0
    while(i < len(notes)):
        st = notes[i][1]
        maxT = curT + size
        if st >= maxT:
            curT += 1000
            i = nextidx
            flag = False
            continue

        if st % 1000 == 0 and flag == False and st != curT:
            nextidx = i
            flag = True

        pos = int(curT/1000)
        chunks[pos].append(notes[i])
        i += 1
    ans = []
    for c in chunks:
        if len(c) > 0:
            ans.append(c)
    return ans

In [82]:
with open(data_dir, 'r') as f:
    fname_to_notes = json.load(f)


In [83]:
# a function which calculates the edit distance between two strings
def edit_distance(s1, s2):
    m = len(s1)
    n = len(s2)
    dp = [[0 for x in range(n+1)] for x in range(m+1)]
    for i in range(m+1):
        for j in range(n+1):
            if i == 0:
                dp[i][j] = j
            elif j == 0:
                dp[i][j] = i
            elif s1[i-1] == s2[j-1]:
                dp[i][j] = dp[i-1][j-1]
            else:
                dp[i][j] = 1 + min(dp[i][j-1], dp[i-1][j], dp[i-1][j-1])
    return dp[m][n]


# a function which calculates the similarity between two strings
def similarity(s1, s2):
    return 1 - edit_distance(s1, s2) / max(len(s1), len(s2))


midiEt_to_note = {
    12: "C",
    13: "C#",
    14: "D",
    15: "D#",
    16: "E",
    17: "F",
    18: "F#",
    19: "G",
    20: "G#",
    21: "A",
    22: "A#",
    23: "B"
}


# a function which converts a sequence of midiEt to a sequence of notes
def midiEt_to_note_sequence(midiEt_sequence):
    note_sequence = []
    for midiEt in midiEt_sequence:
        num = midiEt % 12
        note_sequence.append(midiEt_to_note[num + 12])
    return "".join(note_sequence)

In [84]:
def compute_similarities(query):
    fname_to_similarity = {}
    for fname in fname_to_notes:
        sim = 0
        notes = fname_to_notes[fname]
        notes = splitNotes(notes)
        strides = getStrides(notes, 5000)

        for stride in strides:
            stride_notes = [note[0] for note in stride]
            note_sequence = midiEt_to_note_sequence(stride_notes)
            sim = max(sim, similarity(query, note_sequence))
        fname_to_similarity[fname] = sim
    return fname_to_similarity

In [85]:
with open(queries, 'r') as f:
    queries_to_notes = json.load(f)

In [86]:
qname = "MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--4_query_melody.mid"


In [87]:
qnotes = queries_to_notes[qname]

In [88]:
qnotes = [note[0] for note in qnotes]
query = midiEt_to_note_sequence(qnotes)


In [89]:
fname_to_similarity = compute_similarities(query)

In [92]:
dict(sorted(fname_to_similarity.items(), key=lambda item: item[1],reverse=True))


{'MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--4_melody.mid': 0.7619047619047619,
 'MIDI-Unprocessed_Recital13-15_MID--AUDIO_14_R1_2018_wav--4_melody.mid': 0.6875,
 'MIDI-Unprocessed_Recital17-19_MID--AUDIO_18_R1_2018_wav--2_melody.mid': 0.65,
 'MIDI-Unprocessed_Recital4_MID--AUDIO_04_R1_2018_wav--1_melody.mid': 0.65,
 'MIDI-Unprocessed_Recital12_MID--AUDIO_12_R1_2018_wav--2_melody.mid': 0.625,
 'MIDI-Unprocessed_Recital5-7_MID--AUDIO_05_R1_2018_wav--3_melody.mid': 0.625,
 'MIDI-Unprocessed_Recital9-11_MID--AUDIO_10_R1_2018_wav--2_melody.mid': 0.5789473684210527,
 'MIDI-Unprocessed_Recital5-7_MID--AUDIO_07_R1_2018_wav--2_melody.mid': 0.5625,
 'MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1_melody.mid': 0.52,
 'MIDI-Unprocessed_Recital13-15_MID--AUDIO_14_R1_2018_wav--2_melody.mid': 0.5,
 'MIDI-Unprocessed_Recital20_MID--AUDIO_20_R1_2018_wav--3_melody.mid': 0.5,
 'MIDI-Unprocessed_Recital4_MID--AUDIO_04_R1_2018_wav--5_melody.mid': 0.5,
 'MIDI-Unprocessed_Recital9-11_MID

In [91]:
def distance(s1:str,s2:str):
    m = len(s1)
    n = len(s2)
    dp = [[0 for x in range(n+1)] for x in range(m+1)]
    for i in range(m+1):
        for j in range(n+1):
            if i == 0:
                dp[i][j] = j
            elif j == 0:
                dp[i][j] = i
            elif s1[i-1] == s2[j-1]:
                dp[i][j] = dp[i-1][j-1]
            else:
                dp[i][j] = 1 + min(dp[i][j-1], dp[i-1][j], dp[i-1][j-1])
    return dp[m][n]


def similarityv2(query:str,data:str):
    query_len=len(query)
    data_len=len(data)

    s1=query

    score=float("inf")

    start=-1
    end=-1

    while end<data_len:
        start=start+1
        end=min(start+query_len,data_len)

        s2=data[start:end]
        

        edit_distance=distance(s1,s2)
        if edit_distance<score:
            score=edit_distance

    sim=1-score/query_len
    return(sim)

In [97]:
def compute_similaritiesv2(query):
    fname_to_similarity = {}
    for fname in fname_to_notes:
        sim = 0
        notes = fname_to_notes[fname]
        nn = []
        for n in notes:
            nn.append(n[0])
        note_sequence = midiEt_to_note_sequence(nn)
        sim = max(sim, similarityv2(query, note_sequence))
        fname_to_similarity[fname] = sim
    return fname_to_similarity


In [98]:
fname_to_similarity2 = compute_similaritiesv2(query)

In [99]:
fname_to_similarity2 = dict(sorted(fname_to_similarity2.items(), key=lambda item: item[1],reverse=True))

In [100]:
fname_to_similarity2

{'MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--4_melody.mid': 1.0,
 'MIDI-Unprocessed_Recital17-19_MID--AUDIO_18_R1_2018_wav--2_melody.mid': 0.8125,
 'MIDI-Unprocessed_Recital1-3_MID--AUDIO_02_R1_2018_wav--2_melody.mid': 0.6875,
 'MIDI-Unprocessed_Recital13-15_MID--AUDIO_13_R1_2018_wav--2_melody.mid': 0.6875,
 'MIDI-Unprocessed_Recital13-15_MID--AUDIO_15_R1_2018_wav--4_melody.mid': 0.6875,
 'MIDI-Unprocessed_Recital17-19_MID--AUDIO_17_R1_2018_wav--2_melody.mid': 0.6875,
 'MIDI-Unprocessed_Recital17-19_MID--AUDIO_19_R1_2018_wav--3_melody.mid': 0.6875,
 'MIDI-Unprocessed_Chamber6_MID--AUDIO_20_R3_2018_wav--2_melody.mid': 0.625,
 'MIDI-Unprocessed_Recital1-3_MID--AUDIO_02_R1_2018_wav--3_melody.mid': 0.625,
 'MIDI-Unprocessed_Recital12_MID--AUDIO_12_R1_2018_wav--2_melody.mid': 0.625,
 'MIDI-Unprocessed_Recital12_MID--AUDIO_12_R1_2018_wav--3_melody.mid': 0.625,
 'MIDI-Unprocessed_Recital13-15_MID--AUDIO_14_R1_2018_wav--2_melody.mid': 0.625,
 'MIDI-Unprocessed_Recital13-15_MID--AUD