In [None]:
import essentia.standard
from dtw import *
import os
import pretty_midi
from time import time
import tslearn.metrics
import matplotlib.pyplot as plt
from collections import OrderedDict
import librosa.feature
import numpy as np

In [None]:
def calculate_gradient_vector(melody):
    """Return the differences between each note and the one before it.

    Args:
        melody: Indexable sequence of numeric values.

    Returns:
        A list with ``len(melody) - 1`` entries where entry ``k`` equals
        ``melody[k + 1] - melody[k]``. Sequences with fewer than two
        elements yield an empty list.
    """
    return [melody[pos] - melody[pos - 1] for pos in range(1, len(melody))]

In [None]:
def read_melody_files(filename):
    """Read a comma-separated list of numbers from a text file.

    Blank tokens (for example the one produced by a trailing comma, a
    trailing newline after the last comma, or an empty file) are skipped
    instead of raising ``ValueError``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of floats, in file order. Empty if the file holds no values.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank token is not a valid float literal.
    """
    with open(filename) as f:
        raw_text = f.read()

    # float() already tolerates surrounding whitespace, so only fully blank
    # tokens need to be filtered out.
    return [float(token) for token in raw_text.split(',') if token.strip()]

In [None]:
def normalize(series, mmin, mmax):
    """Min-max scale every value of ``series`` using the given bounds.

    Args:
        series: Iterable of numeric values.
        mmin: Value that maps to 0.
        mmax: Value that maps to 1.

    Returns:
        A list with ``(value - mmin) / (mmax - mmin)`` for each value. When
        ``mmax == mmin`` (e.g. a constant series) the scale is undefined, so
        a list of zeros of the same length is returned instead of dividing
        by zero.
    """
    span = mmax - mmin
    if span == 0:
        return [0.0 for _ in series]
    return [(value - mmin) / span for value in series]

<h1>Experimento 1 (solo vocal, sin ceros, chromas) - MRR: 0.23 - Top10: 60%</h1>

In [None]:
# Directories holding the reference chromagrams and the hummed queries.
# The trailing slash is required: the evaluation cell builds file paths by
# plain string concatenation (path + file name).
melodies_path = 'dataset/chromas/vocals/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sorting keeps the printed
# output and the tie-breaking of equal DTW distances reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 1: for every hummed query, rank all reference chromagrams by DTW
# distance and accumulate the Mean Reciprocal Rank and the Top-10 hit rate.
numerador_MRR = 0  # sum of 1/rank of the matching melody over all queries
numerador_TOP = 0  # number of queries whose matching melody is in the top 10
denominador = 0    # number of queries evaluated

# The reference chromagrams do not depend on the query, so load each file once
# instead of re-reading it for every humming.
chromas_db = {melodie: np.loadtxt(melodies_path + melodie) for melodie in melodies}

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=50)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Discard zero-valued pitch frames (the "sin ceros" variant).
    pitch_values = pitch_values[pitch_values != 0]
    chroma = librosa.feature.chroma_stft(y=np.asarray(pitch_values), sr=44100, hop_length=50)
    # The query is transposed before alignment; the reference is used as loaded.
    query = chroma.transpose()
    for melodie, chroma_db in chromas_db.items():
        alignment = dtw(query, chroma_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest distance first; sorted() is stable, so ties keep listing order.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # A humming "<name>+<suffix>.<ext>" matches the reference "<name>.<ext>".
    expected = hum.split('.')[0].split('+')[0]
    for position, (candidate, distance) in enumerate(ordered_results.items(), start=1):
        if expected == candidate.split('.')[0]:
            numerador_MRR += 1 / position
            # Ranks are 1-based here. The previous zero-based check `i <= 10`
            # also counted rank 11 as a Top-10 hit.
            if position <= 10:
                numerador_TOP += 1
        print(position, candidate, distance)
        print("----------")
    print("================================")
# Guard against an empty hummings directory.
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 2 (solo vocal, secuencia completa, con ceros, normalizando y gradientes) - MRR: 0.26 - Top10: 72%</h1>

In [None]:
# Directories holding the reference vocal melodies and the hummed queries.
# The trailing slash is required: the evaluation cell builds file paths by
# plain string concatenation (path + file name).
melodies_path = 'dataset/melodies/strings/vocals/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sorting keeps the printed
# output and the tie-breaking of equal DTW distances reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 2: compare normalized pitch gradients (zeros kept) of every
# hummed query against every reference melody with DTW, then report the Mean
# Reciprocal Rank and the Top-10 hit rate.
numerador_MRR = 0  # sum of 1/rank of the matching melody over all queries
numerador_TOP = 0  # number of queries whose matching melody is in the top 10
denominador = 0    # number of queries evaluated

# The reference features do not depend on the query, so read and process each
# melody file once instead of repeating the work for every humming.
gradients_db = {}
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    gradients_db[melodie] = calculate_gradient_vector(pitch_values_db)

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Same processing as the reference side: min-max scaling, then differences.
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in gradients_db.items():
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest distance first; sorted() is stable, so ties keep listing order.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # A humming "<name>+<suffix>.<ext>" matches the reference "<name>.<ext>".
    expected = hum.split('.')[0].split('+')[0]
    for position, (candidate, distance) in enumerate(ordered_results.items(), start=1):
        if expected == candidate.split('.')[0]:
            numerador_MRR += 1 / position
            # Ranks are 1-based here. The previous zero-based check `i <= 10`
            # also counted rank 11 as a Top-10 hit.
            if position <= 10:
                numerador_TOP += 1
        print(position, candidate, distance)
        print("----------")
    print("================================")
# Guard against an empty hummings directory.
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 3 (solo vocal, secuencia completa, sin ceros, normalizando y gradientes) - MRR: 0.28 - Top10: 56%</h1>

In [None]:
# Directories holding the reference vocal melodies and the hummed queries.
# The trailing slash is required: the evaluation cell builds file paths by
# plain string concatenation (path + file name).
melodies_path = 'dataset/melodies/strings/vocals/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sorting keeps the printed
# output and the tie-breaking of equal DTW distances reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 3: compare normalized pitch gradients (zeros removed) of every
# hummed query against every reference melody with DTW, then report the Mean
# Reciprocal Rank and the Top-10 hit rate.
numerador_MRR = 0  # sum of 1/rank of the matching melody over all queries
numerador_TOP = 0  # number of queries whose matching melody is in the top 10
denominador = 0    # number of queries evaluated

# The reference features do not depend on the query, so read and process each
# melody file once instead of repeating the work for every humming.
gradients_db = {}
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    # Discard zero-valued entries before scaling (the "sin ceros" variant).
    pitch_values_db = [value for value in pitch_values_db if value != 0]
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    gradients_db[melodie] = calculate_gradient_vector(pitch_values_db)

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Same processing as the reference side: drop zeros, scale, differentiate.
    pitch_values = pitch_values[pitch_values != 0]
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in gradients_db.items():
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest distance first; sorted() is stable, so ties keep listing order.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # A humming "<name>+<suffix>.<ext>" matches the reference "<name>.<ext>".
    expected = hum.split('.')[0].split('+')[0]
    for position, (candidate, distance) in enumerate(ordered_results.items(), start=1):
        if expected == candidate.split('.')[0]:
            numerador_MRR += 1 / position
            # Ranks are 1-based here. The previous zero-based check `i <= 10`
            # also counted rank 11 as a Top-10 hit.
            if position <= 10:
                numerador_TOP += 1
        print(position, candidate, distance)
        print("----------")
    print("================================")
# Guard against an empty hummings directory.
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 4 (concatenaciones, secuencia completa, sin ceros, normalizando y gradientes) - MRR: 0.22 - Top10: 64%</h1>

In [None]:
# Directories holding the reference concatenated melodies and the hummed
# queries. The trailing slash is required: the evaluation cell builds file
# paths by plain string concatenation (path + file name).
melodies_path = 'dataset/melodies/strings/concatenations/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sorting keeps the printed
# output and the tie-breaking of equal DTW distances reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 4: compare normalized pitch gradients (zeros removed) of every
# hummed query against every concatenated reference melody with DTW, then
# report the Mean Reciprocal Rank and the Top-10 hit rate.
numerador_MRR = 0  # sum of 1/rank of the matching melody over all queries
numerador_TOP = 0  # number of queries whose matching melody is in the top 10
denominador = 0    # number of queries evaluated

# The reference features do not depend on the query, so read and process each
# melody file once instead of repeating the work for every humming.
gradients_db = {}
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    # Discard zero-valued entries before scaling (the "sin ceros" variant).
    pitch_values_db = [value for value in pitch_values_db if value != 0]
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    gradients_db[melodie] = calculate_gradient_vector(pitch_values_db)

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Same processing as the reference side: drop zeros, scale, differentiate.
    pitch_values = pitch_values[pitch_values != 0]
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in gradients_db.items():
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest distance first; sorted() is stable, so ties keep listing order.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # A humming "<name>+<suffix>.<ext>" matches the reference "<name>.<ext>".
    expected = hum.split('.')[0].split('+')[0]
    for position, (candidate, distance) in enumerate(ordered_results.items(), start=1):
        if expected == candidate.split('.')[0]:
            numerador_MRR += 1 / position
            # Ranks are 1-based here. The previous zero-based check `i <= 10`
            # also counted rank 11 as a Top-10 hit.
            if position <= 10:
                numerador_TOP += 1
        print(position, candidate, distance)
        print("----------")
    print("================================")
# Guard against an empty hummings directory.
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 5 (concatenaciones, secuencia completa, con ceros, normalizando y gradientes) - MRR: 0.24 - Top10: 76%</h1>

In [None]:
# Directories holding the reference concatenated melodies and the hummed
# queries. The trailing slash is required: the evaluation cell builds file
# paths by plain string concatenation (path + file name).
melodies_path = 'dataset/melodies/strings/concatenations/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sorting keeps the printed
# output and the tie-breaking of equal DTW distances reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 5: compare normalized pitch gradients (zeros kept) of every
# hummed query against every concatenated reference melody with DTW, then
# report the Mean Reciprocal Rank and the Top-10 hit rate.
numerador_MRR = 0  # sum of 1/rank of the matching melody over all queries
numerador_TOP = 0  # number of queries whose matching melody is in the top 10
denominador = 0    # number of queries evaluated

# The reference features do not depend on the query, so read and process each
# melody file once instead of repeating the work for every humming.
gradients_db = {}
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    gradients_db[melodie] = calculate_gradient_vector(pitch_values_db)

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Same processing as the reference side: min-max scaling, then differences.
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in gradients_db.items():
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest distance first; sorted() is stable, so ties keep listing order.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # A humming "<name>+<suffix>.<ext>" matches the reference "<name>.<ext>".
    expected = hum.split('.')[0].split('+')[0]
    for position, (candidate, distance) in enumerate(ordered_results.items(), start=1):
        if expected == candidate.split('.')[0]:
            numerador_MRR += 1 / position
            # Ranks are 1-based here. The previous zero-based check `i <= 10`
            # also counted rank 11 as a Top-10 hit.
            if position <= 10:
                numerador_TOP += 1
        print(position, candidate, distance)
        print("----------")
    print("================================")
# Guard against an empty hummings directory.
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)