In [18]:
from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [None]:
!pip install pydub -q

In [None]:
import librosa
import numpy as np
from scipy.spatial.distance import cosine
import csv
import os
from pydub import AudioSegment
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import ndcg_score, f1_score, recall_score, precision_score

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [31]:
target_track = '/content/gdrive/MyDrive/data/test_music/Måneskin - Zitti E Buoni.wav'
track_list = [x for x in os.listdir('/content/gdrive/MyDrive/data/test_music') if x.endswith('.wav')]

In [32]:
track_list

['Stromae - Papaoutai.wav',
 'RHCP - Dani California.wav',
 'rkomi-insuperabile.wav',
 'outloud-waiting-for-your-love_(zzz.fm).wav',
 'Juanes-Tengo_La_Camisa_Negra-world76.spcs.bio.wav',
 'Key_Glock_-_Penny_(musmore.org).wav',
 'JAY1 - Bella (cdn.mp3id.me).wav',
 'liaze-equal-2003-mp3.wav',
 'Noze_MC_-_Ustrojj_Destrojj_65009419.wav',
 'Andy_Panda_SKRIPTONIT_104-PRIVYChKA-world76.spcs.bio.wav',
 'Bandiera Rossa - Bella Ciao (cdn.mp3id.me).wav',
 'jtm-feat.-jay-warren-burnell-washburn-to-the-top-456704962.wav',
 'Måneskin - HONEY (ARE U COMING_).wav',
 'Maneskin_Lividi_Sui_Gomiti.wav',
 'nickelback_-_animals_(muztune.me).wav',
 'maneskin-the-driver-mp3.wav',
 'bruno-mars-runaway-baby-mp3.wav',
 'Artic Monkeys - Old Yellow Bricks.wav',
 'Måneskin - Zitti E Buoni.wav',
 'Central Cee - 6 For 6 (cdn.mp3id.me).wav']

In [36]:
def convert_mp3_to_wav(source_folder):
    for filename in os.listdir(source_folder):
        if filename.endswith('.mp3'):
            mp3_path = os.path.join(source_folder, filename)
            wav_filename = os.path.splitext(filename)[0] + '.wav'
            wav_path = os.path.join(source_folder, wav_filename)

            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(wav_path, format='wav')
            print(f'Converted {filename} to {wav_filename}')

            os.remove(mp3_path)
            print(f'Deleted {filename}')

source_folder = '/content/gdrive/MyDrive/data/test_music/'
convert_mp3_to_wav(source_folder)

# Calculating similarities between tracks

In [29]:
def calculate_mfcc(file_path, n_mfcc=13):
    """
    Calculate the mean MFCC for an audio file.

    :param file_path: Path to the audio file
    :param n_mfcc: Number of MFCC features to extract
    :return: Mean MFCC vector
    """
    y, sr = librosa.load(file_path)

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfcc, axis=1)

In [27]:
calculate_mfcc(target_track)

array([  6.4874864,  88.45299  , -21.442245 ,  29.450092 ,   3.2618194,
        12.359001 ,   2.059028 ,   5.909114 ,   2.8668435,   3.627637 ,
         1.730883 ,   9.999974 ,  -3.4274147], dtype=float32)

In [82]:
def rank_tracks_cosine(target_track, track_list):
    """
    Rank tracks by cosine distance to the target track.

    :param target_track: Path to the target audio file
    :param track_list: List of paths to audio files to rank
    :return: List of tracks sorted by similarity to the target track
    """
    target_mfcc = calculate_mfcc(target_track)
    track_array = np.array(track_list)
    track_mfccs = list()

    for track in track_list:
        track_mfccs.append(calculate_mfcc(source_folder + track))

    track_mfccs = np.array(track_mfccs)

    distances = np.array([cosine(target_mfcc, track_mfcc) for track_mfcc in track_mfccs])

    sorted_indices = np.argsort(distances)
    sorted_tracks = track_array[sorted_indices]

    return list(sorted_tracks)


cosine_ranked_tracks = rank_tracks_cosine(target_track, track_list)
print(f'target: {os.path.basename(target_track)}\n')
for k, v in enumerate(cosine_ranked_tracks):
    print(k, v)

target: Måneskin - Zitti E Buoni.wav

0 Måneskin - Zitti E Buoni.wav
1 Maneskin_Lividi_Sui_Gomiti.wav
2 Måneskin - HONEY (ARE U COMING_).wav
3 outloud-waiting-for-your-love_(zzz.fm).wav
4 bruno-mars-runaway-baby-mp3.wav
5 maneskin-the-driver-mp3.wav
6 RHCP - Dani California.wav
7 nickelback_-_animals_(muztune.me).wav
8 rkomi-insuperabile.wav
9 Noze_MC_-_Ustrojj_Destrojj_65009419.wav
10 Bandiera Rossa - Bella Ciao (cdn.mp3id.me).wav
11 Juanes-Tengo_La_Camisa_Negra-world76.spcs.bio.wav
12 Artic Monkeys - Old Yellow Bricks.wav
13 Key_Glock_-_Penny_(musmore.org).wav
14 Stromae - Papaoutai.wav
15 Central Cee - 6 For 6 (cdn.mp3id.me).wav
16 Andy_Panda_SKRIPTONIT_104-PRIVYChKA-world76.spcs.bio.wav
17 liaze-equal-2003-mp3.wav
18 JAY1 - Bella (cdn.mp3id.me).wav
19 jtm-feat.-jay-warren-burnell-washburn-to-the-top-456704962.wav


In [83]:
def rank_tracks_knn(target_track, track_list):
    """
    Rank tracks by cosine distance to the target track.

    :param target_track: Path to the target audio file
    :param track_list: List of paths to audio files to rank
    :return: List of tracks sorted by similarity to the target track
    """
    target_features = calculate_mfcc(target_track)
    track_array = np.array(track_list)

    features_array = np.array([calculate_mfcc(source_folder + track) for track in track_list])

    model = NearestNeighbors(n_neighbors=len(track_list), metric='euclidean')
    model.fit(features_array)

    distances, indices = model.kneighbors(target_features.reshape(1, -1))

    sorted_tracks = [track_list[x] for x in indices[0]]
    return list(sorted_tracks)


knn_ranked_tracks = rank_tracks_knn(target_track, track_list)
print(f'target: {os.path.basename(target_track)}\n')
for k, v in enumerate(knn_ranked_tracks):
    print(k, v)

target: Måneskin - Zitti E Buoni.wav

0 Måneskin - Zitti E Buoni.wav
1 Maneskin_Lividi_Sui_Gomiti.wav
2 Måneskin - HONEY (ARE U COMING_).wav
3 bruno-mars-runaway-baby-mp3.wav
4 maneskin-the-driver-mp3.wav
5 outloud-waiting-for-your-love_(zzz.fm).wav
6 nickelback_-_animals_(muztune.me).wav
7 rkomi-insuperabile.wav
8 RHCP - Dani California.wav
9 Noze_MC_-_Ustrojj_Destrojj_65009419.wav
10 Juanes-Tengo_La_Camisa_Negra-world76.spcs.bio.wav
11 Bandiera Rossa - Bella Ciao (cdn.mp3id.me).wav
12 Artic Monkeys - Old Yellow Bricks.wav
13 Key_Glock_-_Penny_(musmore.org).wav
14 Stromae - Papaoutai.wav
15 Central Cee - 6 For 6 (cdn.mp3id.me).wav
16 Andy_Panda_SKRIPTONIT_104-PRIVYChKA-world76.spcs.bio.wav
17 JAY1 - Bella (cdn.mp3id.me).wav
18 liaze-equal-2003-mp3.wav
19 jtm-feat.-jay-warren-burnell-washburn-to-the-top-456704962.wav


In [81]:
# Test set where target is 'Måneskin - Zitti E Buoni.wav'
relevant_tracks = [
    'Måneskin - Zitti E Buoni.wav',
    'Maneskin_Lividi_Sui_Gomiti.wav',
    'Måneskin - HONEY (ARE U COMING_).wav',
    'nickelback_-_animals_(muztune.me).wav',
    'maneskin-the-driver-mp3.wav',
    'RHCP - Dani California.wav',
    'rkomi-insuperabile.wav',
    'outloud-waiting-for-your-love_(zzz.fm).wav',
    'Artic Monkeys - Old Yellow Bricks.wav',
    'Noze_MC_-_Ustrojj_Destrojj_65009419.wav',
    'Juanes-Tengo_La_Camisa_Negra-world76.spcs.bio.wav',
    'Stromae - Papaoutai.wav',
    'jtm-feat.-jay-warren-burnell-washburn-to-the-top-456704962.wav',
    'Bandiera Rossa - Bella Ciao (cdn.mp3id.me).wav',
    'bruno-mars-runaway-baby-mp3.wav',
    'Andy_Panda_SKRIPTONIT_104-PRIVYChKA-world76.spcs.bio.wav',
    'liaze-equal-2003-mp3.wav',
    'JAY1 - Bella (cdn.mp3id.me).wav',
    'Key_Glock_-_Penny_(musmore.org).wav',
    'Central Cee - 6 For 6 (cdn.mp3id.me).wav'
]

In [107]:
import numpy as np
from scipy.stats import kendalltau, spearmanr
from sklearn.metrics import ndcg_score

# Функция для получения рангов
def get_ranks(tracks, ranking):
    return [ranking.index(track) + 1 for track in tracks]

# Функция для расчета метрик
def calculate_metrics(true_ranking, predicted_ranking):
    true_ranks = get_ranks(true_ranking, true_ranking)
    predicted_ranks = get_ranks(true_ranking, predicted_ranking)

    # Kendall's Tau
    tau, _ = kendalltau(true_ranks, predicted_ranks) # измерения степени согласованности между двумя ранжированными переменными

    # Spearman's Rank Correlation Coefficient
    spearman, _ = spearmanr(true_ranks, predicted_ranks) # измерения степени монотонной связи между двумя переменными

    # Mean Reciprocal Rank (MRR)
    # MRR оценивает качество ранжирования путем вычисления среднего обратного ранга первого правильного результата
    def mean_reciprocal_rank(true_ranks, predicted_ranks):
        rank_dict = {rank: predicted_ranks.index(rank) + 1 for rank in true_ranks}
        reciprocal_ranks = [1 / rank_dict[rank] for rank in true_ranks]
        return np.mean(reciprocal_ranks)

    mrr = mean_reciprocal_rank(true_ranks, predicted_ranks)

    # Normalized Discounted Cumulative Gain (NDCG)
    # NDCG измеряет качество ранжирования на основе релевантности и порядка элементов.
    def ndcg(true_ranks, predicted_ranks, k):
        dcg = 0
        idcg = 0
        for i in range(k):
            dcg += (2 ** true_ranks[i] - 1) / np.log2(i + 2)
        true_ranks.sort(reverse=True)
        for i in range(k):
            idcg += (2 ** true_ranks[i] - 1) / np.log2(i + 2)
        return dcg / idcg

    ndcg_value = ndcg(predicted_ranks, true_ranks, len(true_ranks))

    return {
        "Kendall's Tau": tau,
        "Spearman's Rank Correlation Coefficient": spearman,
        "Mean Reciprocal Rank (MRR)": mrr,
        "Normalized Discounted Cumulative Gain (NDCG)": ndcg_value
    }

# Расчет метрик для cosine ранжирования
cosine_metrics = calculate_metrics(relevant_tracks, cosine_ranked_tracks)
print("Cosine Metrics:")
for metric, value in cosine_metrics.items():
    print(f"{metric}: {value}")

# Расчет метрик для knn ранжирования
knn_metrics = calculate_metrics(relevant_tracks, knn_ranked_tracks)
print("\nKNN Metrics:")
for metric, value in knn_metrics.items():
    print(f"{metric}: {value}")


Cosine Metrics:
Kendall's Tau: 0.6421052631578947
Spearman's Rank Correlation Coefficient: 0.7999999999999999
Mean Reciprocal Rank (MRR): 0.1798869828571841
Normalized Discounted Cumulative Gain (NDCG): 0.32590829803822585

KNN Metrics:
Kendall's Tau: 0.6736842105263158
Spearman's Rank Correlation Coefficient: 0.806015037593985
Mean Reciprocal Rank (MRR): 0.17988698285718405
Normalized Discounted Cumulative Gain (NDCG): 0.32418442662837393


# Вывод
#### Мы можем заметить, что метрики практически не различаются, поэтому мы можем взять и KNN и косинусное расстояние. Мы выбрали косинусное расстояние, так как оно менее требовательно и при большом количестве данных нагрузка на сервис будет не такой большой
