In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
import glob
import os

import librosa
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

In [None]:
mfcc_columns = [f"mfcc_{i}" for i in range(1, 41)]
chroma_stft_columns = [f"chroma_stft_{i}" for i in range(1, 13)]
spec_contrast_columns = [f"spec_contrast_{i}" for i in range(1, 8)]
tonnetz_columns = [f"tonnetz_columns_{i}" for i in range(1, 7)]
audio_feature_names = (
    mfcc_columns + chroma_stft_columns + spec_contrast_columns + tonnetz_columns
)
column_names = ["track_id"] + audio_feature_names

In [3]:
def track_id(file):
    """Extracts a track id from an audiofile path"""
    return file.split("/")[-1].split(".")[0]


def build_dict(file, features):
    """Returns a dictionary of track_id of the file and its audio features"""
    row_dict = {"track_id": track_id(file)}
    feature_dict = {
        feature: val for (val, feature) in zip(features, audio_feature_names)
    }
    row_dict.update(feature_dict)
    return row_dict


def preprocessed_files(df, directory):
    return list(df["track_id"].map(lambda x: f"{directory}{x}.mp3"))

In [4]:
def extract_features(audio_path):
    """Define a function to extract features from an audio file"""
    y, sr = librosa.load(audio_path)

    # Mel-frequency cepstral coefficients (MFCCs)
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)

    # Chroma feature
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)

    # Spectral contrast
    spec_contrast = np.mean(
        librosa.feature.spectral_contrast(y=y, sr=sr).T, axis=0)

    # Tonnetz
    tonnetz = np.mean(librosa.feature.tonnetz(
        y=librosa.effects.harmonic(y), sr=sr).T, axis=0)

    return np.hstack((mfccs, chroma_stft, spec_contrast, tonnetz))


def recommend_songs(audio_path, knn, scaler, audio_files):
    """Find similar songs to a given song"""
    query_features = extract_features(audio_path)
    query_scaled = scaler.transform(query_features.reshape(1, -1))
    distances, indices = knn.kneighbors(query_scaled)

    recommended_songs = [audio_files[i] for i in indices.flatten()]
    return recommended_songs, distances

In [27]:
%%time

fma_mp3_asterisk = "../data-fma/fma_small_heap/*.mp3"
csv_file = "../data/audio_features.csv"

audio_files_all = glob.glob(fma_mp3_asterisk)
audio_files_all.sort()
audio_files = audio_files_all

print_frequency = 100

df = pd.DataFrame([], columns=column_names)
for audio_file in audio_files:
    features = extract_features(audio_file)
    features_dict = build_dict(audio_file, features)
    features_df = pd.DataFrame([features_dict])
    df = pd.concat([df, features_df], ignore_index=True)
    if len(df) % print_frequency == 0:
        df.to_csv(csv_file, index=False)
        print(f"{len(df)} songs processed, csv file updated.")
df.to_csv(csv_file, index=False)

# feature_list = []
# for audio_file in audio_files:
#     features = extract_features(audio_file)
#     feature_list.append(features)
#     if len(feature_list) % print_frequency == 0:
#         print(f'{len(feature_list)} songs processed')

X = df.drop(columns=["track_id"]).to_numpy()
print("FUCK YOU>")

100 songs processed, csv file updated.
200 songs processed, csv file updated.
300 songs processed, csv file updated.
400 songs processed, csv file updated.


[src/libmpg123/layer3.c:INT123_do_layer3():1841] error: dequantization failed!


500 songs processed, csv file updated.
600 songs processed, csv file updated.
700 songs processed, csv file updated.
800 songs processed, csv file updated.
900 songs processed, csv file updated.


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


1000 songs processed, csv file updated.
1100 songs processed, csv file updated.


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


1200 songs processed, csv file updated.
1300 songs processed, csv file updated.
1400 songs processed, csv file updated.
1500 songs processed, csv file updated.
1600 songs processed, csv file updated.


  return pitch_tuning(


1700 songs processed, csv file updated.
1800 songs processed, csv file updated.
1900 songs processed, csv file updated.
2000 songs processed, csv file updated.
2100 songs processed, csv file updated.
2200 songs processed, csv file updated.


[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3360) too large for available bit count (3240)
[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3328) too large for available bit count (3240)


2300 songs processed, csv file updated.
2400 songs processed, csv file updated.
2500 songs processed, csv file updated.
2600 songs processed, csv file updated.
2700 songs processed, csv file updated.
2800 songs processed, csv file updated.
2900 songs processed, csv file updated.
3000 songs processed, csv file updated.


  return pitch_tuning(


3100 songs processed, csv file updated.
3200 songs processed, csv file updated.
3300 songs processed, csv file updated.
3400 songs processed, csv file updated.
3500 songs processed, csv file updated.
3600 songs processed, csv file updated.
3700 songs processed, csv file updated.
3800 songs processed, csv file updated.
3900 songs processed, csv file updated.
4000 songs processed, csv file updated.
4100 songs processed, csv file updated.


KeyboardInterrupt: 

In [28]:
df = pd.read_csv(csv_file)
X = df.drop(columns=["track_id"]).to_numpy()

In [13]:
audio_files_all

[]

In [30]:
# Normalize the dataset and train a nearest neighbors model:

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit a k-nearest neighbors model
knn = NearestNeighbors(n_neighbors=10, metric="cosine")
knn.fit(X_scaled)

In [33]:
seed_song_path = "../data-fma/fma_small_heap/000213.mp3"
recommended_songs, distances = recommend_songs(seed_song_path, knn, scaler, audio_files)

for idx, song in enumerate(recommended_songs):
    print(f"Song {idx+1}: {song}, Distance: {distances[0][idx]}")

Song 1: ../data-fma/fma_small_heap/000213.mp3, Distance: 2.220446049250313e-16
Song 2: ../data-fma/fma_small_heap/051776.mp3, Distance: 0.15927701486898205
Song 3: ../data-fma/fma_small_heap/000204.mp3, Distance: 0.30269238447090097
Song 4: ../data-fma/fma_small_heap/070873.mp3, Distance: 0.32007755236248014
Song 5: ../data-fma/fma_small_heap/057628.mp3, Distance: 0.3581571372032304
Song 6: ../data-fma/fma_small_heap/038833.mp3, Distance: 0.37162346694643067
Song 7: ../data-fma/fma_small_heap/006762.mp3, Distance: 0.3764236733782712
Song 8: ../data-fma/fma_small_heap/059677.mp3, Distance: 0.395018447755215
Song 9: ../data-fma/fma_small_heap/084155.mp3, Distance: 0.39661573441964626
Song 10: ../data-fma/fma_small_heap/045513.mp3, Distance: 0.4076604598107314
