In [10]:
# Import packages
import os # To handle file paths and listing directories
import pandas as pd
import numpy as np # Used to manipulate feature vectors
import librosa # To analyse audio files
import librosa.display
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity # To compute similarty between feature vectors of songs

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [11]:
# Audio feature extraction: one fixed-length vector per track
def extract_audio_features(audio_path):
    """Summarise one audio file as a single 1-D feature vector.

    Every time-varying descriptor is averaged over its time axis so that
    tracks of different duration produce vectors of the same length.

    Parameters
    ----------
    audio_path : str or path-like
        Location of the audio file; handed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Concatenation, in this order, of:
        * time-averaged chroma CQT of the harmonic component,
        * 13 time-averaged MFCCs,
        * time-averaged mel spectrogram in dB (relative to the track's peak),
        * mean spectral centroid, mean spectral bandwidth, mean spectral rolloff.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    signal, sample_rate = librosa.load(audio_path, sr=None)

    # Tonal content: only the harmonic half of the harmonic/percussive split
    # is used; the percussive half is discarded.
    harmonic_part = librosa.effects.hpss(signal)[0]
    chroma = librosa.feature.chroma_cqt(y=harmonic_part, sr=sample_rate)
    chroma_mean = np.mean(chroma, axis=1)

    # Timbre: 13 MFCCs, averaged over frames.
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    mfcc_mean = np.mean(mfcc.T, axis=0)

    # Energy per mel band, converted to dB with the track's maximum as the
    # reference, then averaged over frames.
    mel_power = librosa.feature.melspectrogram(y=signal, sr=sample_rate)
    mel_db_mean = np.mean(librosa.power_to_db(mel_power, ref=np.max), axis=1)

    # Magnitude STFT (phase dropped) feeds the three scalar spectral
    # descriptors: centroid, bandwidth and rolloff, in that order.
    magnitude = np.abs(librosa.stft(signal))
    spectral_stats = [
        np.mean(describe(S=magnitude, sr=sample_rate))
        for describe in (
            librosa.feature.spectral_centroid,
            librosa.feature.spectral_bandwidth,
            librosa.feature.spectral_rolloff,
        )
    ]

    return np.concatenate((chroma_mean, mfcc_mean, mel_db_mean, spectral_stats))

In [12]:
# Build the feature table: one feature vector per MP3 in the "previews" folder,
# keyed by filename. Warnings may be printed while the files are being decoded;
# they can be ignored.
song_features = {}
previews_folder = "../previews"

# os.listdir() returns entries in arbitrary order; sorting keeps the dictionary
# order (and therefore any displayed output) reproducible between runs.
for filename in sorted(os.listdir(previews_folder)):
    # Compare the extension case-insensitively so "Track.MP3" is not skipped.
    if filename.lower().endswith(".mp3"):
        file_path = os.path.join(previews_folder, filename)
        song_features[filename] = extract_audio_features(file_path)



In [13]:
# Similarity search: rank every other song against a query by cosine similarity
def _resolve_song_key(query_song, song_features):
    """Return the key of ``song_features`` that ``query_song`` refers to, or None."""
    # An exact hit always wins, so existing callers see no change.
    if query_song in song_features:
        return query_song

    def normalise(name):
        text = str(name).strip().casefold()
        # Only a literal ".mp3" suffix is stripped; a generic splitext would
        # mangle titles that contain a dot (e.g. "Mr. Brightside").
        return text[:-4] if text.endswith(".mp3") else text

    wanted = normalise(query_song)
    for key in song_features:
        if normalise(key) == wanted:
            return key
    return None


def find_similar_songs(query_song, song_features, top_n=5):
    """Return the ``top_n`` songs most similar to ``query_song``.

    Parameters
    ----------
    query_song : str
        Key of the query track in ``song_features``. If there is no exact
        match, the lookup falls back to a comparison that ignores case,
        surrounding whitespace and a trailing ``.mp3`` (so
        ``"castle on the hill"`` would find ``"Castle On The Hill.mp3"``).
    song_features : dict
        Mapping of song name -> 1-D feature vector. All vectors must have
        the same length.
    top_n : int, optional
        Maximum number of results to return (default 5).

    Returns
    -------
    list of (str, float)
        ``(song name, cosine similarity)`` pairs, best match first. The
        query song itself is never included. An empty list is returned
        when the query cannot be resolved (a message is printed) or when
        there is no other song to compare against.
    """
    resolved = _resolve_song_key(query_song, song_features)
    if resolved is None:
        print("Song not found in dataset.")
        return []

    candidates = [name for name in song_features if name != resolved]
    if not candidates:
        # Nothing to rank; also avoids stacking an empty list below.
        return []

    query_vector = np.asarray(song_features[resolved]).reshape(1, -1)
    candidate_matrix = np.vstack(
        [np.asarray(song_features[name]).reshape(1, -1) for name in candidates]
    )

    # One vectorised call scores the query against every candidate at once
    # instead of invoking cosine_similarity separately for each song.
    scores = cosine_similarity(query_vector, candidate_matrix)[0]

    # sorted() is stable, so equally similar songs keep their dictionary order.
    ranked = sorted(zip(candidates, scores), key=lambda pair: pair[1], reverse=True)

    return ranked[:top_n]

In [14]:
# Smoke test: rank the dataset against a single query track.
# The number next to each filename is its cosine-similarity score (higher means
# more similar); it is not a percentage.
# TODO: make the score more specific / revisit once PCA is introduced.
query_song = "castle on the hill.mp3"
similar_songs = find_similar_songs(query_song=query_song, song_features=song_features)
print("Top 5 similar songs:", similar_songs)

Song not found in dataset.
Top 5 similar songs: []


In [15]:
# For reference: what the song_features dictionary built from the `previews`
# folder looks like (filename -> feature vector). Note that this displays the
# entire dictionary.
song_features

{"Somethin' I'm Good At.mp3": array([ 7.20870674e-01,  5.23503244e-01,  4.95643556e-01,  4.40533608e-01,
         4.63435709e-01,  6.81975603e-01,  3.31219882e-01,  3.88251483e-01,
         3.26843083e-01,  4.10234123e-01,  5.20591974e-01,  3.99469465e-01,
        -6.87751007e+01,  1.56782639e+02, -4.80990105e+01,  5.33729782e+01,
        -1.58032084e+01,  2.67579517e+01, -7.86018085e+00,  1.18456755e+01,
        -5.09688520e+00,  4.37390614e+00,  2.11274290e+00,  3.72283053e+00,
         3.02098608e+00, -2.16198330e+01, -1.52685041e+01, -1.65194550e+01,
        -1.82181187e+01, -1.85633945e+01, -1.88069096e+01, -1.96626282e+01,
        -2.19554386e+01, -2.36391373e+01, -2.64926624e+01, -2.39906063e+01,
        -2.59097290e+01, -2.81402836e+01, -2.56318893e+01, -2.53874493e+01,
        -2.47186127e+01, -2.53054428e+01, -2.57456360e+01, -2.76084766e+01,
        -2.78720760e+01, -2.68836861e+01, -2.52728882e+01, -2.56323929e+01,
        -3.06267414e+01, -2.92192459e+01, -3.10041676e+01, 