In [11]:
# Import packages
import os # To handle file paths and listing directories
import pandas as pd
import numpy as np # Used to manipulate feature vectors
import librosa # To analyse audio files
import librosa.display
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity # To compute similarty between feature vectors of songs

import warnings
# Silence every warning of category UserWarning raised after this cell runs.
# NOTE(review): presumably aimed at the warnings emitted while decoding the
# previews (see the loading cell) - confirm nothing important is hidden.
warnings.filterwarnings("ignore", category=UserWarning)

In [12]:
# Define the function to extract audio features
def extract_audio_features(audio_path):
    """Summarise one audio file as a fixed-length 1-D feature vector.

    The returned vector is the concatenation, in this order, of:

    * chroma (CQT) of the harmonic component, averaged over time,
    * 13 MFCCs, averaged over time,
    * the mel spectrogram in dB (relative to its own peak), averaged over time,
    * the mean spectral centroid, spectral bandwidth and spectral roll-off.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D feature vector.

    Raises
    ------
    ValueError
        If no audio samples were decoded from ``audio_path``.
    """
    # y is the audio signal as a NumPy array, sr the number of samples per
    # second. sr=None keeps the file's own sample rate instead of resampling.
    y, sr = librosa.load(audio_path, sr=None)
    if np.size(y) == 0:
        # Fail with a clear message instead of an obscure error further down.
        raise ValueError(f"No audio samples decoded from {audio_path!r}")

    # Harmonic content: split harmonic/percussive parts, keep only the harmonic
    # one, and average its chroma over time to get a fixed-length vector.
    harmonic, _ = librosa.effects.hpss(y)
    harmonic_content = np.mean(librosa.feature.chroma_cqt(y=harmonic, sr=sr), axis=1)

    # Magnitude STFT, computed once and shared by every spectral feature below
    # (passing y= to melspectrogram/mfcc would recompute it each time).
    stft = np.abs(librosa.stft(y))

    # Mel power spectrogram built from the squared STFT magnitudes.
    mel_spectrogram = librosa.feature.melspectrogram(S=stft ** 2, sr=sr)

    # MFCCs describe timbre: 13 coefficients from the log-power mel spectrogram,
    # averaged over time.
    mfccs = np.mean(
        librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrogram), sr=sr, n_mfcc=13),
        axis=1,
    )

    # Mel energy distribution on a decibel scale (peak = 0 dB), averaged over time.
    mel_spectrogram_db = np.mean(librosa.power_to_db(mel_spectrogram, ref=np.max), axis=1)

    # STFT-based scalar descriptors, each averaged over time.
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(S=stft, sr=sr))    # "centre of mass" of the spectrum (brightness)
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(S=stft, sr=sr))  # how spread out the frequencies are
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(S=stft, sr=sr))      # frequency below which most energy is concentrated

    # Combine all features into a single vector.
    feature_vector = np.concatenate((
        harmonic_content, mfccs, mel_spectrogram_db,
        [spectral_centroid, spectral_bandwidth, spectral_rolloff]
    ))

    return feature_vector

In [5]:
# Process all songs in the "previews" folder. WARNINGS WILL APPEAR BUT JUST IGNORE!!
# Result: song_features maps each file name to its feature vector.
song_features = {}
previews_folder = "previews"

# os.listdir returns entries in arbitrary order; sorting makes the dictionary
# order (and therefore tie-breaking between equally similar songs) reproducible.
for filename in sorted(os.listdir(previews_folder)):
    # Case-insensitive extension check so files named "*.MP3" are not skipped.
    if filename.lower().endswith(".mp3"):
        file_path = os.path.join(previews_folder, filename)
        song_features[filename] = extract_audio_features(file_path)



In [17]:
# Define function to find top 5 similar songs using cosine similarity
def find_similar_songs(query_song, song_features, top_n=5):
    """Rank every other song by cosine similarity to ``query_song``.

    Parameters
    ----------
    query_song : hashable
        Key of the reference song in ``song_features``.
    song_features : dict
        Maps each song key to its feature vector (array-like; all vectors
        must have the same number of elements).
    top_n : int or None, default 5
        Maximum number of results. ``None`` returns every other song;
        zero or a negative value returns an empty list.

    Returns
    -------
    list of tuple
        ``(song, similarity)`` pairs sorted by similarity, highest first.
        Songs with equal similarity keep their order from ``song_features``.
        An empty list is returned (after printing a message) when
        ``query_song`` is not a key of ``song_features``.
    """
    # Validate the query song. TODO: normalise the search query (or accept an
    # .mp3 as input) so that a lookup can always return a result.
    if query_song not in song_features:
        print("Song not found in dataset.")
        return []

    candidates = [song for song in song_features if song != query_song]
    # Nothing to compare against, or nothing requested. The explicit check also
    # stops a negative top_n from slicing "all but the last n" results.
    if not candidates or (top_n is not None and top_n <= 0):
        return []

    query_features = np.asarray(song_features[query_song]).reshape(1, -1)
    # One row per candidate, so all similarities come from a single call
    # instead of one call per song.
    candidate_matrix = np.vstack(
        [np.asarray(song_features[song]).reshape(1, -1) for song in candidates]
    )
    scores = cosine_similarity(query_features, candidate_matrix)[0]

    # Sort songs by similarity score in descending order (stable for ties).
    ranked = sorted(zip(candidates, scores), key=lambda pair: pair[1], reverse=True)

    return ranked[:top_n]

In [19]:
# Tester. The value beside each song is its cosine similarity to the query song
# (closer to 1 = more similar), not a percentage. Probably needs to be made more
# specific / combined with PCA in the future.
query_song = "castle on the hill.mp3"
similar_songs = find_similar_songs(query_song, song_features)
# Label with the number of matches actually returned instead of a hard-coded 5,
# so the heading stays truthful when fewer songs (or none) come back.
print(f"Top {len(similar_songs)} similar songs:", similar_songs)

Top 5 similar songs: [('roses.mp3', 0.9992577771797482), ('the new black live unplugged.mp3', 0.998930054692032), ('suspicious live unplugged.mp3', 0.998790714754431), ('break up with him.mp3', 0.9987561795125205), ('cheap thrills.mp3', 0.9987311046714963)]


In [21]:
# For reference: what the song_features dictionary built from the `previews` folder looks like.
# NOTE(review): this displays every entry in full; consider showing only a small sample.
song_features

{'the new black live unplugged.mp3': array([ 2.06801474e-01,  2.36583576e-01,  3.11538577e-01,  2.76813537e-01,
         8.54273379e-01,  3.38451564e-01,  2.49412507e-01,  2.64905155e-01,
         3.47331315e-01,  5.16974270e-01,  2.67785311e-01,  4.05870289e-01,
        -2.57380463e+02,  1.78133560e+02, -2.32793198e+01,  3.40620117e+01,
         1.22285833e+01,  6.02745199e+00, -9.55065536e+00, -2.85365224e+00,
        -3.33963704e+00, -1.44317460e+00, -4.98096752e+00, -2.98136950e+00,
        -6.46858215e+00, -4.29398308e+01, -2.94289913e+01, -2.52821999e+01,
        -2.54738045e+01, -2.40433044e+01, -2.32110901e+01, -2.28718472e+01,
        -2.47230244e+01, -2.68402615e+01, -2.30596733e+01, -2.12279644e+01,
        -2.72166100e+01, -2.66917534e+01, -2.48898125e+01, -2.93657913e+01,
        -2.86310711e+01, -3.13138752e+01, -3.14800301e+01, -3.28502998e+01,
        -3.46926384e+01, -3.25446014e+01, -3.28931084e+01, -3.54918022e+01,
        -3.46207809e+01, -3.34859695e+01, -3.7304252