In [None]:
# make sure to run this on virtual env with python 3.8.20
! pip install pandas scikit-learn torch essentia



In [None]:
import os
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import essentia.standard as es
import torch.nn.functional as F
from torch.utils.data import Dataset


# creating encoder class to use later
class SongEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SongEncoder, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.GELU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# after i save features from song into json, i grab specific ones to embed
def extract_features_from_json(json_file):

    # open json file
    with open(json_file, "r") as f:
        data = json.load(f)

    # pick relevant features and return numpy array
    features = [
        data["lowlevel"]["average_loudness"],
        data["lowlevel"]["dissonance"]["mean"],
        data["lowlevel"]["dynamic_complexity"],
        data["lowlevel"]["spectral_centroid"]["mean"],
        data["lowlevel"]["spectral_flux"]["mean"],
        data["lowlevel"]["zerocrossingrate"]["mean"],
        *data["lowlevel"]["barkbands"]["mean"],  
        *data["lowlevel"]["mfcc"]["mean"],     
        data["rhythm"]["bpm"],
        data["rhythm"]["beats_count"],
        data["rhythm"]["danceability"],
        data["rhythm"]["onset_rate"],
        data["tonal"]["chords_strength"]["mean"],
        data["tonal"]["hpcp_crest"]["mean"],
        data["tonal"]["hpcp_entropy"]["mean"],
        data["tonal"]["key_edma"]["strength"],
        data["tonal"]["key_krumhansl"]["strength"],
        data["tonal"]["key_temperley"]["strength"],
        data["metadata"]["audio_properties"]["length"],
        data["metadata"]["audio_properties"]["sample_rate"]
    ]

    feature_vector = np.array(features)
    return feature_vector

def save_vector_to_csv(vector, file_path):

    # take the vector and save it to a csv file
    df = pd.DataFrame([vector])
    df.to_csv(file_path, index=False, header=False)

    # also save it to song_vector to check
    vector_csv = os.path.join("song_vector", f"{artist} - {song_name}.csv")
    df.to_csv(vector_csv, index=False, header=False)

    print(f"Feature vector saved to: {file_path}")

def encode_and_save_song_vectors(csv_file, encoder):

    # take the csv file with the vector in it, then encode it
    df = pd.read_csv(csv_file, header=None)
    song_vector = df.values.flatten().astype(np.float32)

    with torch.no_grad():
        encoded_vector = encoder(torch.tensor(song_vector)).numpy()

    save_vector_to_csv(encoded_vector, csv_file)

def extract_encode(artist, song_name, audio_folder="audio", json_folder="song_features", csv_folder="song_csv", encoder=None): 
    audio_file = os.path.join(audio_folder, f"{artist} - {song_name}.wav")
    json_file = os.path.join(json_folder, f"{artist} - {song_name}.json")
    csv_file = os.path.join(csv_folder, f"{artist} - {song_name}.csv")

    features, _ = es.MusicExtractor(
        lowlevelStats=['mean', 'stdev'],
        rhythmStats=['mean', 'stdev'],
        tonalStats=['mean', 'stdev'])(audio_file)

    # features to json -> extract to feature vector -> save to csv
    es.YamlOutput(filename=json_file, format='json')(features)
    feature_vector = extract_features_from_json(json_file)
    save_vector_to_csv(feature_vector, csv_file)

    # encode and save to csv
    if encoder:
        encode_and_save_song_vectors(csv_file, encoder)

def mass_encoder(csv_file, audio_folder="audio", json_folder="song_features", csv_folder="song_csv", encoder=None):
  
    # read csv and check if required columns exist
    pair_data = pd.read_csv(csv_file)
    if not all(col in pair_data.columns for col in ["id", "music_artist", "music_title"]):
        raise ValueError("CSV file must contain 'id', 'music_artist', and 'music_title' columns.")
    
    # filter out rows with nan values in song
    pair_data = pair_data[pair_data["music_artist"].notna() & pair_data["music_title"].notna()]

    output_file = os.path.join(os.path.dirname(csv_file), "pairs_songencoded.csv")
    output_data = []
    
    # tterate through each row in the CSV file
    for index, row in pair_data.iterrows():

        artist = row["music_artist"]
        print(artist)
        song_name = row["music_title"]
        print(song_name)
        shortcode = row["shortcode"]
        image_link = row["head_image_url"]

        print(f"Processing song {index + 1}: {artist} - {song_name}")

        try:
            # extract features and encode the song
            extract_encode(
                artist=artist,
                song_name=song_name,
                audio_folder=audio_folder,
                json_folder=json_folder,
                csv_folder=csv_folder,
                encoder=encoder
            )

            song_csv_path = os.path.join(csv_folder, f"{artist} - {song_name}.csv")
            embedding_df = pd.read_csv(song_csv_path, header=None)
            embedding_vector = embedding_df.values.flatten().tolist()

            output_data.append({
                "shortcode" : shortcode,
                "link" : image_link,
                "embedding" : embedding_vector
            })

            pd.DataFrame(output_data).to_csv(output_file, index=False)

        except Exception as e:
            print(f"Failed to process {artist} - {song_name}: {e}")




In [24]:
if __name__ == "__main__":
    # path to csv
    csv_file = "meowmeow.csv" 

    input_dim = 58 
    hidden_dim = 128
    output_dim = 128
    encoder = SongEncoder(input_dim, hidden_dim, output_dim)

    # idk wtf this does lol
    model_path = "song_encoder.pth"
    if os.path.exists(model_path):
        encoder.load_state_dict(torch.load(model_path))
        print(f"Loaded pre-trained encoder from {model_path}")

    # use goat function and boom boom done (make sure to download songs in spotdl.ipynb first)
    mass_encoder(csv_file=csv_file, encoder=encoder)

Processing song 1: BigXthaPlug - Mmhmm
Failed to process BigXthaPlug - Mmhmm: name 'artist' is not defined
Processing song 2: NLE Choppa, BigXthaPlug - PISTOL PACCIN


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done


Failed to process NLE Choppa, BigXthaPlug - PISTOL PACCIN: name 'artist' is not defined
Processing song 3: Sam Barber - Straight and Narrow
Failed to process Sam Barber - Straight and Narrow: name 'artist' is not defined
Processing song 4: Owen Wilson - Dont Give Up On Us


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done


Failed to process Owen Wilson - Dont Give Up On Us: name 'artist' is not defined
Processing song 5: BigXthaPlug, Tommy Newport - Comes & Goes (feat. Tommy Newport)
Failed to process BigXthaPlug, Tommy Newport - Comes & Goes (feat. Tommy Newport): name 'artist' is not defined
Processing song 6: Wiz Khalifa - Young, Wild & Free (feat. Bruno Mars)


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done


Failed to process Wiz Khalifa - Young, Wild & Free (feat. Bruno Mars): name 'artist' is not defined
Processing song 7: Sam R Barber - As Time Passes


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done


KeyboardInterrupt: 