In [9]:
import os
import pandas as pd
import essentia
import essentia.standard as es
# from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D
import re


# Spotify credentials come from the environment; os.getenv returns None when a variable is unset.
client_id = os.getenv("SPOTIFY_CLIENT_ID")
secret_key = os.getenv("SPOTIFY_SECRET_KEY")
# Load the track metadata CSV and print its first rows as a quick sanity check.
df = pd.read_csv("spotify_tracks_50.csv")
print(df.head())



                              track_uri              album_name  \
0  spotify:track:4pLwZjInHj3SimIyN9SnOz         Dangerous Woman   
1  spotify:track:37f4ITSlgPX81ad2EvmVQr                Wildfire   
2  spotify:track:6i0V12jOa3mr6uu4WYhUBr                Heathens   
3  spotify:track:2DpCdPMg1BADE4HDnxt3Rd  Sit Still, Look Pretty   
4  spotify:track:6Knv6wdA0luoMUuuoYi2i1                MY HOUSE   

                              album_uri        artist_name  \
0  spotify:album:4lVR2fg3DAUQpGVJ6DciHW      Ariana Grande   
1  spotify:album:0mFDIOqypzHp6Xd0el1hoT     Rachel Platten   
2  spotify:album:3J8W9AOjQhnBLCX33m3atT  Twenty One Pilots   
3  spotify:album:2cE2eOy7alOZHpuelJEV8Q               Daya   
4  spotify:album:5lkNnHVlnCCCV304t89wOH           Flo Rida   

                              artist_uri  duration_ms  pos  \
0  spotify:artist:66CXWjxzNUsdJxJ2JdwvnR       226160    0   
1  spotify:artist:3QLIkT4rD2FMusaqmkepbq       204013    1   
2  spotify:artist:3YQKmKGau1PzlVlkL1io

In [19]:
import os
import requests
import pandas as pd
import essentia
import essentia.standard as es
import urllib.parse

# ----- Configuration -----
# File locations used by process_tracks(); all paths are relative to the working directory.
CSV_INPUT = "spotify_tracks_50.csv"  # input CSV of track metadata, read by process_tracks()
PREVIEW_DIR = "previews"             # directory where downloaded MP3 previews are stored
OUTPUT_CSV = "tracks_with_query_urls.csv"  # output CSV written by process_tracks()

# Create the previews directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(PREVIEW_DIR, exist_ok=True)

def clean_filename(name):
    """
    Reduce a title to a lowercase string of ASCII letters, digits and single spaces.

    Hyphens become spaces, every character other than a-z, 0-9 or a space is
    dropped, and runs of whitespace collapse to one space with the ends trimmed.
    A name made up only of dropped characters therefore yields an empty string.
    """
    # Lowercase first so the a-z character class below also keeps former capitals.
    spaced = name.lower().replace('-', ' ')
    allowed_only = re.sub(r'[^a-z0-9 ]', '', spaced)
    return re.sub(r'\s+', ' ', allowed_only).strip()


# ----- Step 1: Functions to query Deezer API -----
def get_deezer_track_id(track, timeout=10):
    """
    Search the Deezer API for a track by title and return the first match.

    Parameters:
        track: Track title; it is inserted verbatim into the query track:"<title>".
        timeout: Seconds to wait for Deezer before giving up (default 10).

    Returns:
        A (track_id, url) tuple. track_id is the "id" of the first search result,
        or None when the request fails, the HTTP status is not 200, or the result
        list is missing/empty. url is the search URL that was requested and is
        returned in every case.
    """
    # Only the title is searched; the stricter artist/album query is kept for reference.
    # query = f'artist:"{artist}" track:"{track}" album:"{album}"'
    query = f'track:"{track}"'

    encoded_query = urllib.parse.quote(query)
    url = f"https://api.deezer.com/search?q={encoded_query}"
    print("Query URL:", url)  # Debug: show URL

    try:
        # Without a timeout requests can block indefinitely on a stalled connection.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # A network failure for one track is reported as "not found" instead of
        # aborting the caller's whole batch.
        print(f"  -> Deezer search request failed: {exc}")
        return None, url

    if response.status_code == 200:
        data = response.json()
        if data.get("data"):
            return data["data"][0]["id"], url
    return None, url


def get_deezer_preview_url(track_id, timeout=10):
    """
    Given a Deezer track id, retrieve the track details and return the preview URL.

    Parameters:
        track_id: Deezer track id, interpolated into the /track/<id> endpoint.
        timeout: Seconds to wait for Deezer before giving up (default 10).

    Returns:
        The value of the "preview" field of the track JSON, or None when the
        request fails, the HTTP status is not 200, or the field is absent.
    """
    url = f"https://api.deezer.com/track/{track_id}"
    try:
        # Without a timeout requests can block indefinitely on a stalled connection.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report the failure as "no preview" so the caller can skip this track.
        print(f"  -> Deezer track request failed: {exc}")
        return None

    if response.status_code == 200:
        data = response.json()
        return data.get("preview")  # preview is a 30-second MP3 URL
    return None

def download_audio(url, save_path, timeout=30):
    """
    Downloads the audio file from the given URL and saves it to save_path.

    Parameters:
        url: URL of the audio file to fetch.
        save_path: Destination path; the file is opened in binary write mode,
            so an existing file at that path is overwritten.
        timeout: Seconds to wait for the server before giving up (default 30).

    Returns:
        save_path when the file was written, or None when the request fails,
        the HTTP status is not 200, or the response body is empty.
    """
    try:
        # Without a timeout requests can block indefinitely on a stalled connection.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"  -> Audio download failed: {exc}")
        return None

    # An empty body would leave a zero-byte file that looks like a successful
    # download, so it is treated as a failure.
    if response.status_code != 200 or not response.content:
        return None

    with open(save_path, "wb") as f:
        f.write(response.content)
    return save_path

# ----- Main Processing Pipeline -----
def process_tracks():
    """
    Look up every track from CSV_INPUT on Deezer, download its preview, and write OUTPUT_CSV.

    For each input row the track title is searched on Deezer. Rows are handled as follows:
      * blank track name          -> skipped
      * no Deezer match           -> kept, with only "query_url" added
      * no preview / failed fetch -> skipped
      * preview downloaded        -> kept, with "deezer_id", "preview_url",
                                     "audio_filepath" and "query_url" added

    The collected rows are written to OUTPUT_CSV; nothing is written when no row was kept.
    """
    df = pd.read_csv(CSV_INPUT)

    # List to hold processed track data
    results = []

    for _, row in df.iterrows():
        artist = row["artist_name"]
        track = row["track_name"]
        album = row["album_name"]

        print(f"Processing: Artist='{artist}' | Track='{track}' | Album='{album}'")

        # pandas reads a blank cell as NaN (a float), which cannot be searched or
        # turned into a filename, so such rows are skipped explicitly.
        if pd.isna(track):
            print("  -> Missing track name; skipping row")
            continue
        track = str(track)

        deezer_id, query_url = get_deezer_track_id(track)
        if not deezer_id:
            print(f"  -> Deezer ID not found for {track} by {artist}")
            row_dict = row.to_dict()
            # Same key as the success path below, so the search URL lands in a
            # single CSV column instead of being split across "url" and "query_url".
            row_dict["query_url"] = query_url
            results.append(row_dict)
            continue

        preview_url = get_deezer_preview_url(deezer_id)
        if not preview_url:
            print(f"  -> Preview URL not available for Deezer ID {deezer_id}")
            continue

        # Use the sanitized track name for the filename. Titles with no ASCII
        # letters or digits sanitize to "", so fall back to the Deezer id rather
        # than writing every such track to "previews/.mp3".
        safe_track = clean_filename(track) or str(deezer_id)
        preview_filename = os.path.join(PREVIEW_DIR, f"{safe_track}.mp3")
        downloaded_file = download_audio(preview_url, preview_filename)
        if not downloaded_file:
            print(f"  -> Failed to download preview for Deezer ID {deezer_id}")
            continue

        # features = extract_audio_features(downloaded_file)
        track_data = row.to_dict()
        track_data.update({
            "deezer_id": deezer_id,
            "preview_url": preview_url,
            "audio_filepath": downloaded_file,
            "query_url": query_url,
        })
        # for key, value in features.items():
        #     track_data[key] = value
        results.append(track_data)

    # Create a DataFrame with all results and save to CSV
    if results:
        results_df = pd.DataFrame(results)
        results_df.to_csv(OUTPUT_CSV, index=False)
        print(f"Processing complete. Results saved to {OUTPUT_CSV}")
    else:
        print("No tracks were processed.")


# Run the pipeline when this code executes as the main module. The output recorded
# for this cell shows the pipeline running, i.e. the guard is true in the notebook kernel.
if __name__ == "__main__":
    process_tracks()

# # ----- Step 2: Essentia Feature Extraction ----- (not working yet)
# def extract_audio_features(audio_filepath):
#     """
#     Loads an audio file using Essentia and extracts sample features.
#     Here we extract tempo (BPM) and MFCC statistics as an example.
#     You can extend this function to include additional features (instrumental, moods, etc.)
#     """
#     try:
#         # Load audio; ensure sampleRate is set as needed (here 16000 Hz)
#         audio = es.MonoLoader(filename=audio_filepath, sampleRate=16000, resampleQuality=4)()
        
#         # You can replace or extend the feature extraction with additional Essentia algorithms for “Instrumental, Moods and Context” as needed.
#         # Example 1: Extract tempo (BPM)
#         rhythmExtractor = es.RhythmExtractor2013(method="multifeature")
#         #bpm, beats, estimates, bpm_confidence = rhythmExtractor(audio)
#         bpm, bpm_confidence = rhythmExtractor(audio)

        
#         # Example 2: Extract MFCC coefficients
#         mfcc_extractor = es.MFCC(numberCoeffs=13)
#         # mfcc_coeffs, mfcc_bands = mfcc_extractor(audio)
#         mfcc_coeffs = mfcc_extractor(audio)
#         mfcc_mean = mfcc_coeffs.mean(axis=1).tolist()
#         mfcc_std = mfcc_coeffs.std(axis=1).tolist()
        
#         # Package features into a dictionary
#         features = {
#             "bpm": bpm,
#             "bpm_confidence": bpm_confidence,
#             "mfcc_mean": mfcc_mean,
#             "mfcc_std": mfcc_std,
#         }
#         return features
#     except Exception as e:
#         print(f"Error processing {audio_filepath}: {e}")
#         return {}

    
    # Iterate over each row in the CSV
#     for idx, row in df.iterrows():
#         artist = row["artist_name"]
#         track = row["track_name"]
#         album = row["album_name"]
        
#         print(f"Processing: Artist='{artist}' | Track='{track}' | Album='{album}'")
        
#         # 1. Get Deezer track id using search query
#         deezer_id = get_deezer_track_id(artist, track, album)
#         if not deezer_id:
#             print(f"  -> Deezer ID not found for {track} by {artist}")
#             continue  
        
#         # 2. Get the preview URL using the track id
#         preview_url = get_deezer_preview_url(deezer_id)
#         if not preview_url:
#             print(f"  -> Preview URL not available for Deezer ID {deezer_id}")
#             continue
        
#         # 3. Download the preview audio file
#         preview_filename = os.path.join(PREVIEW_DIR, f"{deezer_id}.mp3")
#         downloaded_file = download_audio(preview_url, preview_filename)
#         if not downloaded_file:
#             print(f"  -> Failed to download preview for Deezer ID {deezer_id}")
#             continue
        
#         # 4. Use Essentia to extract audio features
#         features = extract_audio_features(downloaded_file)
        
#         # Combine the original metadata with extracted features and Deezer info
#         track_data = row.to_dict()  # original row data
#         track_data.update({
#             "deezer_id": deezer_id,
#             "preview_url": preview_url,
#             "audio_filepath": downloaded_file,
#         })
#         # Flatten the features (you may want to store MFCC mean/std as JSON strings or separate columns)
#         for key, value in features.items():
#             track_data[key] = value
        
#         results.append(track_data)
    
#     # Create a DataFrame with all results and save to CSV
#     if results:
#         results_df = pd.DataFrame(results)
#         results_df.to_csv(OUTPUT_CSV, index=False)
#         print(f"Processing complete. Results saved to {OUTPUT_CSV}")
#     else:
#         print("No tracks were processed.")

# if __name__ == "__main__":
#     process_tracks()


Processing: Artist='Ariana Grande' | Track='Side To Side' | Album='Dangerous Woman'
Query URL: https://api.deezer.com/search?q=track%3A%22Side%20To%20Side%22
Processing: Artist='Rachel Platten' | Track='Fight Song' | Album='Wildfire'
Query URL: https://api.deezer.com/search?q=track%3A%22Fight%20Song%22
Processing: Artist='Twenty One Pilots' | Track='Heathens' | Album='Heathens'
Query URL: https://api.deezer.com/search?q=track%3A%22Heathens%22
Processing: Artist='Daya' | Track='Sit Still, Look Pretty' | Album='Sit Still, Look Pretty'
Query URL: https://api.deezer.com/search?q=track%3A%22Sit%20Still%2C%20Look%20Pretty%22
Processing: Artist='Flo Rida' | Track='My House' | Album='MY HOUSE'
Query URL: https://api.deezer.com/search?q=track%3A%22My%20House%22
Processing: Artist='Fifth Harmony' | Track='Work from Home' | Album='7/27'
Query URL: https://api.deezer.com/search?q=track%3A%22Work%20from%20Home%22
Processing: Artist='Moon Taxi' | Track='Morocco' | Album='Mountains Beaches Cities'
Qu

In [5]:
from essentia.standard import MonoLoader, TensorflowPredictMusiCNN, TensorflowPredict2D

def predict_arousal_valence(audio_filepath):
    """
    Run the MusiCNN embedding model and then the DEAM regression model on one audio file.

    Parameters:
        audio_filepath: Path of the audio file; it is loaded as mono and resampled to 16000 Hz.

    Returns:
        The raw output of the regression model, unmodified.
        NOTE(review): the earlier comment described this as a 2-element
        [arousal, valence] vector. TensorflowPredict2D presumably yields one row per
        embedding patch, and the Essentia model card appears to list the outputs as
        (valence, arousal) — confirm shape and column order before using the values.

    Both graph files ("msd-musicnn-1.pb", "deam-msd-musicnn-2.pb") are referenced by
    bare filename, so they are resolved relative to the working directory.
    NOTE(review): the ImportError recorded for this cell suggests the installed
    Essentia build lacks the TensorFlow algorithms (they ship in the
    essentia-tensorflow package) — confirm the installation.
    """
    # Decode the file to a mono signal at the 16 kHz rate requested below.
    mono_signal = MonoLoader(filename=audio_filepath, sampleRate=16000, resampleQuality=4)()

    # Stage 1: MusiCNN embeddings taken from the "model/dense/BiasAdd" output node.
    musicnn = TensorflowPredictMusiCNN(graphFilename="msd-musicnn-1.pb", output="model/dense/BiasAdd")
    musicnn_embeddings = musicnn(mono_signal)

    # Stage 2: DEAM regression head applied to those embeddings.
    deam_head = TensorflowPredict2D(graphFilename="deam-msd-musicnn-2.pb", output="model/Identity")
    return deam_head(musicnn_embeddings)

# Example usage: run the predictor on one previously downloaded preview and print the result.
# The output recorded for this cell is an ImportError raised by the import at the top
# of the cell, so these lines did not execute in that run.
audio_file = "previews/124995668.mp3"  # Replace with your own file path
pred = predict_arousal_valence(audio_file)
print("Predicted arousal and valence:", pred)



ImportError: cannot import name 'TensorflowPredictMusiCNN' from 'essentia.standard' (/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/essentia/standard.py)