# Workshop #2:
## ReccoBeats API - Extraction

------------------------------------------------------------

https://reccobeats.com/docs/apis/extract-audio-features

In [2]:
import os
import sys
import re
import subprocess
import requests
import pandas as pd
import logging
import json
from tqdm import tqdm
import yt_dlp

sys.path.append(os.path.abspath('../'))
from src.params import Params
from src.client import DatabaseClient
from src.logging_config import setup_logging

In [12]:
# Configure logging through the project's helper (imported from
# src.logging_config) before any logging.* call in the cells below.
setup_logging()

# 🎯 Cargar canciones relevantes del CSV


In [13]:
# Read the Grammys CSV from the intermediate data folder.
df = pd.read_csv("../data/intermediate/grammys.csv")

# Keep only the rows whose normalized category is one of the two song-level awards.
category_mask = df['normalized_category'].isin(['Song Of The Year', 'Record Of The Year'])
filtered_df = df[category_mask]

# Report how many songs the rest of the notebook will process.
total_songs = filtered_df.shape[0]
logging.info(f"Total relevant songs: {total_songs}")


2025-04-10 02:18:28,828 - INFO - root - Total relevant songs: 124


In [14]:
# Directory that holds the downloaded MP3 files: it is the default output_dir
# of download_audio and the place the processing loop looks for existing files.
AUDIO_DIR = "../data/audio_files"

In [15]:
def safe_filename(title):
    """Turn a song title into a string usable as a file name.

    Every character that is not a word character, hyphen, underscore,
    parenthesis or whitespace is dropped; afterwards each space character
    is replaced by an underscore (other whitespace is left untouched).
    """
    cleaned = re.sub(r'[^\w\-_\(\)\s]', '', title)
    return cleaned.replace(" ", "_")

# Función para descargar audio desde YouTube como MP3

In [16]:
def download_audio(query, output_dir=AUDIO_DIR):
    """Download the first YouTube search result for ``query`` as an MP3 file.

    Args:
        query: Free-text search string. It is also passed through
            ``safe_filename`` to build the output file's base name.
        output_dir: Directory that receives the file; created if missing.

    Returns:
        The path ``<output_dir>/<safe_name>.mp3`` if that file exists after
        the download, otherwise ``None`` (also when yt_dlp raises).
    """
    os.makedirs(output_dir, exist_ok=True)
    safe_name = safe_filename(query)
    # "%(ext)s" is a yt_dlp output-template field; the post-processor below
    # converts the download to MP3, which is why ".mp3" is checked afterwards.
    output_path = os.path.join(output_dir, f"{safe_name}.%(ext)s")

    ydl_opts = {
        'format': 'bestaudio/best',
        'noplaylist': True,
        'quiet': True,
        'outtmpl': output_path,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # "ytsearch1:" restricts the search to a single result.
            ydl.download([f"ytsearch1:{query}"])
    except Exception as e:
        # Best-effort: report through logging (like trim_audio does) and let
        # the caller record the failure instead of aborting the whole run.
        logging.error(f"Error while downloading {query} with yt_dlp: {e}")
        return None

    final_path = os.path.join(output_dir, f"{safe_name}.mp3")
    if os.path.exists(final_path):
        return final_path

    logging.warning(f"Download of {query} finished but {final_path} was not created")
    return None

# ✂️ Recortar audio a 30s

In [17]:
def trim_audio(audio_path, output_dir="../data/audio_files/trimmed", duration=30):
    """Cut the first ``duration`` seconds of an audio file with ffmpeg.

    Args:
        audio_path: Path of the source audio file.
        output_dir: Directory for the trimmed copy; created if missing.
        duration: Length of the clip in seconds (default 30, the value the
            notebook used before this became a parameter).

    Returns:
        Path of ``<output_dir>/<stem>_trimmed.mp3`` when ffmpeg succeeded and
        the file is non-empty, otherwise ``None``.
    """
    os.makedirs(output_dir, exist_ok=True)
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    trimmed_path = os.path.join(output_dir, f"{stem}_trimmed.mp3")

    # "-y" overwrites an existing output; "-acodec copy" copies the audio
    # stream without re-encoding.
    command = [
        "ffmpeg", "-y", "-i", audio_path,
        "-t", str(duration), "-acodec", "copy", trimmed_path,
    ]

    try:
        # Capture stderr (instead of discarding it) so a failure can be explained.
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

        if os.path.exists(trimmed_path) and os.path.getsize(trimmed_path) > 0:
            return trimmed_path
        return None
    except subprocess.CalledProcessError as e:
        stderr_lines = (e.stderr or b"").decode("utf-8", errors="replace").strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else "no ffmpeg output"
        logging.error(f"Error while trimming {audio_path}: {e} ({reason})")
        return None
    except Exception as e:
        # e.g. ffmpeg is not installed (FileNotFoundError) or the path is unreadable.
        logging.error(f"Error while trimming {audio_path}: {e}")
        return None

# 🧠 Enviar archivo a la API de ReccoBeats

In [18]:
def analyze_with_reccobeats(trimmed_path, timeout=60):
    """Upload an audio file to the ReccoBeats audio-features endpoint.

    Args:
        trimmed_path: Path of the audio file to upload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would freeze the whole processing loop.

    Returns:
        A ``(features, error)`` tuple: ``(parsed_json, None)`` on HTTP 200,
        ``(None, "<status> <reason>")`` on any other status, and
        ``(None, str(exception))`` if opening the file or the request fails.
    """
    try:
        with open(trimmed_path, 'rb') as file:
            files = {
                'audioFile': (os.path.basename(trimmed_path), file, 'audio/mpeg')
            }

            headers = {
                'Accept': 'application/json'
            }

            response = requests.post(
                "https://api.reccobeats.com/v1/analysis/audio-features",
                files=files,
                headers=headers,
                timeout=timeout
            )

            if response.status_code == 200:
                return response.json(), None
            return None, f"{response.status_code} {response.reason}"

    except Exception as e:
        # Best-effort: the caller stores the message next to the nominee.
        return None, str(e)

# 🚀 Procesar

In [20]:
results = []

# Only the "nominee" column is used, so iterate over it directly instead of
# building a full row Series with iterrows().
for nominee in tqdm(filtered_df["nominee"], total=len(filtered_df), desc="Analizyng with ReccoBeats"):
    # A missing nominee (NaN is a float) would make safe_filename raise
    # TypeError and abort the whole run; record it and move on instead.
    if not isinstance(nominee, str) or not nominee.strip():
        results.append({
            "nominee": None,
            "features": None,
            "error": "Missing nominee"
        })
        continue

    filename = safe_filename(nominee) + ".mp3"
    audio_path = os.path.join(AUDIO_DIR, filename)

    # Reuse a file that is already in AUDIO_DIR; download only when absent.
    if not os.path.exists(audio_path):
        audio_path = download_audio(nominee)

    if audio_path and os.path.exists(audio_path):
        trimmed = trim_audio(audio_path)
        if trimmed:
            try:
                features, error = analyze_with_reccobeats(trimmed)
            finally:
                # Remove the temporary clip even if the analysis is interrupted.
                os.remove(trimmed)
        else:
            features, error = None, "Trimmed audio not found"
    else:
        features, error = None, "Invalid file or not found"

    results.append({
        "nominee": nominee,
        "features": features,
        "error": error
    })


Analizyng with ReccoBeats:  10%|█         | 13/124 [00:36<05:12,  2.82s/it]

                                                           

Analizyng with ReccoBeats:  11%|█▏        | 14/124 [00:49<10:52,  5.94s/it]

                                                           

Analizyng with ReccoBeats:  12%|█▏        | 15/124 [01:03<15:00,  8.26s/it]

                                                           

Analizyng with ReccoBeats:  13%|█▎        | 16/124 [01:17<17:47,  9.89s/it]

                                                           

Analizyng with ReccoBeats:  15%|█▍        | 18/124 [01:33<15:11,  8.60s/it]

                                                           

Analizyng with ReccoBeats:  16%|█▌        | 20/124 [01:50<13:55,  8.03s/it]

                                                           

Analizyng with ReccoBeats:  17%|█▋        | 21/124 [02:03<16:13,  9.45s/it]

                                                           

Analizyng with ReccoBeats:  18%|█▊        | 22/124 [02:16<17:53, 10.53s/it]

                                                           

Analizyng with ReccoBeats:  19%|█▊        | 23/124 [02:29<19:09, 11.39s/it]

                                                           

Analizyng with ReccoBeats:  19%|█▉        | 24/124 [02:43<20:03, 12.04s/it]

                                                           

Analizyng with ReccoBeats:  21%|██        | 26/124 [02:58<15:24,  9.43s/it]

                                                           

Analizyng with ReccoBeats:  23%|██▎       | 28/124 [03:14<13:05,  8.18s/it]

                                                           

Analizyng with ReccoBeats:  23%|██▎       | 29/124 [03:29<16:01, 10.12s/it]

                                                           

Analizyng with ReccoBeats:  24%|██▍       | 30/124 [03:42<17:24, 11.11s/it]

                                                           

Analizyng with ReccoBeats:  25%|██▌       | 31/124 [03:54<17:27, 11.27s/it]

                                                           

Analizyng with ReccoBeats:  26%|██▌       | 32/124 [04:07<17:55, 11.69s/it]

                                                           

Analizyng with ReccoBeats:  27%|██▋       | 33/124 [04:21<18:56, 12.49s/it]

                                                           

Analizyng with ReccoBeats:  27%|██▋       | 34/124 [04:34<18:57, 12.64s/it]

                                                           

Analizyng with ReccoBeats:  29%|██▉       | 36/124 [04:49<14:03,  9.59s/it]

                                                           

Analizyng with ReccoBeats:  30%|██▉       | 37/124 [05:04<16:08, 11.14s/it]

                                                           

Analizyng with ReccoBeats:  31%|███       | 38/124 [05:16<16:32, 11.54s/it]

                                                           

Analizyng with ReccoBeats:  32%|███▏      | 40/124 [05:32<13:05,  9.35s/it]

                                                           

Analizyng with ReccoBeats:  34%|███▍      | 42/124 [05:48<11:09,  8.16s/it]

                                                           

Analizyng with ReccoBeats:  35%|███▌      | 44/124 [06:05<10:19,  7.75s/it]

                                                           

Analizyng with ReccoBeats:  37%|███▋      | 46/124 [06:20<09:28,  7.29s/it]

                                                           

Analizyng with ReccoBeats:  39%|███▊      | 48/124 [06:37<09:20,  7.38s/it]

                                                           

Analizyng with ReccoBeats:  40%|████      | 50/124 [06:54<09:12,  7.46s/it]

                                                           

Analizyng with ReccoBeats:  41%|████      | 51/124 [07:07<11:12,  9.21s/it]

                                                           

Analizyng with ReccoBeats:  42%|████▏     | 52/124 [07:19<12:03, 10.05s/it]

                                                           

Analizyng with ReccoBeats:  43%|████▎     | 53/124 [07:34<13:35, 11.48s/it]

                                                           

Analizyng with ReccoBeats:  44%|████▎     | 54/124 [07:48<14:10, 12.15s/it]

                                                           

Analizyng with ReccoBeats:  45%|████▌     | 56/124 [08:04<10:53,  9.62s/it]

                                                           

Analizyng with ReccoBeats:  47%|████▋     | 58/124 [08:21<09:18,  8.46s/it]

                                                           

Analizyng with ReccoBeats:  48%|████▊     | 59/124 [08:34<10:47,  9.97s/it]

                                                           

Analizyng with ReccoBeats:  48%|████▊     | 60/124 [08:49<12:02, 11.30s/it]

                                                           

Analizyng with ReccoBeats:  50%|█████     | 62/124 [09:10<10:42, 10.37s/it]

                                                           

Analizyng with ReccoBeats:  52%|█████▏    | 64/124 [09:27<08:43,  8.72s/it]

                                                           

Analizyng with ReccoBeats:  52%|█████▏    | 65/124 [09:41<10:21, 10.53s/it]

                                                         

Analizyng with ReccoBeats:  53%|█████▎    | 66/124 [09:55<11:10, 11.55s/it]

                                                           

Analizyng with ReccoBeats:  54%|█████▍    | 67/124 [10:09<11:38, 12.26s/it]

                                                           

Analizyng with ReccoBeats:  55%|█████▍    | 68/124 [10:23<11:48, 12.65s/it]

                                                           

Analizyng with ReccoBeats:  56%|█████▋    | 70/124 [10:43<09:38, 10.71s/it]

                                                           

Analizyng with ReccoBeats:  58%|█████▊    | 72/124 [10:59<07:35,  8.77s/it]

                                                           

Analizyng with ReccoBeats:  59%|█████▉    | 73/124 [11:12<08:38, 10.17s/it]

                                                           

Analizyng with ReccoBeats:  60%|█████▉    | 74/124 [11:25<09:13, 11.07s/it]

                                                           

Analizyng with ReccoBeats:  60%|██████    | 75/124 [11:41<10:14, 12.53s/it]

                                                           

Analizyng with ReccoBeats:  61%|██████▏   | 76/124 [11:57<10:45, 13.45s/it]

                                                           

Analizyng with ReccoBeats:  63%|██████▎   | 78/124 [12:12<07:41, 10.03s/it]

                                                           

Analizyng with ReccoBeats:  65%|██████▍   | 80/124 [12:26<05:54,  8.07s/it]

                                                           

Analizyng with ReccoBeats:  66%|██████▌   | 82/124 [12:42<05:13,  7.45s/it]

                                                           

Analizyng with ReccoBeats:  68%|██████▊   | 84/124 [12:58<04:52,  7.32s/it]

                                                           

Analizyng with ReccoBeats:  69%|██████▊   | 85/124 [13:14<06:31, 10.03s/it]

                                                           

Analizyng with ReccoBeats:  69%|██████▉   | 86/124 [13:24<06:14,  9.85s/it]

                                                           

Analizyng with ReccoBeats:  70%|███████   | 87/124 [13:44<07:52, 12.78s/it]

                                                           

Analizyng with ReccoBeats:  71%|███████   | 88/124 [13:56<07:40, 12.80s/it]

                                                           

Analizyng with ReccoBeats:  72%|███████▏  | 89/124 [14:09<07:28, 12.80s/it]

                                                           

Analizyng with ReccoBeats:  73%|███████▎  | 90/124 [14:23<07:23, 13.03s/it]

                                                           

Analizyng with ReccoBeats:  73%|███████▎  | 91/124 [14:36<07:10, 13.05s/it]

                                                           

Analizyng with ReccoBeats:  74%|███████▍  | 92/124 [14:49<06:54, 12.95s/it]

                                                           

Analizyng with ReccoBeats:  76%|███████▌  | 94/124 [15:04<04:54,  9.81s/it]

                                                           

Analizyng with ReccoBeats:  77%|███████▋  | 96/124 [15:18<03:44,  8.02s/it]

                                                           

Analizyng with ReccoBeats:  78%|███████▊  | 97/124 [15:42<05:49, 12.95s/it]

                                                           

Analizyng with ReccoBeats:  79%|███████▉  | 98/124 [15:57<05:50, 13.48s/it]

                                                           

Analizyng with ReccoBeats:  81%|████████  | 100/124 [16:14<04:11, 10.50s/it]

                                                           

Analizyng with ReccoBeats:  81%|████████▏ | 101/124 [16:28<04:23, 11.45s/it]

                                                           

Analizyng with ReccoBeats:  82%|████████▏ | 102/124 [16:39<04:09, 11.34s/it]

                                                           

Analizyng with ReccoBeats:  83%|████████▎ | 103/124 [16:52<04:04, 11.64s/it]

                                                           

Analizyng with ReccoBeats:  84%|████████▍ | 104/124 [17:04<04:00, 12.02s/it]

                                                           

Analizyng with ReccoBeats:  85%|████████▌ | 106/124 [17:18<02:40,  8.91s/it]

                                                           

Analizyng with ReccoBeats:  86%|████████▋ | 107/124 [17:27<02:35,  9.16s/it]

                                                           

Analizyng with ReccoBeats:  87%|████████▋ | 108/124 [17:38<02:34,  9.63s/it]

                                                           

Analizyng with ReccoBeats:  88%|████████▊ | 109/124 [17:48<02:26,  9.80s/it]

                                                           

Analizyng with ReccoBeats:  89%|████████▊ | 110/124 [18:00<02:24, 10.32s/it]

                                                           

Analizyng with ReccoBeats:  90%|████████▉ | 111/124 [18:13<02:23, 11.02s/it]

                                                           

Analizyng with ReccoBeats:  90%|█████████ | 112/124 [18:22<02:06, 10.57s/it]

                                                           

Analizyng with ReccoBeats:  92%|█████████▏| 114/124 [18:36<01:23,  8.36s/it]

                                                           

Analizyng with ReccoBeats:  93%|█████████▎| 115/124 [18:46<01:19,  8.78s/it]

                                                           

Analizyng with ReccoBeats:  94%|█████████▎| 116/124 [18:59<01:21, 10.23s/it]

                                                           

Analizyng with ReccoBeats:  95%|█████████▌| 118/124 [19:14<00:49,  8.26s/it]

                                                           

Analizyng with ReccoBeats:  96%|█████████▌| 119/124 [19:24<00:44,  8.82s/it]

                                                           

Analizyng with ReccoBeats:  97%|█████████▋| 120/124 [19:34<00:37,  9.34s/it]

                                                           

Analizyng with ReccoBeats:  98%|█████████▊| 121/124 [19:46<00:30, 10.01s/it]

                                                           

Analizyng with ReccoBeats:  98%|█████████▊| 122/124 [19:57<00:20, 10.22s/it]

                                                           

Analizyng with ReccoBeats: 100%|██████████| 124/124 [20:11<00:00,  9.77s/it]


# 💾 Guardar resultados

In [None]:
# Save every result (features and errors) as JSON
json_path = "../data/intermediate/reccobeats_features.json"
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)
logging.info(f"Results saved in JSON format: {json_path}")

# Also save a CSV with one column per feature, for the rows that have features.
# Naming the columns keeps the frame well-formed even when `results` is empty
# (otherwise dropna(subset=["features"]) raises KeyError).
df_results = pd.DataFrame(results, columns=["nominee", "features", "error"])

# Expand the 'features' dicts into separate columns. Building one DataFrame
# from the list of dicts avoids creating a Series per row via apply(pd.Series).
features_df = df_results.dropna(subset=["features"]).copy()
features_expanded = pd.DataFrame(features_df["features"].tolist(), index=features_df.index)
features_combined = pd.concat([features_df[["nominee"]], features_expanded], axis=1)

csv_path = "../data/intermediate/reccobeats_features.csv"
features_combined.to_csv(csv_path, index=False)
logging.info(f"Results saved in CSV format: {csv_path}")
