In [7]:
# make sure to run this on virtual env with python 3.9.21
! pip install spotdl pandas



In [None]:
import json
import os
import subprocess

import pandas as pd

def song_downloader(csv_file, audio_folder="audio", json_folder="song_features", csv_folder="song_csv"):
    """Download every song listed in a CSV file as a WAV file via the spotdl CLI.

    Rows with a missing ``music_artist`` or ``music_title`` are ignored. A song is
    downloaded only when ``<audio_folder>/<artist> - <title>.wav`` does not exist yet.

    Args:
        csv_file: Path to a CSV file with ``music_artist`` and ``music_title`` columns.
        audio_folder: Directory the ``.wav`` files are written to (created if missing).
        json_folder: Not used by this function; kept so existing callers keep working.
        csv_folder: Not used by this function; kept so existing callers keep working.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    pair_data = pd.read_csv(csv_file)
    pair_data = pair_data[pair_data["music_artist"].notna() & pair_data["music_title"].notna()]

    # NOTE(review): a hard-coded Spotify client id / secret used to be assigned here
    # but was never used. It has been removed; if credentials are ever needed, read
    # them from the environment (os.environ) and rotate the ones that were committed.

    if audio_folder:
        os.makedirs(audio_folder, exist_ok=True)

    # iterate every row -> [image, artist, song name]
    for index, row in pair_data.iterrows():
        # str() guards against non-string cells; apostrophes are dropped so the
        # file names stay identical to the ones produced by earlier runs.
        artist = str(row["music_artist"]).replace("'", "")
        song_name = str(row["music_title"]).replace("'", "")
        track = f"{artist} - {song_name}"

        print(f"Processing song {index + 1}: {track}")

        # skip songs that are already in the folder
        expected_file = os.path.join(audio_folder, f"{track}.wav")
        if os.path.exists(expected_file):
            continue

        # "{output-ext}" is passed through literally as part of spotdl's --output template.
        download_name = os.path.join(audio_folder, track + ".{output-ext}")

        # An argument list (no shell) means artist/title text can never be
        # interpreted as shell syntax.
        command = ["spotdl", "download", track, "--format", "wav", "--output", download_name]
        try:
            result = subprocess.run(command, check=False)
        except OSError as e:
            # e.g. spotdl is not installed / not on PATH
            print(f"Failed to process {track}: {e}")
            continue

        # os.system() never raised on a failed download, so failures used to be
        # silent; report a non-zero exit status explicitly.
        if result.returncode != 0:
            print(f"Failed to process {track}: spotdl exited with code {result.returncode}")

In [None]:
if __name__ == "__main__":
    csv_file = "all_instagram_data.csv"

    # song_downloader() reads the CSV and walks every row itself, so it must be
    # called exactly once. Calling it inside a per-row loop repeated the whole
    # pass once for every row in the file.
    song_downloader(csv_file)


In [1]:
import os, shlex, subprocess, pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

# Settings for the parallel downloader below.
CSV_IN = "all_instagram_data.csv"  # default CSV passed to parallel_song_downloader
AUDIO_DIR = "audio"                # folder that is checked for / receives the .wav files
TIMEOUT_SEC = 600                  # per-download limit in seconds (10 minutes)
MAX_JOBS = 8                       # worker threads; 4-10 is a reasonable range to try

# ───────────────────────── helpers ──────────────────────────
def needs_download(artist, title, audio_dir=None):
    """Return True when ``<artist> - <title>.wav`` is not present in the audio folder.

    Args:
        artist: Artist name, used as the first part of the file name.
        title: Song title, used as the second part of the file name.
        audio_dir: Folder to look in. ``None`` (the default) means the
            module-level ``AUDIO_DIR``.

    Returns:
        bool: False if the ``.wav`` file already exists, True otherwise.
    """
    if audio_dir is None:
        # Resolved at call time so a changed AUDIO_DIR is picked up.
        audio_dir = AUDIO_DIR
    wav_path = os.path.join(audio_dir, f"{artist} - {title}.wav")
    return not os.path.exists(wav_path)

def spotdl_cmd(artist, title, audio_dir=None):
    """Build the shell command line that downloads one song as WAV with spotdl.

    Both the search query and the output template are quoted with
    ``shlex.quote`` so that any character in ``artist``/``title`` (including
    apostrophes) reaches spotdl as plain text. For names without apostrophes the
    result is identical to the previous hand-quoted ``'...'`` form.

    Args:
        artist: Artist name.
        title: Song title.
        audio_dir: Output folder. ``None`` (the default) means the module-level
            ``AUDIO_DIR``.

    Returns:
        str: A POSIX-shell command string (intended for ``shell=True``).
    """
    if audio_dir is None:
        audio_dir = AUDIO_DIR
    query = f"{artist} - {title}"
    # "{output-ext}" is passed through literally as part of spotdl's --output template.
    tpl = os.path.join(audio_dir, query + ".{output-ext}")
    return f"spotdl download {shlex.quote(query)} --format wav --output {shlex.quote(tpl)}"

def download_task(artist, title):
    """Download one song unless its ``.wav`` file already exists.

    Args:
        artist: Artist name.
        title: Song title.

    Returns:
        tuple: ``(success, artist, title, message)`` where ``message`` is
        ``"exists"`` when the download was skipped, ``"timeout"`` when the
        command exceeded ``TIMEOUT_SEC``, and otherwise the last 200 characters
        of the command's combined stdout/stderr.
    """
    if not needs_download(artist, title):
        return True, artist, title, "exists"

    cmd = spotdl_cmd(artist, title)
    try:
        res = subprocess.run(cmd, shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,  # merge stderr into stdout
                             timeout=TIMEOUT_SEC,
                             text=True,
                             # Output may contain bytes the locale codec cannot
                             # decode (e.g. non-ASCII titles); replace them
                             # instead of raising UnicodeDecodeError in the worker.
                             errors="replace")
    except subprocess.TimeoutExpired:
        return False, artist, title, "timeout"
    return res.returncode == 0, artist, title, res.stdout[-200:]

# ─────────────────────── main downloader ────────────────────
def parallel_song_downloader(csv_file=CSV_IN, max_jobs=MAX_JOBS):
    """Download all songs listed in a CSV file using a pool of worker threads.

    Rows with a missing ``music_artist`` or ``music_title`` are ignored, and each
    (artist, title) pair is submitted at most once. One line is printed per song
    (✓ downloaded, • skipped, ✗ failed), followed by a summary line.

    Args:
        csv_file: Path to a CSV file with ``music_artist`` and ``music_title`` columns.
        max_jobs: Number of worker threads that run ``download_task`` concurrently.

    Returns:
        None.
    """
    df = pd.read_csv(csv_file)
    df = df[df.music_artist.notna() & df.music_title.notna()]

    # Strip apostrophes first and de-duplicate afterwards: two rows that differ
    # only by an apostrophe map to the same file and must not be downloaded twice
    # at the same time. dict.fromkeys keeps the first occurrence and the CSV order.
    songs = list(dict.fromkeys(
        (str(artist).replace("'", ""), str(title).replace("'", ""))
        for artist, title in zip(df["music_artist"], df["music_title"])
    ))

    os.makedirs(AUDIO_DIR, exist_ok=True)

    ok, skipped, fail = 0, 0, []
    with ThreadPoolExecutor(max_workers=max_jobs) as pool:
        # Map each future back to its song so a crashed task can still be reported.
        futs = {
            pool.submit(download_task, artist, title): (artist, title)
            for artist, title in songs
        }

        for f in as_completed(futs):
            artist, title = futs[f]
            try:
                success, _, _, msg = f.result()
            except Exception as exc:
                # Worker boundary: one failing task must not abort the whole run.
                success, msg = False, f"{type(exc).__name__}: {exc}"

            # download_task reports skipped songs as success=True with msg "exists",
            # so this case has to be tested before the plain success case.
            if success and msg == "exists":
                skipped += 1
                print(f"• {artist} - {title} (skipped, already downloaded)")
            elif success:
                ok += 1
                print(f"✓ {artist} - {title}")
            else:
                fail.append((artist, title, msg.strip()))
                print(f"✗ {artist} - {title}  [{msg.strip()[:80]}]")

    print(f"\nFinished — downloaded={ok}  skipped={skipped}  failed={len(fail)}")

# Start the parallel download with the default CSV file and worker count.
if __name__ == "__main__":
    parallel_song_downloader()       # ← run!


✓ Selena Gomez, benny blanco - Sunset Blvd
✓ 동요천국 - 짱구는 / 말썽쟁이짱구 (From "짱구는못말려")
✓ Jungle - Ive Been In Love (feat. Channel Tres)
✓ JISOO - FLOWER
✓ Ed Sheeran - Perfect
✓ New West - Those Eyes
✓ JISOO - TEARS
✓ Yoon Jong Shin - Annie (2022 Live Version)
✓ JISOO - Your Love
✓ NewJeans - How Sweet
✓ JISOO - earthquake
✓ JISOO - All Eyes On Me
✓ Sabrina Carpenter - Bed Chem
✓ Selena Gomez, benny blanco, Gracie Abrams - Call Me When You Break Up
✓ The 1975 - Menswear
✓ JISOO - Hugs & Kisses
✓ Ariana Grande - needy
✓ Drake - SMALL TOWN FAME
✓ All Time Low - Kids In The Dark
✓ The Weeknd, Playboi Carti - Timeless
✓ Girls Generation - Gee
✓ The Kid LAROI - YOU
✓ The 1975 - Robbers
✓ Jang Hayden - RUN
✓ BLACKPINK - Shut Down
✓ TWICE - CHEER UP
✓ Stevie Wonder - Isnt She Lovely
✓ NewJeans - Cool With You
✓ Drake - Rich Baby Daddy (feat. Sexyy Red, SZA)
✓ Taeko Onuki - Tokai
✓ Grover Washington, Jr. - Just the Two of Us (feat. Bill Withers)
✓ The Marías - Cariño
✓ Suzy, BAEKHYUN - Dream
✓ Dream