# Download from Soundcloud using yt-dlp

## Set Up the Notebook and the Dependencies

In [1]:
import yt_dlp
import os
from dotenv import load_dotenv
# Load variables from a local .env file first so that both SC_TOKEN and an
# optional FFMPEG_DIR override can be defined there.
load_dotenv()

# Directory holding the ffmpeg binaries used by yt-dlp's post-processors.
# Defaults to the original Windows location; override it with FFMPEG_DIR.
FFMPEG_DIR = os.getenv('FFMPEG_DIR', r'C:\ffmpeg\bin')

# Prepend the directory only once (re-running this cell must not keep growing
# PATH) and join with os.pathsep so the separator is right on every platform.
_current_path = os.environ.get('PATH', '')
if FFMPEG_DIR not in _current_path.split(os.pathsep):
    os.environ['PATH'] = FFMPEG_DIR + os.pathsep + _current_path

# Token passed to yt-dlp as the password of the 'oauth' user in the cells below.
TOKEN = os.getenv('SC_TOKEN')
if not TOKEN:
    print("Warning: SC_TOKEN is not set; downloads that require a SoundCloud login may fail.")

SONGLINK = "https://soundcloud.com/katyperry/last-friday-night-t-g-i-f"

## Download a track as .m4a using a token to access Soundcloud Go songs

In [3]:
SONGLINK = "https://soundcloud.com/katyperry/last-friday-night-t-g-i-f"

# yt-dlp options: pick the best audio-only stream (falling back to the best
# combined format), name the file after the track title, and log in with the
# OAuth token.
ydl_opts = dict(
    format='bestaudio/best',
    outtmpl='%(title)s.%(ext)s',  # Output filename template
    username='oauth',
    password=TOKEN,
)

with yt_dlp.YoutubeDL(ydl_opts) as downloader:
    downloader.download([SONGLINK])

[soundcloud] Verifying login token...
[soundcloud] Logging in
[soundcloud] Extracting URL: https://soundcloud.com/katyperry/last-friday-night-t-g-i-f
[soundcloud] katyperry/last-friday-night-t-g-i-f: Downloading info JSON
[soundcloud] 256250254: Downloading hls_aac format info JSON
[soundcloud] 256250254: Downloading hls_mp3 format info JSON
[soundcloud] 256250254: Downloading hls_opus format info JSON
[info] 256250254: Downloading 1 format(s): hls_aac_1_0
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 26
[download] Destination: Last Friday Night (T.G.I.F.).m4a
[download] 100% of    7.16MiB in 00:00:10 at 721.14KiB/s                
[FixupM4a] Correcting container of "Last Friday Night (T.G.I.F.).m4a"


## Get all the metadata for a song

In [4]:
# Fetch the info dict for the track without downloading any audio.
with yt_dlp.YoutubeDL(ydl_opts) as extractor:
    info = extractor.extract_info(SONGLINK, download=False)

# Display all metadata, one "field: value" line per entry of the info dict.
print("Available metadata:")
for field, content in info.items():
    print(f"{field}: {content}")


[soundcloud] Extracting URL: https://soundcloud.com/katyperry/last-friday-night-t-g-i-f
[soundcloud] katyperry/last-friday-night-t-g-i-f: Downloading info JSON
[soundcloud] 256250254: Downloading hls_aac format info JSON
[soundcloud] 256250254: Downloading hls_mp3 format info JSON
[soundcloud] 256250254: Downloading hls_opus format info JSON
Available metadata:
id: 256250254
uploader: KatyPerry
uploader_id: 8117274
uploader_url: https://soundcloud.com/katyperry
timestamp: 1459350413
title: Last Friday Night (T.G.I.F.)
track: Last Friday Night (T.G.I.F.)
description: None
thumbnails: [{'id': 'mini', 'url': 'https://i1.sndcdn.com/artworks-bUxAruvIrcBv-0-mini.jpg', 'width': 16, 'height': 16, 'resolution': '16x16'}, {'id': 'tiny', 'url': 'https://i1.sndcdn.com/artworks-bUxAruvIrcBv-0-tiny.jpg', 'width': 20, 'height': 20, 'resolution': '20x20'}, {'id': 'small', 'url': 'https://i1.sndcdn.com/artworks-bUxAruvIrcBv-0-small.jpg', 'width': 32, 'height': 32, 'resolution': '32x32'}, {'id': 'badge'

## Download as .mp3 with Thumbnail

Download the track and convert it to MP3. Fetch the highest-resolution thumbnail and embed it into the MP3 file.

In [5]:
# Download the best audio stream, convert it to MP3, write the metadata tags
# and embed the cover art.
ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': '%(artist)s - %(title)s.%(ext)s',
    'username': 'oauth',
    'password': TOKEN,
    # Save the thumbnail to disk so the EmbedThumbnail post-processor below
    # has an image to embed.
    'writethumbnail': True,
    # Note: 'embedthumbnail' is a command-line option, not a YoutubeDL
    # parameter. In the Python API the embedding is requested solely through
    # the EmbedThumbnail post-processor, so that key is not set here.
    'postprocessors': [
        {
            # Transcode the downloaded audio to MP3.
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '320',
        },
        {
            # Write title/artist/... tags into the converted file.
            'key': 'FFmpegMetadata',
            'add_metadata': True,
        },
        {
            # Embed the downloaded thumbnail as cover art.
            'key': 'EmbedThumbnail',
            'already_have_thumbnail': False,
        },
    ],
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([SONGLINK])

[soundcloud] Extracting URL: https://soundcloud.com/katyperry/last-friday-night-t-g-i-f
[soundcloud] katyperry/last-friday-night-t-g-i-f: Downloading info JSON
[soundcloud] 256250254: Downloading hls_aac format info JSON
[soundcloud] 256250254: Downloading hls_mp3 format info JSON
[soundcloud] 256250254: Downloading hls_opus format info JSON
[info] 256250254: Downloading 1 format(s): hls_aac_1_0
[info] Downloading video thumbnail original ...
[info] Writing video thumbnail original to: Katy Perry - Last Friday Night (T.G.I.F.).jpg
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 26
[download] Destination: Katy Perry - Last Friday Night (T.G.I.F.).m4a
[download] 100% of    7.16MiB in 00:00:09 at 801.11KiB/s                
[FixupM4a] Correcting container of "Katy Perry - Last Friday Night (T.G.I.F.).m4a"
[ExtractAudio] Destination: Katy Perry - Last Friday Night (T.G.I.F.).mp3
Deleting original file Katy Perry - Last Friday Night (T.G.I.F.).m4a (pass -k to keep)
[Metad

# Add the songlink as a comment
When downloading a song, also embed the song link into the MP3 file as its comment tag.

In [None]:
# Same MP3 + cover-art pipeline as before; the resulting file path is kept in
# `mp3_file` so the next cell can tag it.
ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': '%(artist)s - %(title)s.%(ext)s',
    'username': 'oauth',
    'password': TOKEN,
    # Save the thumbnail so the EmbedThumbnail post-processor can embed it.
    # ('embedthumbnail' is a CLI-only option and has no effect in the Python
    # API, so it is not set here.)
    'writethumbnail': True,
    'postprocessors': [
        {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '320',
        },
        {
            'key': 'FFmpegMetadata',
            'add_metadata': True,
        },
        {
            'key': 'EmbedThumbnail',
        },
    ],
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    # Extract and download in one pass instead of querying SoundCloud twice.
    info = ydl.extract_info(SONGLINK, download=True)

# Take the path yt-dlp reports after post-processing instead of rebuilding it
# from the template: yt-dlp sanitizes file names and fills missing fields with
# "NA", so a hand-built "<artist> - <title>.mp3" may not match the file on
# disk (and info['artist'] raises KeyError when the track has no artist).
mp3_file = info['requested_downloads'][0]['filepath']

In [None]:
from mutagen.id3 import ID3, COMM
# Store the song URL in an ID3 comment (COMM) frame of the downloaded file.
url_comment = COMM(text=[SONGLINK])
tags = ID3(mp3_file)
tags.add(url_comment)
tags.save()

# Download a Playlist

In [None]:
from mutagen.id3 import ID3, COMM
# SoundCloud playlist URLs use the "/sets/" path segment.
PLAYLIST_LINK = "https://soundcloud.com/user/sets/your-playlist-link-here"

# MP3 + metadata + cover-art pipeline applied to every track of the playlist.
ydl_opts_playlist = {
    'format': 'bestaudio/best',
    'outtmpl': '%(artist)s - %(title)s.%(ext)s',
    'username': 'oauth',
    'password': TOKEN,
    # Save the thumbnail so the EmbedThumbnail post-processor can embed it.
    # ('embedthumbnail' is a CLI-only option and has no effect in the Python
    # API, so it is not set here.)
    'writethumbnail': True,
    'postprocessors': [
        {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '320',
        },
        {
            'key': 'FFmpegMetadata',
            'add_metadata': True,
        },
        {
            'key': 'EmbedThumbnail',
        },
    ],
}

with yt_dlp.YoutubeDL(ydl_opts_playlist) as ydl:
    playlist_info = ydl.extract_info(PLAYLIST_LINK, download=True)

# Add song URL as comment to each downloaded track
tagged_count = 0
for entry in playlist_info['entries']:
    # Skip entries without a finished download (e.g. a track that failed or
    # was skipped) instead of crashing on a file that does not exist.
    downloads = (entry or {}).get('requested_downloads')
    if not downloads:
        continue

    # Use the post-processed path reported by yt-dlp; rebuilding the name from
    # artist/title breaks when yt-dlp sanitizes the file name or the artist
    # field is missing.
    mp3_file = downloads[0]['filepath']
    song_url = entry['webpage_url']

    audio = ID3(mp3_file)
    audio.delall('TALB')  # Remove album tag, would otherwise be set to playlist title
    audio.add(COMM(text=[song_url]))
    audio.save()
    tagged_count += 1

print(f"Downloaded {tagged_count} tracks from playlist")

[soundcloud:set] Extracting URL: https://soundcloud.com/user-251038582/sets/tims-rap-hiphop-rnb-crate
[soundcloud:set] user-251038582/sets/tims-rap-hiphop-rnb-crate: Downloading JSON metadata
[download] Downloading playlist: Tim's Rap / HipHop / RnB Crate
[info] There are no playlist thumbnails to download
[soundcloud:set] Playlist Tim's Rap / HipHop / RnB Crate: Downloading 2 items of 2
[download] Downloading item 1 of 2
[soundcloud] Extracting URL: https://soundcloud.com/centralcee-music/doja
[soundcloud] centralcee-music/doja: Downloading info JSON
[soundcloud] 1304903974: Downloading hls_aac format info JSON
[soundcloud] 1304903974: Downloading http_aac format info JSON
[soundcloud] 1304903974: Downloading hls_mp3 format info JSON
[soundcloud] 1304903974: Downloading http_mp3 format info JSON
[soundcloud] 1304903974: Downloading hls_opus format info JSON
[info] 1304903974: Downloading 1 format(s): http_aac_1_0
[info] Downloading video thumbnail original ...
[info] Writing video thu

# Check for native download

In [3]:
SONGLINK = "https://soundcloud.com/fawafawa/liverpool-street-in-the-rain-speed-garage-edit"

ydl_opts_native = {
    # yt-dlp's SoundCloud extractor lists the uploader's original file as the
    # format with id "download"; prefer it and fall back to the best stream.
    'format': 'download/best',
    'outtmpl': '%(artist)s - %(title)s.%(ext)s',
    'username': 'oauth',
    'password': TOKEN,
}

with yt_dlp.YoutubeDL(ydl_opts_native) as ydl:
    info = ydl.extract_info(SONGLINK, download=False)

    # The info dict has no 'download_url' key, so that lookup can never
    # succeed. Look for the original-file format among the extracted formats.
    native_format = next(
        (fmt for fmt in info.get('formats') or [] if fmt.get('format_id') == 'download'),
        None,
    )
    if native_format:
        print(f"Native download available: {native_format.get('ext')} format")
    else:
        print("No native download available, will use stream")

    ydl.download([SONGLINK])

[soundcloud] Verifying login token...
[soundcloud] Logging in
[soundcloud] Extracting URL: https://soundcloud.com/fawafawa/liverpool-street-in-the-rain-speed-garage-edit
[soundcloud] fawafawa/liverpool-street-in-the-rain-speed-garage-edit: Downloading info JSON
[soundcloud] 2216701607: Downloading original download format info JSON
[soundcloud] 2216701607: Checking original download format availability
[soundcloud] 2216701607: Downloading hls_aac format info JSON
[soundcloud] 2216701607: Downloading http_aac format info JSON
[soundcloud] 2216701607: Downloading hls_aac format info JSON
[soundcloud] 2216701607: Downloading hls_mp3 format info JSON
[soundcloud] 2216701607: Downloading http_mp3 format info JSON
[soundcloud] 2216701607: Downloading hls_opus format info JSON
No native download available, will use stream
[soundcloud] Extracting URL: https://soundcloud.com/fawafawa/liverpool-street-in-the-rain-speed-garage-edit
[soundcloud] fawafawa/liverpool-street-in-the-rain-speed-garage-e

# Buy / External Download available?



In [None]:
SONGLINK = "https://soundcloud.com/taziaus/way-2-sexy-tazi-bootleg"

# Check using yt-dlp (will fail to detect)
auth_opts = {'username': 'oauth', 'password': TOKEN}
with yt_dlp.YoutubeDL(auth_opts) as ydl:
    info = ydl.extract_info(SONGLINK, download=False)

# An external purchase/download link would appear as a truthy 'purchase_url'.
purchase_url = info.get('purchase_url')
has_external_link_ytdlp = bool(purchase_url)

print("=== Using yt-dlp ===")
print(f"External buy/download link available: {has_external_link_ytdlp}")
if not has_external_link_ytdlp:
    print("yt-dlp does NOT extract purchase_url for SoundCloud")
else:
    print(f"Link: {purchase_url}")

[soundcloud] Extracting URL: https://soundcloud.com/taziaus/way-2-sexy-tazi-bootleg
[soundcloud] taziaus/way-2-sexy-tazi-bootleg: Downloading info JSON
[soundcloud] 1216736581: Downloading hls_aac format info JSON
[soundcloud] 1216736581: Downloading hls_aac format info JSON
[soundcloud] 1216736581: Downloading http_aac format info JSON
[soundcloud] 1216736581: Downloading hls_aac format info JSON
[soundcloud] 1216736581: Downloading hls_mp3 format info JSON
[soundcloud] 1216736581: Downloading http_mp3 format info JSON
[soundcloud] 1216736581: Downloading hls_opus format info JSON
External buy/download link available: False


In [5]:
# List every key of the info dict in alphabetical order.
print("All available metadata fields:")
for field_name in sorted(info):
    print(f"  {field_name}")

# Collect the keys whose name hints at a purchase/buy/external link.
link_keywords = ('purchase', 'buy', 'external')
purchase_fields = [
    name for name in info
    if any(word in name.lower() for word in link_keywords)
]
print(f"\nPurchase/Buy/External related fields: {purchase_fields or 'None found'}")
print(f"\npurchase_url value: {info.get('purchase_url')}")

All available metadata fields:
  _has_drm
  abr
  acodec
  artists
  aspect_ratio
  audio_ext
  comment_count
  container
  description
  display_id
  duration
  duration_string
  epoch
  ext
  extractor
  extractor_key
  filesize_approx
  format
  format_id
  format_note
  formats
  fulltitle
  genres
  http_headers
  id
  license
  like_count
  original_url
  playlist
  playlist_index
  preference
  protocol
  quality
  release_year
  repost_count
  requested_subtitles
  resolution
  tags
  tbr
  thumbnail
  thumbnails
  timestamp
  title
  track
  upload_date
  uploader
  uploader_id
  uploader_url
  url
  vbr
  vcodec
  video_ext
  view_count
  webpage_url
  webpage_url_basename
  webpage_url_domain

Purchase/Buy/External related fields: None found

purchase_url value: None


## Web Scraping Solution for External Download Links

**Note:** yt-dlp does NOT extract `purchase_url` for SoundCloud tracks, even when they have "Free Download" buttons. These external links (to Hypeddit, Toneden, etc.) are embedded in the webpage HTML but not exposed through SoundCloud's API.

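We need to scrape the SoundCloud page HTML directly. The `purchaseLink_container` div is not present in the HTML returned by a plain HTTP request (see the debug cells below), so we read `purchase_url` from the JSON embedded in the `window.__sc_hydration` script instead.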

In [6]:
import requests
from bs4 import BeautifulSoup
import re

def _find_purchase_url(node):
    """Return the first non-empty ``purchase_url`` string in decoded JSON, or None."""
    if isinstance(node, dict):
        url = node.get('purchase_url')
        if isinstance(url, str) and url:
            return url
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return None
    for child in children:
        found = _find_purchase_url(child)
        if found:
            return found
    return None


def _purchase_url_from_hydration(script_text):
    """Decode the ``window.__sc_hydration`` JSON array in a script and return its purchase_url, or None."""
    import json  # local import: json is not imported before this cell

    marker = script_text.find('window.__sc_hydration')
    start = script_text.find('[', marker)
    if marker == -1 or start == -1:
        return None
    # raw_decode parses one JSON value starting at `start` and ignores whatever
    # follows it (e.g. the trailing ';' of the JavaScript statement).
    payload, _ = json.JSONDecoder().raw_decode(script_text, start)
    return _find_purchase_url(payload)


def check_soundcloud_external_download(track_url):
    """
    Check if a SoundCloud track has an external download link (Free Download button).

    The page is fetched over HTTP and the JSON assigned to
    ``window.__sc_hydration`` is decoded with a real JSON parser, so escaped
    characters (``\\/``, ``\\uXXXX``), apostrophes and non-ASCII characters in
    the link are returned intact.

    Args:
        track_url: SoundCloud track URL

    Returns:
        dict with 'has_external_link' (bool) and 'external_link' (str or None).
        When the request or the parsing fails, the error is printed and the
        dict additionally carries an 'error' (str) entry.
    """
    try:
        # Fetch the SoundCloud page HTML with a browser-like User-Agent.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        }
        response = requests.get(track_url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse HTML
        soup = BeautifulSoup(response.text, 'html.parser')

        # SoundCloud embeds the track data in a script that assigns
        # window.__sc_hydration; look for purchase_url in that JSON.
        for script in soup.find_all('script'):
            content = script.string
            if not content or 'window.__sc_hydration' not in content:
                continue
            external_url = _purchase_url_from_hydration(content)
            if external_url:
                return {
                    'has_external_link': True,
                    'external_link': external_url
                }

        return {'has_external_link': False, 'external_link': None}

    except Exception as e:
        # Best effort by design: report the failure instead of raising.
        print(f"Error checking external link: {e}")
        return {'has_external_link': False, 'external_link': None, 'error': str(e)}

In [8]:
# Run the checker against a track known to show a Free Download button.
test_url = "https://soundcloud.com/taziaus/way-2-sexy-tazi-bootleg"

result = check_soundcloud_external_download(test_url)
link_found = result['has_external_link']

print(f"Has external download link: {link_found}")
if not link_found:
    print("No external download link found")
    # The checker adds an 'error' entry when the request or parsing failed.
    if 'error' in result:
        print(f"Error: {result['error']}")
else:
    print(f"External link: {result['external_link']}")

Has external download link: True
External link: https://hypeddit.com/drakeftfutureyoungthug/way2sexytazibootleg


In [4]:
# Debug: fetch the raw page and look for purchase/download markup in the HTML.
test_url = "https://soundcloud.com/taziaus/way-2-sexy-tazi-bootleg"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
response = requests.get(test_url, headers=headers, timeout=10)
soup = BeautifulSoup(response.text, 'html.parser')


def _mentions(keyword):
    """Build a BeautifulSoup filter matching non-empty values that contain keyword (case-insensitive)."""
    return lambda value: value and keyword in value.lower()


print("=== Searching for purchase/download related elements ===\n")

# Divs whose class name contains "purchase"; show at most the first three.
purchase_divs = soup.find_all('div', class_=_mentions('purchase'))
print(f"Divs with 'purchase' in class: {len(purchase_divs)}")
for purchase_div in purchase_divs[:3]:
    print(f"  Class: {purchase_div.get('class')}")
    print(f"  Content preview: {str(purchase_div)[:200]}...\n")

# Anchors whose text contains "download"; show at most the first three.
download_links = soup.find_all('a', string=_mentions('download'))
print(f"\nLinks with 'download' in text: {len(download_links)}")
for anchor in download_links[:3]:
    print(f"  Text: {anchor.get_text(strip=True)}")
    print(f"  Href: {anchor.get('href')}")
    print(f"  Classes: {anchor.get('class')}\n")

=== Searching for purchase/download related elements ===

Divs with 'purchase' in class: 0

Links with 'download' in text: 0


In [5]:
# Look for JSON data embedded in script tags
import json
import re

print("=== Searching for embedded JSON data ===\n")

# Collect every <script> tag of the page fetched in the previous debug cell.
scripts = soup.find_all('script')
print(f"Total script tags found: {len(scripts)}\n")

# Report each script whose text mentions one of the link-related keywords.
script_keywords = ('purchase', 'buy_link', 'external_url')
for i, script in enumerate(scripts):
    lowered = (script.string or '').lower()
    if any(keyword in lowered for keyword in script_keywords):
        print(f"Script {i} contains purchase/buy/external_url keywords")
        print(f"Preview: {script.string[:500]}...\n")

# Inspect only the first script that assigns window.__sc_hydration.
hydration_text = next(
    (s.string for s in scripts if s.string and 'window.__sc_hydration' in s.string),
    None,
)
if hydration_text is not None:
    print("Found window.__sc_hydration!")
    # Try to extract and search for purchase_url
    match = re.search(r'purchase_url["\']?\s*:\s*["\']([^"\']+)', hydration_text)
    if match:
        print(f"Found purchase_url: {match.group(1)}")
    else:
        print("No purchase_url found in hydration data")

=== Searching for embedded JSON data ===

Total script tags found: 21

Script 13 contains purchase/buy/external_url keywords
Preview: window.__sc_hydration = [{"hydratable":"anonymousId","data":"681385-842411-467880-135226"},{"hydratable":"features","data":{"features":["v2_dsa_report_content_links","mobi_webauth_oauth_mode","mobi_use_auth_internal_analytics","v2_use_onetrust_tcfv2_us_ca","mobi_enable_onetrust_tcfv2","mobi_tracking_send_session_id","mobi_use_onetrust_eu1","mobi_use_onetrust_gb","mobi_use_onetrust_tcfv2_us_ca","mobi_dsa_report_content_form","v2_use_onetrust_user_id_eu2","v2_enable_sourcepoint_tcfv2","mobi_use_on...

Found window.__sc_hydration!
Found purchase_url: https://hypeddit.com/drakeftfutureyoungthug/way2sexytazibootleg


In [9]:
# Run the checker over one track with and one without a Free Download button.
test_tracks = [
    ("https://soundcloud.com/taziaus/way-2-sexy-tazi-bootleg", "Track with Free Download"),
    ("https://soundcloud.com/katyperry/last-friday-night-t-g-i-f", "Track without Free Download"),
]

print("=== Testing External Download Detection ===\n")
for track_url, description in test_tracks:
    # Print the header first so any error message from the checker appears below it.
    print(f"{description}:\n  URL: {track_url}")

    result = check_soundcloud_external_download(track_url)

    outcome = (
        f"  ✓ External download link found: {result['external_link']}"
        if result['has_external_link']
        else "  ✗ No external download link"
    )
    print(outcome)
    print()

=== Testing External Download Detection ===

Track with Free Download:
  URL: https://soundcloud.com/taziaus/way-2-sexy-tazi-bootleg
  ✓ External download link found: https://hypeddit.com/drakeftfutureyoungthug/way2sexytazibootleg

Track without Free Download:
  URL: https://soundcloud.com/katyperry/last-friday-night-t-g-i-f
  ✗ No external download link



## Summary: yt-dlp vs Web Scraping

**yt-dlp limitations:**
- `purchase_url` field is **NOT** extracted by yt-dlp's SoundCloud extractor
- Returns `None` even when tracks have "Free Download" buttons
- yt-dlp only uses SoundCloud's API, which doesn't expose external download links

**Web scraping solution:**
- Successfully extracts `purchase_url` from the embedded JSON in `window.__sc_hydration`
- Works for tracks with external download links (Hypeddit, Toneden, etc.)
- Returns `has_external_link: False` and `external_link: None` for tracks without external links (as expected)

**Recommendation:** Use the `check_soundcloud_external_download()` function above when you need to detect external download links on SoundCloud tracks.