soundcloud-dl

#!/usr/bin/python
from optparse import OptionParser
import cookielib
import urllib2
import random
import sys
import re
import time

# Upper bound on the number of attempts made by the retry loop in the
# script entry point.
max_retry = 3

# Browser-like request headers attached to every outgoing request.
# NOTE(review): 'Accept-Encoding' advertises gzip/deflate/sdch -- confirm
# that response bodies are decoded before they are parsed or saved.
header_values = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.9.2.16) Gecko/20110319 Firefox/3.6.16',
    'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
}

# Cookie-aware opener shared by all requests; it is also installed as the
# default opener for urllib2.
cj = cookielib.MozillaCookieJar()
opener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cj),
    urllib2.HTTPHandler(),
)
urllib2.install_opener(opener)

def open_url(url, retry_delay=1):
    """Fetch `url` and return the response body, or None on failure.

    The request is sent through the module-level `opener` with the
    module-level `header_values`.

    Parameters:
        url         -- address to fetch.
        retry_delay -- seconds to pause after an HTTP or network error
                       before returning, so a caller that retries does
                       not hammer the server.

    Returns the (decompressed) body as a string, or None when the URL is
    malformed, the request fails, the body cannot be decoded, or the body
    is empty.
    """
    import zlib  # local import: only needed to undo compressed responses

    print("Fetching URL html...")
    try:
        # Use the argument, not the command-line option, so that every
        # caller gets the page it actually asked for.
        request = urllib2.Request(url, headers=header_values)
        response = opener.open(request)
    except urllib2.URLError as e:
        # Covers urllib2.HTTPError too (it is a URLError subclass).
        # Return here: there is no response object to read from.
        print(str(e))
        time.sleep(retry_delay)
        return None
    except ValueError as e:
        # Raised for malformed URLs; waiting would not help.
        print(str(e))
        return None

    print("Retrieved html")
    try:
        html = response.read()
        encoding = (response.info().get('Content-Encoding', '') or '').strip().lower()
    finally:
        response.close()

    # The request headers advertise gzip/deflate, and urllib2 hands back
    # the body exactly as the server sent it, so decode it here.
    if html and encoding in ('gzip', 'x-gzip', 'deflate'):
        try:
            # 32 + MAX_WBITS auto-detects a gzip or zlib header.
            html = zlib.decompress(html, zlib.MAX_WBITS | 32)
        except zlib.error:
            try:
                # Some servers send raw deflate data without any header.
                html = zlib.decompress(html, -zlib.MAX_WBITS)
            except zlib.error as e:
                print(str(e))
                return None

    if not html:
        print("failed to fetch url")
        return None
    return html

def get_stream_token_uid(page):
    """Pull the track uid and the stream token out of `page`.

    Returns a (uid, stream_token) tuple, or None when the page does not
    contain a "uid" entry followed by a stream_token on the same line.
    """
    pattern = '"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)'
    found = re.search(pattern, page)
    if found is None:
        return None
    # Group 1 is the uid, group 2 the stream token.
    return found.groups()

def get_song_title(page):
    """Return a file-name-friendly song title extracted from `page`.

    The title is the text of the first "title":"..." entry in `page`, with
    spaces and path separators replaced by underscores so it can be used
    directly as a file name. When no title is present (or it is empty), a
    random five-letter name is returned instead.
    """
    import string  # local import: supplies the alphabet for the fallback

    match = re.search('(?<="title":").*?(?=")', page)
    title = match.group(0) if match else ''

    # The title comes from a remote page: neutralise characters that would
    # turn it into a path rather than a plain file name.
    for unsafe in (' ', '/', '\\'):
        title = title.replace(unsafe, '_')
    if title:
        return title

    # No usable title: make up a short random one.
    random_length = 5
    return ''.join(random.choice(string.ascii_letters)
                   for _ in range(random_length))

def download(uid, token, song_title):
    """Download the stream for `uid`/`token` and save it as `song_title`.

    Parameters:
        uid        -- track identifier used in the stream URL.
        token      -- stream token used in the stream URL.
        song_title -- path of the file to write; it is created or
                      overwritten.

    Errors raised while opening the URL or writing the file propagate to
    the caller.
    """
    # compose a url with uid and token and request the mpeg
    url = "http://media.soundcloud.com/stream/%s?stream_token=%s" % (uid, token)
    request = urllib2.Request(url, headers=header_values)
    response = opener.open(request)

    chunk_size = 64 * 1024
    try:
        # Binary mode: the payload is audio data, not text.
        with open(song_title, 'wb') as f:
            # Copy in chunks so the whole track is never held in memory.
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
    finally:
        response.close()


if __name__ == '__main__':

    # Parse the command line; --url is mandatory.
    # `options` and `tries` are deliberately left as module-level names:
    # other functions in this file may read them as globals.
    parser = OptionParser()
    parser.add_option("-u", "--url", help="soundcloud url to download", dest="url")
    (options, args) = parser.parse_args()
    if not options.url:
        parser.error("--url option requires an argument")

    info = None
    tries = 0
    while tries < max_retry:
        # open up initial page to get stream token, uid, song title
        html = open_url(options.url)
        if not html:
            tries += 1
            print("Could not retrieve initial html. (%s) " % (tries))
            continue  # Try again
        # Sometimes html isn't html, it's some flash applet (or something binary).
        # In that case get_stream_token_uid returns None.
        info = get_stream_token_uid(html)
        if not info:
            tries += 1
            print("Could not get stream token. (%s)" % (tries))
            continue  # Try again
        break  # Break out, we have all the info we need.

    # `info` is only ever truthy when the loop was left through `break`.
    if not info:
        print("Error.")
        sys.exit(1)

    (uid, token) = info
    song_title = get_song_title(html) + '.mp3'

    # the browser does this...so we will too
    open_url('http://media.soundcloud.com/crossdomain.xml')

    download(uid, token, song_title)
    # song_title already carries the .mp3 extension.
    print(song_title + " has been downloaded.")