Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 139 lines (111 sloc) 4.42 KB
from optparse import OptionParser
import cookielib
import urllib2
import random
import time
import sys
import re
import os
# Upper bound on fetch/download attempts before a caller gives up.
max_retry = 3

# Browser-like request headers attached to every outgoing request.
header_values = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv: Gecko/20110319 Firefox/3.6.16',
    'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
}

# One shared opener for the whole script; the cookie jar is wired into it
# through HTTPCookieProcessor.
cj = cookielib.MozillaCookieJar()
opener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cj),
    urllib2.HTTPHandler(),
)
def _decode_body(body, encoding):
    """Undo a gzip/deflate Content-Encoding; return None if decoding fails.

    Bodies with any other (or no) encoding are returned unchanged.
    """
    import zlib  # local import: only needed for compressed responses

    encoding = (encoding or '').strip().lower()
    try:
        if encoding in ('gzip', 'x-gzip'):
            # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
            return zlib.decompress(body, 16 + zlib.MAX_WBITS)
        if encoding == 'deflate':
            try:
                return zlib.decompress(body)
            except zlib.error:
                # Some servers send a raw deflate stream without zlib header.
                return zlib.decompress(body, -zlib.MAX_WBITS)
    except zlib.error as e:
        print(str(e))
        return None
    return body


def open_url(url):
    """Fetch ``url`` with the shared opener and return the response body.

    Returns None when the request fails (HTTP error, unreachable host,
    malformed url) or when a compressed body cannot be decoded, so callers
    can treat a falsy result as "retry".
    """
    print("fetching html...")
    try:
        request = urllib2.Request(url, headers=header_values)
        response = opener.open(request)
    except (urllib2.URLError, ValueError) as e:
        # HTTPError is a subclass of URLError; ValueError covers urls
        # without a usable scheme.
        print(str(e))
        return None
    try:
        body = response.read()
        encoding = response.info().get('Content-Encoding', '')
    finally:
        response.close()
    # header_values advertises gzip/deflate and urllib2 does not decompress
    # transparently, so the body has to be decoded here.
    return _decode_body(body, encoding)
def get_stream_token_uid(page):
    """Extract the track uid and stream token from page source.

    Returns a ``(uid, stream_token)`` tuple, or None when ``page`` is empty
    or does not contain both values (uid first, token later on the same
    line).
    """
    if not page:
        return None
    match = re.search(r'"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page)
    if match is None:
        return None
    uid, stream_token = match.group(1), match.group(2)
    return (uid, stream_token)
def get_song_title(page):
    """Scrape the song title from page source for use as a file name.

    Spaces and path separators in the title are replaced with underscores.
    When no non-empty title is found, a random five-letter lowercase name is
    returned instead so the caller always gets a usable file name.
    """
    match = re.search(r'(?<="title":").*?(?=")', page)
    if match and match.group(0):
        title = match.group(0)
        # The title comes from a remote page and ends up in a file path, so
        # neutralise separators as well as spaces.
        for unsafe in (' ', '/', '\\'):
            title = title.replace(unsafe, '_')
        return title
    alpha = "abcdefghijklmnopqrstuvwxyz"
    random_length = 5
    return ''.join(random.choice(alpha) for _ in range(random_length))
def get_soundcloud_links(url):
    """Scrape the page at ``url`` and return a list of soundcloud track urls.

    The page is fetched up to ``max_retry`` times. Returns None when every
    attempt fails, otherwise a (possibly empty) list of absolute urls built
    from the site-relative links found inside ``<h3>`` headings.
    """
    # NOTE(review): site root is assumed; links on the page are relative
    # ("/artist/track") and need an absolute prefix to be fetchable - confirm.
    base_url = 'http://soundcloud.com'

    html = None
    for attempt in range(1, max_retry + 1):
        html = open_url(url)
        if html:
            break
        print("could not fetch html. (%s) " % (attempt))
    if not html:
        return None

    paths = re.findall('<h3><a href="(/.*?)">.*?</a></h3>', html)
    return [base_url + path for path in paths]
def download(uid, token, song_title, directory=None):
    """Download the stream identified by ``uid``/``token`` to an mp3 file.

    The file is named ``<song_title>.mp3`` and written inside ``directory``;
    when ``directory`` is None or empty the path is relative to the current
    working directory. Errors from the request or from file I/O propagate
    to the caller.
    """
    # NOTE(review): stream endpoint is assumed - confirm the url template.
    url = "http://media.soundcloud.com/stream/%s?stream_token=%s" % (uid, token)
    request = urllib2.Request(url, headers=header_values)

    path = song_title + '.mp3'
    if directory:
        path = os.path.join(directory, path)

    response = opener.open(request)
    try:
        # Binary mode: the payload is audio data, not text.
        with open(path, 'wb') as f:
            # Copy in chunks so a large track is never held fully in memory.
            while True:
                chunk = response.read(64 * 1024)
                if not chunk:
                    break
                f.write(chunk)
    finally:
        response.close()
def main(**kwargs):
    """Download the song found at a soundcloud page.

    Keyword arguments:
    url -- page to scrape for the stream token, uid and song title (required)
    directory -- where the mp3 should be saved (optional)

    Fetching the page and extracting the token are retried until
    ``max_retry`` failures have accumulated; the function then reports the
    failure and returns without downloading.
    """
    url = kwargs['url']
    directory = kwargs.get('directory')
    retry = 0
    while True:
        if retry == max_retry:
            print("failed to download song")
            return

        # open up initial page to get stream token, uid, song title
        html = open_url(url)
        if not html:
            retry += 1
            print("Could not retrieve initial html. (%s) " % (retry))
            continue

        # no token is found when the page could not be read as html
        info = get_stream_token_uid(html)
        if not info:
            retry += 1
            print("Could not get stream token. (%s)" % (retry))
            continue

        (uid, token) = info
        song_title = get_song_title(html)
        download(uid, token, song_title, directory)
        print(song_title + " successfully downloaded.")
        return
# Command-line entry point: download one url (-u), every soundcloud link
# found on a page (-p), or both, into the directory given with -d.
if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option("-u", "--url", help="soundcloud url to download", dest="url")
    parser.add_option("-p", "--page", help="downloads all soundcloud urls found in given page", dest="page_url")
    # os.getcwd() is used rather than $PWD, which may be unset.
    parser.add_option("-d", "--dir", help="file path to where the mp3(s) should be saved.", dest="directory", default=os.getcwd())
    (options, args) = parser.parse_args()

    # At least one source of urls is required.
    if not options.url and not options.page_url:
        print("USAGE: [-u URL] [-p PAGE WITH URLS]")
        sys.exit(1)
    print(options)

    urls = []
    if options.page_url:
        # get_soundcloud_links returns None when the page cannot be fetched.
        urls = get_soundcloud_links(options.page_url) or []
    if options.url:
        urls.append(options.url)

    print("downloading: " + str(urls))
    for url in urls:
        main(url=url, directory=options.directory)
Jump to Line
Something went wrong with that request. Please try again.