Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 130 lines (107 sloc) 4.311 kb
cc9af558 »
2011-11-05 renamed to soundcloud-dl, fixed shebang
1 #!/usr/bin/python
4a4b13de »
2011-11-04 initial commit, can't get cookies out
2 from optparse import OptionParser
89f85a0a »
2011-11-04 finish. now successfully downloads .mp3 files and saves it to file. n…
3 import cookielib
4 import urllib2
b26a904c »
2011-11-05 cleaned up the code, fixed typos, made sane default, and use random f…
5 import random
5279dc1d »
2011-11-06 cleaned up uberj's code. created an explicit main function. added fea…
6 import time
48dbfb37 »
2011-11-05 encapsulated soundcloud for usability. added some exception handling
7 import sys
4a4b13de »
2011-11-04 initial commit, can't get cookies out
8 import re
9
5279dc1d »
2011-11-06 cleaned up uberj's code. created an explicit main function. added fea…
# Maximum number of attempts made before a fetch is given up on.
max_retry = 3

# Browser-like headers sent with every request.
#
# 'Accept-Encoding' is deliberately 'identity': urllib2 does not decompress
# gzip/deflate bodies on its own, so advertising those encodings lets the
# server answer with compressed bytes that the regex scrapers cannot read.
header_values = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.9.2.16) Gecko/20110319 Firefox/3.6.16',
    'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'identity',
    'Accept-Language': 'en-US,en;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
}

# Cookie-aware opener; it is also installed as the urllib2 default opener.
cj = cookielib.MozillaCookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), urllib2.HTTPHandler())
urllib2.install_opener(opener)
18
def open_url(url):
    """Fetch ``url`` and return the response body.

    The request is sent through the module-level ``opener`` with
    ``header_values`` as headers.

    Returns the body as read from the response, or None when the server
    answers with an HTTP error or ``url`` is rejected with a ValueError.
    Retrying is left to the caller.
    """
    print("fetching html...")
    try:
        request = urllib2.Request(url, headers=header_values)
        response = opener.open(request)
    except urllib2.HTTPError:
        # Pause briefly so a caller that retries does not hammer the server,
        # then report the failure instead of falling through to an unbound
        # ``response``.
        time.sleep(1)
        return None
    except ValueError as e:
        print(str(e))
        return None

    try:
        return response.read()
    finally:
        response.close()
32
4a4b13de »
2011-11-04 initial commit, can't get cookies out
def get_stream_token_uid(page):
    """Extract the track uid and stream token from page text.

    Returns a ``(uid, stream_token)`` tuple (uid first), or None when the
    page does not contain a ``"uid":"..."`` entry followed on the same line
    by a ``stream_token=...`` value.
    """
    # Raw string so the \w and \d escapes reach the regex engine untouched.
    match = re.search(r'"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page)
    if match is None:
        return None
    return (match.group(1), match.group(2))
40
b26a904c »
2011-11-05 cleaned up the code, fixed typos, made sane default, and use random f…
def get_song_title(page):
    """Return a filename-friendly song title scraped from page text.

    The first ``"title":"..."`` value is taken, spaces are replaced with
    underscores, and path separators are replaced too, because the result
    is used as a file name. When no usable title is found, a random
    five-letter lowercase name is returned instead.
    """
    match = re.search(r'(?<="title":").*?(?=")', page)
    if match:
        title = match.group(0).replace(' ', '_')
        # The title comes from a remote page: never let it carry a path.
        title = re.sub(r'[\\/]', '_', title)
        if title:
            return title

    # No title (or an empty one): make up a short random name.
    alpha = "abcdefghijklmnopqrstuvwxyz"
    random_length = 5
    return ''.join(random.choice(alpha) for _ in range(random_length))
48dbfb37 »
2011-11-05 encapsulated soundcloud for usability. added some exception handling
50
5279dc1d »
2011-11-06 cleaned up uberj's code. created an explicit main function. added fea…
def get_soundcloud_links(url):
    """Scrape the page at ``url`` and return the soundcloud links found.

    The page is fetched with ``open_url``; every failed fetch is counted and
    reported, and once ``max_retry`` failures are reached None is returned.
    On success the result is a list of absolute ``http://soundcloud.com``
    URLs built from the ``<h3><a href="/...">`` entries of the page.
    """
    failures = 0
    while failures != max_retry:
        html = open_url(url)
        if html:
            paths = re.findall('<h3><a href="(/.*?)">.*?</a></h3>', html)
            return ['http://soundcloud.com' + path for path in paths]
        failures += 1
        print("could not fetch html. (%s) " % failures)
    return None
65
b26a904c »
2011-11-05 cleaned up the code, fixed typos, made sane default, and use random f…
def download(uid, token, song_title):
    """Download a track's audio stream to ``<song_title>.mp3``.

    uid        -- track uid used in the stream URL
    token      -- stream token sent as the ``stream_token`` query value
    song_title -- output file name without the ``.mp3`` extension

    The request goes through the module-level ``opener`` with
    ``header_values``; errors raised by it propagate to the caller.
    """
    # compose a url with uid and token and request the mpeg
    url = "http://media.soundcloud.com/stream/%s?stream_token=%s" % (uid, token)
    request = urllib2.Request(url, headers=header_values)
    response = opener.open(request)

    try:
        # Binary mode: the payload is audio data, not text.
        with open(song_title + '.mp3', 'wb') as f:
            # Copy in chunks so the whole track is never held in memory.
            while True:
                chunk = response.read(64 * 1024)
                if not chunk:
                    break
                f.write(chunk)
    finally:
        response.close()
76
5279dc1d »
2011-11-06 cleaned up uberj's code. created an explicit main function. added fea…
def main(**kwargs):
    """Download the song behind the soundcloud page given as ``url``.

    The page is fetched and scraped for the stream token, uid and title.
    Each failed fetch or failed scrape counts as one failure; when the
    failure count reaches ``max_retry`` the process exits with status 1.
    """
    target = kwargs['url']

    failures = 0
    while failures != max_retry:
        # open up initial page to get stream token, uid, song title
        page = open_url(target)
        if not page:
            failures += 1
            print("Could not retrieve initial html. (%s) " % failures)
            continue

        # no (uid, token) pair could be scraped out of this page
        info = get_stream_token_uid(page)
        if not info:
            failures += 1
            print("Could not get stream token. (%s)" % failures)
            continue

        uid, token = info
        song_title = get_song_title(page)
        break
    else:
        # the loop ended without a break: every attempt was used up
        print("failed to download song")
        sys.exit(1)

    # the browser does this...so we will too
    open_url('http://media.soundcloud.com/crossdomain.xml')

    download(uid, token, song_title)
    print(song_title + " successfully downloaded.")
112
if __name__ == '__main__':
    # Command-line entry point: download a single url (-u), every soundcloud
    # link found on a page (-p), or both.
    parser = OptionParser()
    parser.add_option("-u", "--url", help="soundcloud url to download", dest="url")
    parser.add_option("-p", "--page", help="downloads all soundcloud urls found in given page", dest="page_url")
    (options, args) = parser.parse_args()

    # Nothing to do unless at least one of the two options was given.
    if not options.url and not options.page_url:
        print("USAGE: soundcloud.py [-u URL] [-p PAGE WITH URLS]")
        sys.exit(1)

    urls = []
    if options.page_url:
        # get_soundcloud_links returns None when the page could not be
        # fetched; treat that as "no links" rather than crashing below.
        urls = get_soundcloud_links(options.page_url) or []
    if options.url:
        urls.append(options.url)

    print("downloading: " + str(urls))
    for url in urls:
        main(**{'url': url})
131
4a4b13de »
2011-11-04 initial commit, can't get cookies out
132
Something went wrong with that request. Please try again.