Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

cleaned up uberj's code. created an explicit main function. added feature to scrape soundcloud links from any page and download all at once
  • Loading branch information...
commit 5279dc1dbfa744b57abb19dfaeaff7aa8edcf8eb 1 parent d9cce62
Kevin Ngo authored

Showing 2 changed files with 73 additions and 41 deletions. Show diff stats Hide diff stats

  1. +7 2 README.md
  2. +66 39 soundcloud-dl
9 README.md
Source Rendered
... ... @@ -1,5 +1,10 @@
1 1 ## Soundcloud CLI tool
2 2
3   -A little command line tool for working with soundcloud.
  3 +A little command line tool for working with Soundcloud.
4 4
5   -USAGE: soundcloud.py -u [URL]
  5 +You can either pass in a direct URL to a Soundcloud song or you can pass in any
  6 +link that may possibly contain Soundcloud URLs and the script will scrape for
  7 +the links and download them all at once. If you pass in both options, a page
  8 +and a URL, it will download the URL and any URLs found within the page.
  9 +
  10 +USAGE: soundcloud.py -u [URL] -p [PAGE_WITH_URLs]
105 soundcloud-dl
@@ -3,11 +3,11 @@ from optparse import OptionParser
3 3 import cookielib
4 4 import urllib2
5 5 import random
  6 +import time
6 7 import sys
7 8 import re
8   -import time
9 9
10   -# Maximum number of download attempts
  10 +# max number of download attempts
11 11 max_retry = 3
12 12
13 13 # set up header values and openers
@@ -17,20 +17,17 @@ opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), urllib2.HTTPHandl
17 17 urllib2.install_opener(opener)
18 18
19 19 def open_url(url):
20   - print "Fetching URL html..."
  20 + """ fetches html from given url """
  21 + print "fetching html..."
21 22 try:
22   - request = urllib2.Request(options.url, headers=header_values)
  23 + request = urllib2.Request(url, headers=header_values)
23 24 response = opener.open(request)
24 25 except urllib2.HTTPError, e:
25   - time.sleep(tries)
  26 + time.sleep(1)
26 27 except ValueError, e:
27 28 print str(e)
28 29 return None
29   - print "Retrieved html"
30 30 html = response.read()
31   - if not html:
32   - print "failed to fetch url"
33   - return None
34 31 return html
35 32
36 33 def get_stream_token_uid(page):
@@ -42,14 +39,30 @@ def get_stream_token_uid(page):
42 39 return (uid, stream_token)
43 40
44 41 def get_song_title(page):
45   - alpha = "abcdefghijklmnopqrstuvwxyz"
  42 + """ scrapes song title from soundcloud link """
46 43 match = re.search('(?<=\"title\":\").*?(?=\")', page)
47 44 if match:
48 45 return match.group(0).replace(' ','_')
49 46 else:
  47 + alpha = "abcdefghijklmnopqrstuvwxyz"
50 48 random_length = 5
51 49 return ''.join(random.choice(alpha) for i in xrange(random_length))
52 50
  51 +def get_soundcloud_links(url):
  52 + """ given an url , scrape and return list of soundcloud links """
  53 + retry = 0
  54 + while True:
  55 + if retry == max_retry:
  56 + return None
  57 +
  58 + html = open_url(url)
  59 + if not html:
  60 + retry += 1
  61 + print "could not fetch html. (%s) " % (retry)
  62 + continue
  63 + break
  64 + return ['http://soundcloud.com' + url for url in re.findall('<h3><a href="(/.*?)">.*?</a></h3>', html)]
  65 +
53 66 def download(uid, token, song_title):
54 67 """ given url with token and uid, download file to mp3 """
55 68
@@ -58,48 +71,62 @@ def download(uid, token, song_title):
58 71 request = urllib2.Request(url, headers=header_values)
59 72 response = opener.open(request)
60 73
61   - f = open(song_title, 'w')
  74 + f = open(song_title + '.mp3', 'w')
62 75 f.write(response.read())
63 76
  77 +def main(**kwargs):
  78 + """ takes in an url or url to page to scrape soundcloud links """
  79 +
  80 + url = kwargs['url']
64 81
65   -if __name__ == '__main__':
  82 + retry = 0
  83 + while True:
66 84
67   - parser = OptionParser()
68   - parser.add_option("-u", "--url", help="soundcloud url to download", dest="url")
69   - (options, args) = parser.parse_args()
70   - if not options.url:
71   - parser.error("--url option requires an argument")
  85 + if retry == max_retry:
  86 + print "failed to download song"
  87 + sys.exit(1)
72 88
73   - abort = True
74   - tries = 0
75   - while tries < max_retry:
76 89 # open up initial page to get stream token, uid, song title
77   - html = open_url(options.url)
  90 + html = open_url(url)
78 91 if not html:
79   - tries += 1
80   - print "Could not retrieve initial html. (%s) " % (tries)
81   - continue # Try again
82   - # Sometimes html isn't html, it's some flash applet (or something binary).
83   - # In that case get_stream_token_uid returns None.
  92 + retry += 1
  93 + print "Could not retrieve initial html. (%s) " % (retry)
  94 + continue
  95 +
  96 + # get stream token returns none if html is random binary
84 97 info = get_stream_token_uid(html)
85 98 if not info:
86   - tries += 1
87   - print "Could not get stream token. (%s)" % (tries)
88   - continue # Try again
89   - (uid, token) = info
90   -
91   - song_title = get_song_title(html) + '.mp3'
92   - abort = False
93   - break #Break out, we have all the info we need.
94   -
95   - if abort:
96   - print "Error."
97   - sys.exit(1)
  99 + retry += 1
  100 + print "Could not get stream token. (%s)" % (retry)
  101 + continue
98 102
  103 + (uid, token) = info
  104 + song_title = get_song_title(html)
  105 + break
99 106
100 107 # the browser does this...so we will too
101 108 open_url('http://media.soundcloud.com/crossdomain.xml')
102 109
103 110 download(uid, token, song_title)
104   - print song_title+".mp3 has been downloaded."
  111 + print song_title + " successfully downloaded."
  112 +
  113 +if __name__ == '__main__':
  114 +
  115 + parser = OptionParser()
  116 + parser.add_option("-u", "--url", help="soundcloud url to download", dest="url")
  117 + parser.add_option("-p", "--page", help="downloads all soundcloud urls found in given page", dest="page_url")
  118 + (options, args) = parser.parse_args()
  119 +
  120 + urls = []
  121 + if options.page_url:
  122 + urls = get_soundcloud_links(options.page_url)
  123 + if options.url:
  124 + urls.append(options.url)
  125 + if not options.url and options.page_url:
  126 + print "USAGE: soundcloud.py [-u URL] [-p PAGE WITH URLS]"
  127 +
  128 + print "downloading: " + str(urls)
  129 + for url in urls:
  130 + main(**{'url':url})
  131 +
105 132

0 comments on commit 5279dc1

Please sign in to comment.
Something went wrong with that request. Please try again.