Browse files

The site can now be set in the config, so courses from any edX-powered site can be downloaded

  • Loading branch information...
1 parent 08659e1 commit 4d6c31cd4c36726d56e7a0513ae4fe1d3f7729e2 @nettoyeur nettoyeur committed Dec 28, 2012
Showing with 26 additions and 5 deletions.
  1. +8 −2 config.py
  2. +18 −3 edu_10gen.py
View
10 config.py
@@ -1,2 +1,8 @@
-EMAIL = 'test@test.com'
-PASSWORD = 'password'
+# edx.org config
+EMAIL = 'your-email@he.re'
+PASSWORD='password'
+DOMAIN='www.edx.org'
+
+
+#common things
+SITE_URL = 'https://' + DOMAIN
View
21 edu_10gen.py
@@ -29,12 +29,17 @@
print "You should provide config.py file with EMAIL and PASSWORD."
sys.exit(1)
+try:
+ from config import SITE_URL, DOMAIN
+except ImportError:
+ print "You should provide config.py file with SITE_URL and DOMAIN."
+ sys.exit(1)
+
if len(sys.argv) == 2:
DIRECTORY = sys.argv[1].strip('"') + '/'
else:
DIRECTORY = ''
-SITE_URL = 'https://education.10gen.com'
login_url = '/login'
dashboard_url = '/dashboard'
youtube_url = 'http://www.youtube.com/watch?v='
@@ -52,7 +57,7 @@ def csrfCookie(csrftoken):
name='csrftoken',
value=csrftoken,
port=None, port_specified=False,
- domain='10gen.com',
+ domain=DOMAIN,
domain_specified=False,
domain_initial_dot=False,
path='/', path_specified=True,
@@ -75,6 +80,7 @@ def __init__(self):
with open(YDL_PARAMS_FILE) as fydl:
self._fd = FileDownloader(json.load(fydl))
self._fd.add_info_extractor(YoutubeIE())
+
def login(self, email, password):
try:
login_resp = self._br.open(SITE_URL + login_url, urlencode({'email':email, 'password':password}))
@@ -85,6 +91,7 @@ def login(self, email, password):
return self._logged_in
except mechanize.HTTPError, e:
sys.exit('Can\'t sign in')
+
def list_courses(self):
self.courses = []
if self._logged_in:
@@ -99,6 +106,7 @@ def list_courses(self):
course_name = my_course.h3.text
self.courses.append({'name':course_name, 'url':courseware_url})
print '[%02i] %s' % (i, course_name)
+
def list_chapters(self, course_i):
self.paragraphs = []
if course_i <= len(self.courses) and course_i >= 0:
@@ -120,19 +128,25 @@ def list_chapters(self, course_i):
par_url = paragraph.a['href']
self.paragraphs.append((course_name, i, j, chapter_name, par_name, par_url))
print '\t[%02i.%02i] %s' % (i, j, par_name)
+
def download(self):
+ print "\n-----------------------\nStart downloading\n-----------------------\n"
for (course_name, i, j, chapter_name, par_name, url) in self.paragraphs:
nametmpl = sanitize_filename(course_name) + '/' \
+ sanitize_filename(chapter_name) + '/' \
+ '%02i.%02i.*' % (i,j)
fn = glob.glob(DIRECTORY + nametmpl)
+
if fn:
+ print "Processing of %s skipped" % nametmpl
continue
+ print "Processing %s..." % nametmpl
par = self._br.open(SITE_URL + url)
par_soup = BeautifulSoup(par.read())
contents = par_soup.findAll('div','seq_contents')
k = 0
for content in contents:
+ #print "Content: %s" % content
content_soup = BeautifulSoup(content.text)
try:
video_type = content_soup.h2.text.strip()
@@ -148,7 +162,8 @@ def download(self):
+ sanitize_filename('%s (%s)' % (par_name, video_type)) + '.%(ext)s'
self._fd.params['outtmpl'] = outtmpl
self._fd.download([video_url])
- except:
+ except Exception as e:
+ #print "Error: %s" % e
pass

0 comments on commit 4d6c31c

Please sign in to comment.