Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Initial commit

  • Loading branch information...
commit f07a8b9fa31d9d8f3b1238c61e4d48d8a258ffbf 0 parents
@nonsleepr authored
Showing with 316 additions and 0 deletions.
  1. +22 −0 .gitattributes
  2. +163 −0 .gitignore
  3. +14 −0 README.md
  4. +2 −0  config.py
  5. +115 −0 edu_10gen.py
22 .gitattributes
@@ -0,0 +1,22 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Custom for Visual Studio
+*.cs diff=csharp
+*.sln merge=union
+*.csproj merge=union
+*.vbproj merge=union
+*.fsproj merge=union
+*.dbproj merge=union
+
+# Standard to msysgit
+*.doc diff=astextplain
+*.DOC diff=astextplain
+*.docx diff=astextplain
+*.DOCX diff=astextplain
+*.dot diff=astextplain
+*.DOT diff=astextplain
+*.pdf diff=astextplain
+*.PDF diff=astextplain
+*.rtf diff=astextplain
+*.RTF diff=astextplain
163 .gitignore
@@ -0,0 +1,163 @@
+#################
+## Eclipse
+#################
+
+*.pydevproject
+.project
+.metadata
+bin/
+tmp/
+*.tmp
+*.bak
+*.swp
+*~.nib
+local.properties
+.classpath
+.settings/
+.loadpath
+
+# External tool builders
+.externalToolBuilders/
+
+# Locally stored "Eclipse launch configurations"
+*.launch
+
+# CDT-specific
+.cproject
+
+# PDT-specific
+.buildpath
+
+
+#################
+## Visual Studio
+#################
+
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+
+# User-specific files
+*.suo
+*.user
+*.sln.docstates
+
+# Build results
+[Dd]ebug/
+[Rr]elease/
+*_i.c
+*_p.c
+*.ilk
+*.meta
+*.obj
+*.pch
+*.pdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.vspscc
+.builds
+*.dotCover
+
+## TODO: If you have NuGet Package Restore enabled, uncomment this
+#packages/
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opensdf
+*.sdf
+
+# Visual Studio profiler
+*.psess
+*.vsp
+
+# ReSharper is a .NET coding add-in
+_ReSharper*
+
+# Installshield output folder
+[Ee]xpress
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish
+
+# Others
+[Bb]in
+[Oo]bj
+sql
+TestResults
+*.Cache
+ClientBin
+stylecop.*
+~$*
+*.dbmdl
+# Added for RIA/Silverlight projects
+Generated_Code
+
+# Backup & report files from converting an old project file to a newer
+# Visual Studio version. Backup files are not needed, because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+
+
+
+############
+## Windows
+############
+
+# Windows image file caches
+Thumbs.db
+
+# Folder config file
+Desktop.ini
+
+
+#############
+## Python
+#############
+
+*.py[co]
+
+# Packages
+*.egg
+*.egg-info
+dist
+build
+eggs
+parts
+bin
+var
+sdist
+develop-eggs
+.installed.cfg
+
+# Installer logs
+pip-log.txt
+
+# Unit test / coverage reports
+.coverage
+.tox
+
+#Translations
+*.mo
+
+#Mr Developer
+.mr.developer.cfg
+
+# Mac crap
+.DS_Store
14 README.md
@@ -0,0 +1,14 @@
+## Generate a list of course videos from education.10gen.com.
+
+File `config.py` should be populated with login/password.
+
+When it finishes, the script creates one text file of YouTube links per course, named after the course.
+This file could then be used to download videos with [youtube-dl](https://github.com/rg3/youtube-dl/).
+
+### Dependencies:
+* Python 2.7
+* Mechanize
+* BeautifulSoup4
+
+### Usage:
+`python edu_10gen.py`
2  config.py
@@ -0,0 +1,2 @@
+EMAIL = 'test@test.com'
+PASSWORD = 'password'
115 edu_10gen.py
@@ -0,0 +1,115 @@
+import os
+import re
+import sys
+
+import json
+from datetime import date
+
+from random import random
+from math import floor
+from pprint import pprint
+
+from urllib import urlencode
+
+try:
+ from bs4 import BeautifulSoup
+ import mechanize
+except ImportError:
+ print ("Not all the nessesary libs are installed. " +
+ "Please see requirements.txt.")
+ sys.exit(1)
+
+try:
+ from config import EMAIL, PASSWORD
+except ImportError:
+ print "You should provide config.py file with EMAIL and PASSWORD."
+ sys.exit(1)
+
+try:
+ from config import TARGETDIR
+except ImportError:
+ TARGETDIR = ''
+
+site_url = 'https://education.10gen.com'  # base URL; the relative paths below are appended to it
+login_url = '/login'  # target of the credential request sent at start-up
+dashboard_url = '/dashboard'  # page scraped for the user name and the course list
+youtube_url = 'http://www.youtube.com/watch?v='  # prefix joined with a video id to form a watch link
+
+username_xpath = '/html/body/section/section[1]/section[1]/section/ul/li[2]/span[2]'  # NOTE(review): not referenced anywhere in the visible script
+
+COOKIEFILE = 'c:/Users/EB186011/cookie_10gen.txt'  # NOTE(review): machine-specific absolute path; not referenced anywhere in the visible script
+
+def makeCsrf():  # Return a random 24-character alphanumeric string; the script uses it as its CSRF token.
+ t = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'  # alphabet: digits, lowercase, uppercase
+ e = 24  # token length
+ csrftoken = list()  # characters are collected here and joined at the end
+ for i in range(0,e):
+ csrftoken.append(t[int(floor(random()*len(t)))])  # random() is below 1.0, so the floored index stays below len(t)
+ return ''.join(csrftoken)  # NOTE(review): drawn from the non-cryptographic `random` module
+
+def csrfCookie(csrftoken):  # Wrap `csrftoken` in a mechanize.Cookie named 'csrftoken' and return it.
+ return mechanize.Cookie(version=0,
+ name='csrftoken',
+ value=csrftoken,  # the caller-supplied token string
+ port=None, port_specified=False,
+ domain='10gen.com',
+ domain_specified=False,
+ domain_initial_dot=False,
+ path='/', path_specified=True,
+ secure=False, expires=None,  # no expiry is set
+ discard=True,
+ comment=None, comment_url=None,
+ rest={'HttpOnly': None}, rfc2109=False)
+
+
+br = mechanize.Browser()  # Script body (Python 2): log in, then write one text file of YouTube links per course.
+cj = mechanize.LWPCookieJar()
+csrftoken = makeCsrf()
+cj.set_cookie(csrfCookie(csrftoken))  # the same token goes into the cookie jar ...
+br.set_handle_robots(False)
+br.set_cookiejar(cj)
+br.addheaders.append(('X-CSRFToken',csrftoken))  # ... and into the X-CSRFToken request header
+br.addheaders.append(('Referer','https://education.10gen.com'))
+try:
+ login_resp = br.open(site_url + login_url, urlencode({'email':EMAIL, 'password':PASSWORD}))  # credentials are sent as url-encoded request data
+except mechanize.HTTPError, e:
+ print "Unexpected error:", e.code
+ exit()  # NOTE(review): exits with status 0 even though the login request failed
+login_state = json.loads(login_resp.read())  # the login response body is parsed as JSON
+if not login_state.get('success'):
+ print login_state.get('value')  # presumably the server's failure message -- TODO confirm
+ exit()
+
+dashboard = br.open(site_url + dashboard_url)
+dashboard_soup = BeautifulSoup(dashboard.read())
+username = dashboard_soup.find('section', 'user-info').findAll('span')[1].text  # second <span> inside the 'user-info' section
+print 'Logged as %s\n\n' % username
+
+my_courses = dashboard_soup.findAll('article', 'my-course')  # presumably one 'my-course' article per enrolled course -- TODO confirm
+for my_course in my_courses:
+ course_url = my_course.a['href']
+ course_name = my_course.h3.text
+ f = open(course_name + '.txt', 'w')  # output file named after the course; NOTE(review): TARGETDIR is imported above but not applied here
+ print '%s' % course_name
+ courseware_url = re.sub(r'\/info$','/courseware',course_url)  # swap a trailing '/info' for '/courseware'
+ courseware = br.open(site_url+courseware_url)
+ courseware_soup = BeautifulSoup(courseware.read())
+ chapters = courseware_soup.findAll('div','chapter')
+ for chapter in chapters:
+ chapter_title = chapter.find('h3').find('a').text
+ print '\t%s' % chapter_title
+ paragraphs = chapter.find('ul').findAll('li',' ')
+ for paragraph in paragraphs:
+ par_name = paragraph.p.text
+ par_url = paragraph.a['href']
+ par = br.open(site_url + par_url)
+ par_soup = BeautifulSoup(par.read())
+ content = par_soup.findAll('div','seq_contents')[0].text  # text of the first 'seq_contents' div ...
+ content_soup = BeautifulSoup(content)  # ... is itself parsed as HTML
+ video_stream = content_soup.find('div','video')['data-streams']
+ video_id = video_stream.split(':')[1]  # second ':'-separated field of data-streams
+ video_url = youtube_url + video_id
+ print '\t\t%s: %s' % (par_name, video_url)
+ f.writelines(video_url+'\n')  # one link per line in the course file
+ f.close()  # NOTE(review): skipped if any request or lookup above raises; this line's nesting is not visible in the flattened view
+ print '\nYou can now downlaod lecture videos with the following command:\n youtube-dl -a "%s.txt" -A -t\n' % course_name  # NOTE(review): "downlaod" is a typo in this user-facing message
Please sign in to comment.
Something went wrong with that request. Please try again.