Skip to content
This repository
Browse code

Initial commit

  • Loading branch information...
commit f07a8b9fa31d9d8f3b1238c61e4d48d8a258ffbf 0 parents
Alexander Bessonov authored October 24, 2012
22  .gitattributes
... ...
@@ -0,0 +1,22 @@
  1
+# Auto detect text files and perform LF normalization
  2
+* text=auto
  3
+
  4
+# Custom for Visual Studio
  5
+*.cs     diff=csharp
  6
+*.sln    merge=union
  7
+*.csproj merge=union
  8
+*.vbproj merge=union
  9
+*.fsproj merge=union
  10
+*.dbproj merge=union
  11
+
  12
+# Standard to msysgit
  13
+*.doc	 diff=astextplain
  14
+*.DOC	 diff=astextplain
  15
+*.docx diff=astextplain
  16
+*.DOCX diff=astextplain
  17
+*.dot  diff=astextplain
  18
+*.DOT  diff=astextplain
  19
+*.pdf  diff=astextplain
  20
+*.PDF	 diff=astextplain
  21
+*.rtf	 diff=astextplain
  22
+*.RTF	 diff=astextplain
163  .gitignore
... ...
@@ -0,0 +1,163 @@
  1
+#################
  2
+## Eclipse
  3
+#################
  4
+
  5
+*.pydevproject
  6
+.project
  7
+.metadata
  8
+bin/
  9
+tmp/
  10
+*.tmp
  11
+*.bak
  12
+*.swp
  13
+*~.nib
  14
+local.properties
  15
+.classpath
  16
+.settings/
  17
+.loadpath
  18
+
  19
+# External tool builders
  20
+.externalToolBuilders/
  21
+
  22
+# Locally stored "Eclipse launch configurations"
  23
+*.launch
  24
+
  25
+# CDT-specific
  26
+.cproject
  27
+
  28
+# PDT-specific
  29
+.buildpath
  30
+
  31
+
  32
+#################
  33
+## Visual Studio
  34
+#################
  35
+
  36
+## Ignore Visual Studio temporary files, build results, and
  37
+## files generated by popular Visual Studio add-ons.
  38
+
  39
+# User-specific files
  40
+*.suo
  41
+*.user
  42
+*.sln.docstates
  43
+
  44
+# Build results
  45
+[Dd]ebug/
  46
+[Rr]elease/
  47
+*_i.c
  48
+*_p.c
  49
+*.ilk
  50
+*.meta
  51
+*.obj
  52
+*.pch
  53
+*.pdb
  54
+*.pgc
  55
+*.pgd
  56
+*.rsp
  57
+*.sbr
  58
+*.tlb
  59
+*.tli
  60
+*.tlh
  61
+*.tmp
  62
+*.vspscc
  63
+.builds
  64
+*.dotCover
  65
+
  66
+## TODO: If you have NuGet Package Restore enabled, uncomment this
  67
+#packages/
  68
+
  69
+# Visual C++ cache files
  70
+ipch/
  71
+*.aps
  72
+*.ncb
  73
+*.opensdf
  74
+*.sdf
  75
+
  76
+# Visual Studio profiler
  77
+*.psess
  78
+*.vsp
  79
+
  80
+# ReSharper is a .NET coding add-in
  81
+_ReSharper*
  82
+
  83
+# Installshield output folder
  84
+[Ee]xpress
  85
+
  86
+# DocProject is a documentation generator add-in
  87
+DocProject/buildhelp/
  88
+DocProject/Help/*.HxT
  89
+DocProject/Help/*.HxC
  90
+DocProject/Help/*.hhc
  91
+DocProject/Help/*.hhk
  92
+DocProject/Help/*.hhp
  93
+DocProject/Help/Html2
  94
+DocProject/Help/html
  95
+
  96
+# Click-Once directory
  97
+publish
  98
+
  99
+# Others
  100
+[Bb]in
  101
+[Oo]bj
  102
+sql
  103
+TestResults
  104
+*.Cache
  105
+ClientBin
  106
+stylecop.*
  107
+~$*
  108
+*.dbmdl
  109
+Generated_Code #added for RIA/Silverlight projects
  110
+
  111
+# Backup & report files from converting an old project file to a newer
  112
+# Visual Studio version. Backup files are not needed, because we have git ;-)
  113
+_UpgradeReport_Files/
  114
+Backup*/
  115
+UpgradeLog*.XML
  116
+
  117
+
  118
+
  119
+############
  120
+## Windows
  121
+############
  122
+
  123
+# Windows image file caches
  124
+Thumbs.db
  125
+
  126
+# Folder config file
  127
+Desktop.ini
  128
+
  129
+
  130
+#############
  131
+## Python
  132
+#############
  133
+
  134
+*.py[co]
  135
+
  136
+# Packages
  137
+*.egg
  138
+*.egg-info
  139
+dist
  140
+build
  141
+eggs
  142
+parts
  143
+bin
  144
+var
  145
+sdist
  146
+develop-eggs
  147
+.installed.cfg
  148
+
  149
+# Installer logs
  150
+pip-log.txt
  151
+
  152
+# Unit test / coverage reports
  153
+.coverage
  154
+.tox
  155
+
  156
+#Translations
  157
+*.mo
  158
+
  159
+#Mr Developer
  160
+.mr.developer.cfg
  161
+
  162
+# Mac crap
  163
+.DS_Store
14  README.md
Source Rendered
... ...
@@ -0,0 +1,14 @@
  1
+##Generate list of course videos from education.10gen.com.
  2
+
  3
+File `config.py` should be populated with login/password.
  4
+
  5
+After completion, the script will create a text file of YouTube links, named after the course.
  6
+This file can then be used to download videos with [youtube-dl](https://github.com/rg3/youtube-dl/).
  7
+
  8
+###Dependencies:
  9
+* Python 2.7
  10
+* Mechanize
  11
+* BeautifulSoup4
  12
+
  13
+###Format:
  14
+`python edu_10gen.py`
2  config.py
... ...
@@ -0,0 +1,2 @@
  1
+EMAIL = 'test@test.com'
  2
+PASSWORD = 'password'
115  edu_10gen.py
... ...
@@ -0,0 +1,115 @@
  1
+import os
  2
+import re
  3
+import sys
  4
+
  5
+import json
  6
+from datetime import date
  7
+
  8
+from random import random
  9
+from math import floor
  10
+from pprint import pprint
  11
+
  12
+from urllib import urlencode
  13
+
  14
+try:
  15
+    from bs4 import BeautifulSoup
  16
+    import mechanize
  17
+except ImportError:
  18
+    print ("Not all the nessesary libs are installed. " +
  19
+           "Please see requirements.txt.")
  20
+    sys.exit(1)
  21
+
  22
+try:
  23
+    from config import EMAIL, PASSWORD
  24
+except ImportError:
  25
+    print "You should provide config.py file with EMAIL and PASSWORD."
  26
+    sys.exit(1)
  27
+
  28
+try:
  29
+    from config import TARGETDIR
  30
+except ImportError:
  31
+    TARGETDIR = ''
  32
+
  33
+site_url = 'https://education.10gen.com'
  34
+login_url = '/login'
  35
+dashboard_url = '/dashboard'
  36
+youtube_url = 'http://www.youtube.com/watch?v='
  37
+
  38
+username_xpath = '/html/body/section/section[1]/section[1]/section/ul/li[2]/span[2]'
  39
+
  40
+COOKIEFILE = 'c:/Users/EB186011/cookie_10gen.txt'
  41
+
  42
def makeCsrf(length=24):
    """Return a random alphanumeric string suitable for use as a CSRF token.

    Args:
        length: Number of characters to generate. Defaults to 24, the
            length this script has always used. Zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` characters drawn from ``0-9``, ``a-z``
        and ``A-Z``.
    """
    # Imported locally because the module only imports the ``random``
    # function itself; SystemRandom draws from the OS entropy source.
    from random import SystemRandom

    alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    pick = SystemRandom().choice
    return ''.join(pick(alphabet) for _ in range(length))
  49
+
  50
def csrfCookie(csrftoken):
    """Wrap ``csrftoken`` in a ``mechanize.Cookie`` named ``csrftoken``.

    The cookie is a version-0, non-secure, discardable cookie for domain
    ``10gen.com`` and path ``/`` with no expiry.

    Args:
        csrftoken: The token string to store as the cookie value.

    Returns:
        The constructed ``mechanize.Cookie`` instance.
    """
    cookie_fields = dict(
        version=0,
        name='csrftoken',
        value=csrftoken,
        port=None,
        port_specified=False,
        domain='10gen.com',
        domain_specified=False,
        domain_initial_dot=False,
        path='/',
        path_specified=True,
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rest={'HttpOnly': None},
        rfc2109=False,
    )
    return mechanize.Cookie(**cookie_fields)
  63
+
  64
+
  65
# ---------------------------------------------------------------------------
# Main script: log in, then for every course on the dashboard collect the
# YouTube links of its lectures into "<course name>.txt".
#
# The statements below use syntax that is valid on both Python 2.7 and
# Python 3 (parenthesised single-argument print, "except ... as e") and print
# exactly what the Python 2 print statements printed.
# ---------------------------------------------------------------------------

# Browser setup: the same random token is sent both as the "csrftoken" cookie
# and as the X-CSRFToken header on every request.
br = mechanize.Browser()
cj = mechanize.LWPCookieJar()
csrftoken = makeCsrf()
cj.set_cookie(csrfCookie(csrftoken))
br.set_handle_robots(False)
br.set_cookiejar(cj)
br.addheaders.append(('X-CSRFToken', csrftoken))
br.addheaders.append(('Referer', site_url))

# Log in. The login endpoint answers with JSON carrying a "success" flag and,
# on failure, a "value" entry that is shown to the user.
try:
    login_resp = br.open(site_url + login_url,
                         urlencode({'email': EMAIL, 'password': PASSWORD}))
except mechanize.HTTPError as e:
    print("Unexpected error: %s" % e.code)
    sys.exit(1)  # non-zero status, matching the import guards above
login_state = json.loads(login_resp.read())
if not login_state.get('success'):
    print(login_state.get('value'))
    sys.exit(1)

# The dashboard lists the logged-in user's name and the enrolled courses.
dashboard = br.open(site_url + dashboard_url)
dashboard_soup = BeautifulSoup(dashboard.read())
username = dashboard_soup.find('section', 'user-info').findAll('span')[1].text
print('Logged as %s\n\n' % username)

for my_course in dashboard_soup.findAll('article', 'my-course'):
    course_url = my_course.a['href']
    course_name = my_course.h3.text
    # TARGETDIR comes from config.py and defaults to '', in which case the
    # list is written to the current directory exactly as before.
    list_path = os.path.join(TARGETDIR, course_name + '.txt')
    print('%s' % course_name)

    # The dashboard links to ".../info"; the lecture tree is at ".../courseware".
    courseware_url = re.sub(r'/info$', '/courseware', course_url)
    courseware = br.open(site_url + courseware_url)
    courseware_soup = BeautifulSoup(courseware.read())

    # "with" guarantees the list file is closed even if a request or a parse
    # step raises part-way through the course.
    with open(list_path, 'w') as f:
        for chapter in courseware_soup.findAll('div', 'chapter'):
            chapter_title = chapter.find('h3').find('a').text
            print('\t%s' % chapter_title)
            for paragraph in chapter.find('ul').findAll('li', ' '):
                par_name = paragraph.p.text
                par_url = paragraph.a['href']
                par = br.open(site_url + par_url)
                par_soup = BeautifulSoup(par.read())

                # The text of the first "seq_contents" div is itself HTML
                # and is parsed a second time to reach the video block.
                seq_contents = par_soup.findAll('div', 'seq_contents')
                video = None
                if seq_contents:
                    content_soup = BeautifulSoup(seq_contents[0].text)
                    video = content_soup.find('div', 'video')
                if video is None or not video.get('data-streams'):
                    # A section without a video must not abort the whole run.
                    print('\t\t%s: no video found, skipped' % par_name)
                    continue

                # NOTE(review): assumes data-streams looks like
                # "<speed>:<youtube id>" - confirm against the live markup.
                video_id = video['data-streams'].split(':')[1]
                video_url = youtube_url + video_id
                print('\t\t%s: %s' % (par_name, video_url))
                f.write(video_url + '\n')

    print('\nYou can now download lecture videos with the following command:\n    youtube-dl -a "%s" -A -t\n' % list_path)

0 notes on commit f07a8b9

Please sign in to comment.
Something went wrong with that request. Please try again.