Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
soraxas committed Oct 12, 2017
0 parents commit 7d789ce
Show file tree
Hide file tree
Showing 8 changed files with 681 additions and 0 deletions.
90 changes: 90 additions & 0 deletions EchoCourse.py
@@ -0,0 +1,90 @@
import json
import sys

from selenium import webdriver
from EchoVideos import EchoVideos


class EchoCourse(object):
    """A single Echo360 course section, addressed by its section UUID.

    The course does not create its own browser session. A webdriver must be
    injected with ``set_driver`` before ``get_videos`` or ``course_id`` is
    used; otherwise those calls print an error and exit the process.
    """

    def __init__(self, uuid, titles):
        """Remember the section UUID and build the portal and JSON URLs.

        Args:
            uuid: Section identifier interpolated into both URLs.
            titles: Passed through unchanged to ``EchoVideos``.
        """
        self._course_id = ""
        self._uuid = uuid
        self._titles = titles
        self._videos = None
        self._driver = None

        self._hostname = "https://view.streaming.sydney.edu.au:8443"
        self._url = "{}/ess/portal/section/{}".format(self._hostname, self._uuid)
        self._video_url = "{}/ess/client/api/sections/{}/section-data.json?pageSize=100".format(self._hostname, self._uuid)

    def get_videos(self):
        """Return the course's ``EchoVideos``, building it on first use.

        Exits the process (via ``_blow_up``) when no driver has been set or
        when the expected keys are missing from the course JSON.
        """
        # The property performs the "driver not set" check for us.
        driver = self.driver
        if self._videos is None:
            try:
                course_data_json = self._get_course_data()
                videos_json = course_data_json["section"]["presentations"]["pageContents"]
                self._videos = EchoVideos(videos_json, self._titles, driver)
            except KeyError as e:
                self._blow_up("Unable to parse course videos from JSON (course_data)", e)

        return self._videos

    @property
    def uuid(self):
        """Section UUID given at construction."""
        return self._uuid

    @property
    def hostname(self):
        """Scheme, host and port of the Echo360 server."""
        return self._hostname

    @property
    def url(self):
        """Portal page URL of this section."""
        return self._url

    @property
    def video_url(self):
        """URL of the section-data JSON document."""
        return self._video_url

    @property
    def course_id(self):
        """Course identifier (e.g. CS473) read from the course JSON.

        The value is fetched on first access and cached afterwards.
        """
        if self._course_id == "":
            try:
                # TODO Redo this. Maybe use a singleton factory to request
                # the lecho360 driver?
                # Load the portal page first to establish the 'anon' cookie
                # that Echo360 sends.
                self.driver.get(self._url)
                # _get_course_data() loads the JSON URL itself, so it is not
                # fetched separately here.
                course_data_json = self._get_course_data()

                self._course_id = course_data_json["section"]["course"]["identifier"]
            except KeyError as e:
                self._blow_up("Unable to parse course id (e.g. CS473) from JSON (course_data)", e)

        return self._course_id

    @property
    def driver(self):
        """The injected webdriver; exits the process if none was set."""
        if self._driver is None:
            self._blow_up("webdriver not set yet!!!", "")
        return self._driver

    def _get_course_data(self):
        """Load the section-data URL and return its parsed JSON.

        The JSON text is read from the page's ``<pre>`` element. Exits the
        process if that text is not valid JSON.
        """
        try:
            self.driver.get(self.video_url)
            json_str = self.driver.find_element_by_tag_name("pre").text

            return json.loads(json_str)
        except ValueError as e:
            self._blow_up("Unable to retrieve JSON (course_data) from url", e)

    def set_driver(self, driver):
        """Inject the webdriver used for all page loads."""
        self._driver = driver

    def _blow_up(self, msg, e):
        """Print ``msg`` and the exception, then exit with status 1."""
        print(msg)
        print("Exception: {}".format(str(e)))
        sys.exit(1)
112 changes: 112 additions & 0 deletions EchoDownloader.py
@@ -0,0 +1,112 @@
import dateutil.parser
import os
import sys
import urllib.request, urllib.error, urllib.parse

from selenium import webdriver


class EchoDownloader(object):
    """Log in to an Echo360 course portal and download its videos."""

    def __init__(self, course, output_dir, date_range, username, password):
        """Start a PhantomJS session and submit the portal login form.

        Args:
            course: Course object; must provide ``set_driver``, ``url``,
                ``course_id`` and ``get_videos()``.
            output_dir: Directory that downloaded files are written into.
            date_range: Pair ``(first, last)`` compared against each video's
                date; both ends are inclusive.
            username: Typed into the ``j_username`` form field.
            password: Typed into the ``j_password`` form field.
        """
        self._course = course
        self._output_dir = output_dir
        self._date_range = date_range

        self._useragent = "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25"

        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        # Browse with the same user agent that is later used for downloads.
        dcap["phantomjs.page.settings.userAgent"] = self._useragent
        self._driver = webdriver.PhantomJS(desired_capabilities=dcap)

        # Monkey Patch, set the course's driver to the one from downloader
        self._course.set_driver(self._driver)

        # Initialize to establish the 'anon' cookie that Echo360 sends.
        print('Accessing {0}'.format(self._course.url))
        self._driver.get(self._course.url)

        # Input username and password:
        user_name = self._driver.find_element_by_id('j_username')
        user_name.clear()
        user_name.send_keys(username)

        user_passwd = self._driver.find_element_by_id('j_password')
        user_passwd.clear()
        user_passwd.send_keys(password)

        login_btn = self._driver.find_element_by_id('login-btn')
        login_btn.submit()

        self._videos = []

    def download_all(self):
        """Download every course video whose date is inside the date range."""
        videos = self._course.get_videos().videos
        filtered_videos = [video for video in videos if self._in_date_range(video.date)]
        total_videos = len(filtered_videos)

        # Download the newest video first but maintain its original index
        # in case a JSON file isn't passed (and we need to label them as
        # Lecture 1, 2, ...)
        for i, video in reversed(list(enumerate(filtered_videos))):
            # TODO Check if the lecture number is included in the JSON object.
            lecture_number = self._find_pos(videos, video)
            title = video.title if (video.title != "") else "Lecture {}".format(lecture_number+1)
            filename = self._get_filename(self._course.course_id, video.date, title)

            print("Downloading {} of {}: {}".format(total_videos - i, total_videos, video.url))
            print("  to {}\n".format(filename))
            self._download_as(video.url, filename)

    @property
    def useragent(self):
        """User-Agent header value sent with download requests."""
        return self._useragent

    @useragent.setter
    def useragent(self, useragent):
        self._useragent = useragent

    def _download_as(self, video, filename):
        """Fetch the URL ``video`` and store it as ``filename`` in the output dir.

        HTTP and URL errors are reported on stdout and otherwise ignored, so
        one failed video does not stop the remaining downloads.
        """
        import shutil

        request = urllib.request.Request(video)
        request.add_header('User-Agent', self._useragent)
        opener = urllib.request.build_opener()
        target = os.path.join(self._output_dir, filename)

        try:
            # Open the response before the file so a failed request does not
            # leave an empty file behind; copy in chunks rather than holding
            # the whole video in memory.
            with opener.open(request) as response, open(target, "wb") as local_file:
                shutil.copyfileobj(response, local_file)
        except urllib.error.HTTPError as e:
            print("HTTP Error:", e.code, video)
        except urllib.error.URLError as e:
            print("URL Error:", e.reason, video)

    def _initialize(self, echo_course):
        """Load the course portal page; ``echo_course`` is currently unused."""
        self._driver.get(self._course.url)

    def _get_filename(self, course, date, title):
        """Build the output file name ``<course> - <date> - <title>.m4v``."""
        return "{} - {} - {}.m4v".format(course, date, title)

    def _in_date_range(self, date_string):
        """Return True when the parsed date lies within the inclusive range."""
        the_date = dateutil.parser.parse(date_string).date()
        return self._date_range[0] <= the_date <= self._date_range[1]

    def _find_pos(self, videos, the_video):
        """Return the index of the first video sharing ``the_video``'s date, or -1."""
        for i, video in enumerate(videos):
            if video.date == the_video.date:
                return i

        return -1
96 changes: 96 additions & 0 deletions EchoVideos.py
@@ -0,0 +1,96 @@
import dateutil.parser
import datetime
import operator
import sys

class EchoVideos(object):
    """Collection of ``EchoVideo`` objects built from course JSON, sorted by date."""

    def __init__(self, videos_json, titles, driver):
        """Create one ``EchoVideo`` per JSON entry and sort them by date.

        Args:
            videos_json: Iterable of per-video JSON objects; must not be None.
            titles: Optional list of ``{"date": ..., "title": ...}`` mappings
                used to name videos by date, or None.
            driver: Webdriver handed on to every ``EchoVideo``.
        """
        assert videos_json is not None

        self._driver = driver
        self._videos = []
        for video_json in videos_json:
            video_date = EchoVideo.get_date(video_json)
            video_title = self._get_title(titles, video_date)
            self._videos.append(EchoVideo(video_json, video_title, self._driver))

        self._videos.sort(key=operator.attrgetter("date"))

    @property
    def videos(self):
        """The videos, ordered by their ``date`` attribute."""
        return self._videos

    def _get_title(self, titles, date):
        """Return the title whose date equals ``date``, or "" if none matches.

        Exits the process (via ``_blow_up``) when a title entry lacks the
        "date" or "title" key.
        """
        if titles is None:
            return ""
        try:
            for title in titles:
                # Read the key before parsing so a malformed entry is
                # reported as a KeyError.
                raw_date = title["date"]
                title_date = dateutil.parser.parse(raw_date).date()
                if date == title_date:
                    # Return text, not bytes: callers compare the title with
                    # "" and format it into a file name.
                    return title["title"]
        except KeyError as e:
            self._blow_up("Unable to parse either titles or course_data JSON", e)

        return ""

    def _blow_up(self, msg, e):
        """Print ``msg`` and the exception, then exit with status 1."""
        print(msg)
        print("Exception: {}".format(str(e)))
        sys.exit(1)



class EchoVideo(object):
    """One lecture recording: its title, date string and stream URL."""

    def __init__(self, video_json, title, driver):
        """Resolve the stream URL and date of one video.

        Args:
            video_json: Mapping with at least "richMedia" (presentation page
                URL) and "startTime" (date/time string).
            title: Title stored for this video.
            driver: Webdriver used to open the presentation page.

        Exits the process (via ``_blow_up``) if either JSON key is missing.
        """
        self._title = title
        self._driver = driver

        try:
            # Read both fields up front so malformed JSON is reported before
            # any page is loaded.
            video_url = str(video_json["richMedia"])
            start_time = video_json["startTime"]
        except KeyError as e:
            self._blow_up("Unable to parse video data from JSON (course_data)", e)

        # The stream address is taken from the <video> element inside the
        # 'content-player' element of the presentation page.
        self._driver.get(video_url)
        player = self._driver.find_element_by_id('content-player')
        self._url = player.find_element_by_tag_name('video').get_attribute('src')

        date = dateutil.parser.parse(start_time).date()
        self._date = date.strftime("%Y-%m-%d")

    @property
    def title(self):
        """Title given at construction."""
        return self._title

    @property
    def date(self):
        """Start date formatted as YYYY-MM-DD."""
        return self._date

    @property
    def url(self):
        """``src`` attribute of the page's video element."""
        return self._url

    @staticmethod
    def get_date(video_json):
        """Return the "startTime" of ``video_json`` as a ``date``.

        Exits the process (via ``_blow_up``) if the key is missing.
        """
        try:
            start_time = video_json["startTime"]
        except KeyError as e:
            # No instance exists in a static method; call through the class.
            EchoVideo._blow_up("Unable to parse video date from JSON (video data)", e)
        return dateutil.parser.parse(start_time).date()

    @staticmethod
    def _blow_up(msg, e):
        """Print ``msg`` and the exception, then exit with status 1."""
        print(msg)
        print("Exception: {}".format(str(e)))
        sys.exit(1)
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2017 Oscar Lai

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

0 comments on commit 7d789ce

Please sign in to comment.