/
videos.py
126 lines (103 loc) · 4.19 KB
/
videos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import dateutil.parser
import operator
import sys
import selenium
import logging
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
_LOGGER = logging.getLogger(__name__)
def update_course_retrieval_progress(current, total):
    """Redraw the one-line course retrieval progress indicator on stdout.

    The line starts with a carriage return so that successive calls
    overwrite each other instead of scrolling the terminal.

    Args:
        current: number of videos processed so far.
        total: number of videos that will be processed in total.
    """
    label = '>> Retrieving echo360 Course Info... '
    counter = "{}/{} videos".format(current, total)
    stream = sys.stdout
    stream.write("\r{0} {1} ".format(label, counter))
    # No newline is written, so flush explicitly to make the update visible.
    stream.flush()
class EchoVideos(object):
    """Date-sorted collection of EchoVideo objects for one course.

    One EchoVideo is built per entry of the supplied JSON; progress is
    reported on stdout through update_course_retrieval_progress.
    """

    def __init__(self, videos_json, driver):
        """Build one EchoVideo per JSON entry and sort them by date.

        Args:
            videos_json: sized iterable of per-video JSON objects. Must
                not be None.
            driver: object handed unchanged to every EchoVideo, which
                uses it to load the video page.

        Raises:
            AssertionError: if videos_json is None.
        """
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        if videos_json is None:
            raise AssertionError("videos_json must not be None")
        self._driver = driver
        self._videos = []
        total_videos_num = len(videos_json)
        update_course_retrieval_progress(0, total_videos_num)
        for done, video_json in enumerate(videos_json, 1):
            # Parse the start time up front so an entry without a usable
            # date fails before EchoVideo starts driving the browser. The
            # parsed value itself is not needed here.
            EchoVideo.get_date(video_json)
            self._videos.append(EchoVideo(video_json, self._driver))
            update_course_retrieval_progress(done, total_videos_num)
        # EchoVideo.date is a "%Y-%m-%d" string, so sorting the strings
        # orders the videos chronologically.
        self._videos.sort(key=operator.attrgetter("date"))

    @property
    def videos(self):
        """The list of EchoVideo objects, sorted by date."""
        return self._videos

    def _blow_up(self, str, e):
        """Print an error message plus the exception, then exit with status 1.

        Args:
            str: human-readable description of what failed.
            e: the exception that triggered the failure.
        """
        # The parameter name `str` is kept for backward compatibility but
        # shadows the builtin, so the exception is rendered through
        # format() instead of calling str(e) (which raised TypeError).
        print(str)
        print("Exception: {}".format(e))
        sys.exit(1)
class EchoVideo(object):
    """A single video: stream URL, recording date and title.

    Constructing an instance drives the supplied Selenium webdriver to the
    video's page and reads the stream URL from the player's <video> element.
    """

    def __init__(self, video_json, driver):
        """Load the video page and extract its URL, date and title.

        Args:
            video_json: mapping providing the "richMedia" (page URL),
                "startTime" (parseable date string) and "title" keys.
            driver: Selenium webdriver used to load the video page.

        If any of the keys is missing, an error is printed and the process
        exits with status 1.
        """
        self._driver = driver
        # Read every JSON field before touching the browser: a malformed
        # entry then fails immediately, and a KeyError raised by anything
        # else cannot be misreported as a JSON problem.
        try:
            rich_media = video_json["richMedia"]
            start_time = video_json["startTime"]
            title = video_json["title"]
        except KeyError as e:
            self._blow_up("Unable to parse video data from JSON (course_data)",
                          e)
        video_url = "{0}".format(rich_media)
        date = dateutil.parser.parse(start_time).date()

        # NOTE(review): _loop_find_m3u8_url loads the same page again; this
        # first load looks like it only serves the debug dump below.
        # Confirm before removing it.
        self._driver.get(video_url)
        # page_source is a webdriver call, so only fetch it when the dump
        # will actually be logged.
        if _LOGGER.isEnabledFor(logging.DEBUG):
            _LOGGER.debug("Dumping video page at %s: %s",
                          video_url,
                          self._driver.page_source)
        self._url = self._loop_find_m3u8_url(video_url, waitsecond=30)
        self._date = date.strftime("%Y-%m-%d")
        self._title = title

    def _loop_find_m3u8_url(self, video_url, waitsecond=15, max_attempts=5):
        """Load the video page until the player's video source can be read.

        Args:
            video_url: URL of the video page to load.
            waitsecond: seconds to wait for the "content-player" element
                on each attempt.
            max_attempts: limit applied separately to timeouts and to
                stale-element failures.

        Returns:
            The "src" attribute of the <video> element inside the element
            with id "content-player".

        Raises:
            selenium.common.exceptions.TimeoutException: the player did not
                appear within waitsecond on max_attempts page loads.
            StaleElementReferenceException: the element went stale on
                max_attempts page loads.
        """
        stale_attempt = 1
        refresh_attempt = 1
        while True:
            # Every attempt starts from a fresh page load.
            self._driver.get(video_url)
            try:
                WebDriverWait(self._driver, waitsecond).until(
                    EC.presence_of_element_located((By.ID, "content-player")))
                # find_element(By..., ...) is used instead of the
                # find_element_by_* helpers, which newer Selenium releases
                # no longer provide.
                player = self._driver.find_element(By.ID, 'content-player')
                video = player.find_element(By.TAG_NAME, 'video')
                return video.get_attribute('src')
            except selenium.common.exceptions.TimeoutException:
                if refresh_attempt >= max_attempts:
                    print(
                        '\r\nERROR: Connection timeouted after {} second '
                        'for {} attempts... '
                        'Possibly internet problem?'.format(
                            waitsecond, max_attempts))
                    raise
                refresh_attempt += 1
            except StaleElementReferenceException:
                if stale_attempt >= max_attempts:
                    print(
                        '\r\nERROR: Elements are not stable to retrieve '
                        'after {} attempts... '
                        'Possibly internet problem?'.format(max_attempts))
                    raise
                stale_attempt += 1

    @property
    def date(self):
        """Recording date formatted as "%Y-%m-%d"."""
        return self._date

    @property
    def url(self):
        """Video source URL read from the player page."""
        return self._url

    @property
    def title(self):
        """Video title; non-str values are returned UTF-8 encoded."""
        if not isinstance(self._title, str):
            # it's type unicode for python2
            return self._title.encode('utf-8')
        return self._title

    @staticmethod
    def get_date(video_json):
        """Return the date parsed from video_json["startTime"].

        Args:
            video_json: mapping providing a "startTime" date string.

        Returns:
            A datetime.date for the start time.

        If the key is missing, an error is printed and the process exits
        with status 1.
        """
        try:
            start_time = video_json["startTime"]
        except KeyError as e:
            # A staticmethod has no `self`, so use the static helper.
            EchoVideo._exit_with_error(
                "Unable to parse video date from JSON (video data)", e)
        return dateutil.parser.parse(start_time).date()

    @staticmethod
    def _exit_with_error(message, error):
        """Print message and the exception, then exit with status 1."""
        print(message)
        print("Exception: {}".format(error))
        sys.exit(1)

    def _blow_up(self, str, e):
        """Print an error message plus the exception, then exit with status 1.

        Args:
            str: human-readable description of what failed.
            e: the exception that triggered the failure.
        """
        # `str` shadows the builtin here (name kept for compatibility), so
        # delegate to the helper rather than calling str(e).
        self._exit_with_error(str, e)