### Handle Imports

In [51]:
import os

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By

### Set up helper data classes

In [52]:
class Lesson:
    """Plain record describing one lesson.

    Attributes:
        title: Lesson title text.
        link: URL of the lesson page.
        video_link: URL of the lesson's video; empty string until it has
            been looked up.
    """

    def __init__(self, title, link, video_link=""):
        """Store the lesson fields.

        Args:
            title: Lesson title text.
            link: URL of the lesson page.
            video_link: URL of the lesson's video. Defaults to "" because the
                video URL is filled in after the lesson is created.
        """
        self.title = title
        self.link = link
        self.video_link = video_link

    def __repr__(self):
        """Return a readable representation for debugging and cell output."""
        return (
            f"Lesson(title={self.title!r}, link={self.link!r}, "
            f"video_link={self.video_link!r})"
        )

### Set data directory

In [53]:
# Chrome user-data (profile) directory handed to the browser.
# The location can be overridden with the MAGOOSH_CHROME_DATA_DIR environment
# variable so the notebook is not tied to one machine's absolute path; the
# original path is kept as the default.
# NOTE(review): presumably a persistent profile is used so an existing
# logged-in session is reused — confirm.
data_dir = os.environ.get(
    "MAGOOSH_CHROME_DATA_DIR",
    "/home/hardik/Projects/Python/magoosh-offline/chrome_data",
)
options = ChromeOptions()
options.add_argument("--user-data-dir=" + data_dir)

### Launch driver instance

In [56]:
# Start a Chrome WebDriver session using the ChromeOptions configured in `options`.
driver = webdriver.Chrome(options=options)

### Open Magoosh Lessons Page

In [57]:
# Point the browser at the lessons index page.
lessons_url = "https://gre.magoosh.com/lessons"
driver.get(lessons_url)

### Get Lesson List with Links

In [58]:
# Collect every element on the current page that has the "lesson-item" class.
all_links = driver.find_elements(By.CLASS_NAME, "lesson-item")

# Report how many matching elements were found.
print(f"Found {len(all_links)} elements")

Found 10 elements


### Loop over links and build the Lesson list from each title and link

In [59]:
# Holds the Lesson objects scraped from the lessons page (starts empty).
lesson_list = []

In [60]:
# Start from an empty list inside this cell so that re-running it replaces the
# previous result instead of appending duplicate lessons.
lesson_list = []
for link in all_links:
    # Each lesson item contains an anchor whose text is the title and whose
    # href is the lesson page URL.
    a_element = link.find_element(by=By.TAG_NAME, value="a")
    # The video link is not known yet; it is filled in by a later cell.
    lesson_list.append(Lesson(a_element.text, a_element.get_attribute("href"), ""))

print(f"Generated list of {len(lesson_list)} lessons.")

Generated list of 10 lessons.


### Loop over lessons to get their video link

In [61]:
# Visit each lesson page in turn and record the source URL of its player.
for lesson in lesson_list:
    driver.get(lesson.link)
    # The element with class "vjs-tech" exposes the video URL in its `src`
    # attribute.
    player_element = driver.find_element(by=By.CLASS_NAME, value="vjs-tech")
    # `lesson` is the same object stored in the list, so this updates it in place.
    lesson.video_link = player_element.get_attribute("src")

### Print list of lessons to check

In [62]:
# Show each lesson's title and video URL, followed by a "---" separator line.
for lesson in lesson_list:
    print(lesson.title, lesson.video_link, "---", sep="\n")

General Introduction 6:03
https://d296n67kxwq0ge.cloudfront.net/audio_video/dede92e8b7f0336a021237d31cd27e25eee639b7-video-959/web_webm.webm
---
What's on the GRE? 5:05
https://d296n67kxwq0ge.cloudfront.net/audio_video/b1691cd3f2ace57111faeb05d8b99372f28af5c7-video-885/web_webm.webm
---
Math Section Breakdown 3:59
https://d296n67kxwq0ge.cloudfront.net/audio_video/06ed0107e7f1a75b92ce53d5d638c44670ba80a1-video-889/web_webm.webm
---
Verbal Section Breakdown 4:23
https://d296n67kxwq0ge.cloudfront.net/audio_video/10f76691edd3e0eb88671da08ff42ff36022facf-video-891/web_webm.webm
---
Scoring Range 2:06
https://d296n67kxwq0ge.cloudfront.net/audio_video/dfdd123629daba510a051aaec30198d6216dc201-video-888/web_webm.webm
---
Computer Adaptive Testing 2:53
https://d296n67kxwq0ge.cloudfront.net/audio_video/3b682dcae09c1b0c03c5166b13ceab46306c5f39-video-890/web_webm.webm
---
Skipping Questions and Pacing 8:19
https://d296n67kxwq0ge.cloudfront.net/audio_video/811e768cbc21cbda734476036716400ec1fdf9f0-vi

### Backing up lesson list

In [63]:
# A plain assignment would only alias `lesson_list`, so later in-place edits to
# the lessons' titles would also change the originals. Build independent Lesson
# objects instead so `lesson_list` keeps the scraped values.
bkp_lessons = [
    Lesson(lesson.title, lesson.link, lesson.video_link) for lesson in lesson_list
]

### Sanitize file names

In [68]:
# Turn each title into "<position>_<Title_With_Underscores>", numbering from 1.
for position, lesson in enumerate(bkp_lessons, start=1):
    # Drop the last space-separated token of the title (the duration shown in
    # the scraped titles, e.g. "6:03"). rpartition never raises: when the title
    # contains no space at all, the whole title is kept instead of failing
    # with ValueError as rindex(' ') would.
    base_name, separator, _ = lesson.title.rpartition(" ")
    if not separator:
        base_name = lesson.title
    sanitized_name = str(position) + "_" + base_name.replace(" ", "_")
    lesson.title = sanitized_name