# Udemy Grabber

In [113]:
import os
import re
from datetime import datetime, timedelta
from random import randint
from time import sleep

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [114]:
# One Firefox session, shared by all later cells that drive the browser.
driver = webdriver.Firefox()

# requests' default headers with the User-Agent replaced by a desktop Chrome string;
# passed to every requests.get() call in the crawl cells.
headers = requests.utils.default_headers()
headers.update({
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
})

## Gather Courses

In [115]:
# Collection of all course links. A set (not a list) so that its type matches
# the union of per-source sets it is rebound to in the merge cell.
udemy = set()

### yofreesamples.com

In [116]:
# Collect the course links listed on the yofreesamples.com overview page.
freesamples_courses = set()
page = requests.get('https://yofreesamples.com/courses/free-discounted-udemy-courses-list/', headers=headers, timeout=10)
soup = BeautifulSoup(page.text, 'html.parser')

# find() returns None when the element is missing; treat that as "no entries"
# instead of failing with AttributeError.
course_list = soup.find("ul", {"class": "list"})
lis = course_list.find_all('li') if course_list is not None else []

# Skip list items that carry no course_title anchor (or an anchor without href)
# rather than aborting the whole cell on the first one.
anchors = [li.find('a', {'class': 'course_title'}) for li in lis]
freesamples_courses.update(a['href'] for a in anchors if a is not None and a.has_attr('href'))

In [117]:
# Report how many distinct links the yofreesamples.com crawl collected.
print('Found {} courses.'.format(len(freesamples_courses)))

Found 84 courses.


### bez-postovneho.cz

In [118]:
# Step 1: gather links to coupon detail pages from listing pages 1-4.
bez = set()
bez_courses = set()
for i in range(1, 5):
    page = requests.get('https://bez-postovneho.cz/?page={}'.format(i), headers=headers, timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')
    links = soup.find_all(href=re.compile("free-udemy-course/"))
    bez.update(link['href'] for link in links)

# Raw string: '\S' inside a plain literal is an invalid escape sequence.
# The dot is escaped so it matches only a literal '.'.
coupon_link = re.compile(r'udemy\.com/course\S*couponCode')

# Step 2: open every detail page and keep the first Udemy coupon link found on it.
for course in bez:
    detail = requests.get(course, headers=headers, timeout=10)
    soup = BeautifulSoup(detail.text, 'html.parser')
    tag = soup.find(href=coupon_link)
    if tag:
        bez_courses.add(tag['href'])

In [119]:
# Report how many distinct coupon links the bez-postovneho.cz crawl collected.
print('Found {} courses.'.format(len(bez_courses)))

Found 28 courses.


### reddit

In [120]:
# Extract Udemy coupon URLs from the self-texts of recent r/Udemy posts.
reddit_courses = set()
# Raw string so the backslash classes are real regex escapes; dots are escaped
# and 'https?' replaces 'http[s]*' (which also accepted 'httpsss').
regex = re.compile(r"https?://www\.udemy\.com/course/[\w\-]{5,}/[\w?/]*couponCode=\w{3,}")

# Only posts from the last ten days count. 'created_utc' is an epoch timestamp,
# so compare it with an epoch cut-off; the previous comparison of a naive UTC
# datetime against naive local time was off by the local UTC offset.
cutoff = (datetime.now() - timedelta(10)).timestamp()

# timeout added for consistency with every other request in this notebook
page = requests.get('https://www.reddit.com/r/Udemy/.json', headers=headers, timeout=10)
for article in page.json()['data']['children']:
    if article['data']['created_utc'] > cutoff:
        text = article['data']['selftext']
        reddit_courses.update(regex.findall(text))

In [121]:
# Report how many distinct coupon URLs were found in the reddit posts.
print('Found {} courses.'.format(len(reddit_courses)))

Found 3 courses.


### couponscorpion

In [122]:
# Step 1: collect every <article> carrying a 'grid_onsale' badge from listing pages 1-9.
scorpion = set()
scorpion_courses = set()
for i in range(1, 10):
    page = requests.get('https://couponscorpion.com/page/{}/'.format(i), headers=headers, timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')
    articles = soup.find_all('article')
    scorpion.update(article for article in articles if article.find('span', 'grid_onsale'))

# Step 2: for fully discounted offers, follow the site's button in the browser
# and record the URL the browser ends up on.
for course in scorpion:
    if course.find('span', 'grid_onsale').text == '100% OFF':
        driver.get(course.find(href=True)['href'])
        sleep(4)  # give the page time to render before looking for the button
        # find_element(By.XPATH, ...) works on Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.3.
        link = driver.find_element(By.XPATH, "//span[@class='rh_button_wrapper']/a").get_attribute('href')
        driver.get(link)
        final_url = driver.current_url
        # Keep everything before the first '&'. split() leaves the URL intact
        # when there is no '&', whereas slicing with find() == -1 cut off the
        # last character.
        scorpion_courses.add(final_url.split('&', 1)[0])

In [123]:
# Report how many distinct course URLs the couponscorpion crawl collected.
print('Found {} courses.'.format(len(scorpion_courses)))

Found 38 courses.


### discudemy

In [124]:
# Step 1: collect offer links from listing pages 1-9.
discudemy = set()
discudemy_courses = set()

for i in range(1, 10):
    page = requests.get('https://www.discudemy.com/all/{}'.format(i), headers=headers, timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')
    for div in soup.find_all('div', 'content'):
        meta = div.find('div', 'meta')
        # Only cards that have a header and whose meta text contains '->'
        # after its first character are kept (same condition as before, but a
        # missing meta element no longer raises AttributeError).
        if div.find('div', 'header') and meta is not None and meta.text.find('->') > 0:
            anchor = div.find('a', href=True)
            if anchor is not None:
                discudemy.add(anchor['href'])

# Step 2: rewrite each '/<segment>/' part of the offer URL to '/go/' and read
# the first link inside the 'ui segment' box of that page.
regex = re.compile(r"/[\w\-]+/")  # raw string; \w already covers digits
for course in discudemy:
    page = requests.get(regex.sub('/go/', course), headers=headers, timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')
    segment = soup.find('div', 'ui segment')
    anchor = segment.find('a', href=True) if segment is not None else None
    # Skip pages without the expected box/link instead of aborting the crawl.
    if anchor is not None:
        discudemy_courses.add(anchor['href'])

### Merge Courses

In [125]:
# Union of the per-source sets; identical URLs found by several sources collapse into one entry.
udemy = scorpion_courses | reddit_courses | bez_courses | freesamples_courses | discudemy_courses

In [126]:
# Report the number of distinct links after merging all sources.
print('Found {} courses.'.format(len(udemy)))

Found 147 courses.


## Analyse
Open each collected course on Udemy, verify that it is free only temporarily (not permanently free), and scrape its metadata. Pause with `sleep` between requests to keep Udemy from blocking us.

In [127]:
def get_course_info(link):
    """Open a course page in the shared browser and scrape its metadata.

    Parameters
    ----------
    link : str
        URL of the course page to load with the global ``driver``.

    Returns
    -------
    dict or None
        A dict with the keys ``title``, ``link``, ``rating``, ``num_ratings``,
        ``language`` and one ``topic_<i>`` key per topic link found on the
        page. ``None`` if the displayed price is not ``'Kostenlos'`` or if any
        element is missing / cannot be parsed.
    """
    driver.get(link)

    try:
        # The first 17 characters of the price text are dropped; presumably a
        # label such as 'Aktueller Preis: ' precedes the price — TODO confirm.
        price = driver.find_element(By.XPATH, "//div[@class='price-text']/span[1]").text[17:]
        if price != 'Kostenlos':
            return None

        # find_element(s)(By.XPATH, ...) works on Selenium 3 and 4; the
        # find_element(s)_by_xpath helpers were removed in Selenium 4.3.
        topic = driver.find_elements(By.XPATH, "//a[@class='topic-menu__link']")
        topics = [t.text for t in topic]
        rate = driver.find_elements(By.XPATH, "//span[@class='tooltip-container']")
        rating = [r.text for r in rate if r.text.strip() != ""]
        title = driver.find_element(By.XPATH, '//h1').text
        language = driver.find_element(By.XPATH, "//div[@class='clp-lead__locale']").text

        # The first non-empty tooltip text is sliced positionally: characters
        # 0-2 are the score (decimal comma) and the text from index 5 up to
        # 'Bew' is the rating count with '.' as thousands separator.
        # NOTE(review): exact text layout assumed from the slicing — confirm.
        summary = rating[0]
        data = {'title': title,
                'link': link,
                'rating': float(summary[:3].replace(',', '.')),
                'num_ratings': int(summary[5:summary.find('Bew')].replace('.', '')),
                'language': language,
                }
        tops = {'topic_' + str(i): name for i, name in enumerate(topics)}
        return {**data, **tops}
    except Exception as exc:
        # Best effort: a page that cannot be scraped is skipped, but visibly.
        # 'Exception' (not a bare except) lets KeyboardInterrupt stop the crawl.
        print('Skipping {} ({})'.format(link, type(exc).__name__))
        return None

In [128]:
# Scrape every course once and build the frame in a single step.
# DataFrame.append copied the whole frame per row and was removed in pandas 2.0.
records = []
for link in set(udemy):
    info = get_course_info(link)
    if info is not None:  # None = not free or not parseable
        records.append(info)
    sleep(randint(1,2))  # short random pause between page loads

# Columns that must exist even if no course qualified, in the order the old
# append-based frame produced; any further topic_<i> columns follow.
base_columns = ['title', 'link', 'rating', 'topic_0', 'topic_1', 'topic_2', 'language', 'num_ratings']
df = pd.DataFrame(records)
extra_columns = [col for col in df.columns if col not in base_columns]
df = df.reindex(columns=base_columns + extra_columns)

## Find valuable Courses with high rating

In [129]:
# Keep courses rated above 3 that have more than 8 ratings, ordered by
# language and the first two topic levels.
is_valuable = (df.rating > 3) & (df.num_ratings > 8)
valuable = df.loc[is_valuable].sort_values(by=['language', 'topic_0', 'topic_1'])

In [130]:
# Render the selected courses as this cell's output.
valuable

Unnamed: 0,title,link,rating,topic_0,topic_1,topic_2,language,num_ratings
23,Basic Bookkeeping Hacks,https://www.udemy.com/course/basic-bookkeeping...,3.5,Business,Finanzen,Buchhaltung,Englisch,56.0
26,Save on Your Taxes,https://www.udemy.com/course/save-on-your-taxe...,3.9,Business,Finanzen,Steuererklärung,Englisch,33.0
86,Forex Trading: The Complete Guide for Beginners,https://www.udemy.com/course/success-forex/?co...,3.9,Business,Finanzen,Devisenmarkt,Englisch,83.0
44,Vocal Presentation Skills From Hollywood Legends,https://www.udemy.com/course/vocal-presentatio...,3.9,Business,Kommunikation,Singen,Englisch,34.0
101,The Complete Udemy Instructor Mastermind [Unof...,https://www.udemy.com/course/the-complete-udem...,4.6,Business,Medien,Online-Kurs-Erstellung,Englisch,10.0
...,...,...,...,...,...,...,...,...
105,Learn Advanced jQuery,https://www.udemy.com/course/the-learn-jquery-...,3.7,Programmierung,Webentwicklung,jQuery,Englisch,93.0
106,Podcast in The Classroom - Kindergarten to Hig...,https://www.udemy.com/course/learn-to-podcast-...,5.0,Wissenschaft & Lehre,Lehrausbildung,Podcasts,Englisch,17.0
73,How To Create An Online Course That Sells,https://www.udemy.com/course/how-to-create-an-...,4.4,Wissenschaft & Lehre,Online-Unterricht,Online-Kurs-Erstellung,Englisch,160.0
17,Understanding the GMAT: Introduction Course,https://www.udemy.com/course/understanding-the...,3.7,Wissenschaft & Lehre,Prüfungsvorbereitung,GMAT,Englisch,194.0


In [131]:
# Snapshot file name is the current local time, formatted like '2020-02-12 21-10'.
path = datetime.now().strftime('%Y-%m-%d %H-%M')
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('./udemy-courses/', exist_ok=True)
valuable.to_csv('./udemy-courses/' + path + '.csv')

In [132]:
# Compare against an earlier, hard-coded snapshot to find titles not listed before.
old_courses = pd.read_csv('./udemy-courses/2020-02-12 21-10.csv')
# Build the lookup once; the previous version rebuilt a list of all old titles
# for every course being checked.
old_titles = set(old_courses.title)
new_courses = [course.title for course in valuable.itertuples() if course.title not in old_titles]

## Login to mydealz

In [56]:
# Open mydealz' Google social-login URL in the shared browser session.
driver.get('https://www.mydealz.de/user/social/provider/google')

NoSuchElementException: Message: Unable to locate element: input


## Edit Deal

In [57]:
# Distinct course languages among the selected courses; the deal text is written per language.
languages = valuable.language.unique()

In [58]:
# Handles to the editor toolbar buttons (bold, bullet list, link) and the text
# area, used by list_courses() and the deal-writing loop.
# find_element(By.XPATH, ...) works on Selenium 3 and 4; find_element_by_xpath
# was removed in Selenium 4.3.
bold = driver.find_element(By.XPATH, "//button[@class='toolbar-btn toolbar-btn--bold space--mr-1']/span")
li = driver.find_element(By.XPATH, "//button[@class='toolbar-btn toolbar-btn--unorderedlist space--mr-1']/span")
link = driver.find_element(By.XPATH, "//button[@class='cept-linkBtn space--mr-1']/span")
editor = driver.find_element(By.XPATH, "//div[@class='redactor-box aGrid']/div")
# Focus the text area so subsequent send_keys() calls type into it.
editor.click()

In [59]:
def list_courses(courses):
    """Type one linked list entry per course into the deal editor.

    Uses the global editor handles (``li``, ``bold``, ``link``, ``editor``),
    the global ``driver`` and the global ``new_courses`` title list.

    Parameters
    ----------
    courses : pandas.DataFrame
        Rows to write; the attributes ``rating``, ``title``, ``link`` and
        ``num_ratings`` of each row are read.

    Returns
    -------
    None
    """
    li.click()  # click the unordered-list toolbar button before writing entries
    for course in courses.itertuples():
        highlight = course.rating > 4.5
        if highlight:
            bold.click()  # bold on for highly rated courses
        if course.title in new_courses:
            editor.send_keys('*neu ')
        link.click()
        # find_element(By.ID, ...) works on Selenium 3 and 4; find_element_by_id
        # was removed in Selenium 4.3.
        driver.find_element(By.ID, 'linkPopover-href').send_keys(course.link)
        driver.find_element(By.ID, 'linkPopover-content').send_keys(course.title)
        driver.find_element(By.ID, 'linkPopover-content').send_keys(Keys.RETURN)
        if highlight:
            bold.click()  # bold off again
        # Rating rounded to whole stars on a five-star scale.
        full_stars = int(round(course.rating, 0))
        stars = '★' * full_stars + '☆' * (5 - full_stars)
        num_ratings = '({} Bewertungen)'.format(int(course.num_ratings))
        editor.send_keys(', ' + stars + ' ' + num_ratings)

        editor.send_keys(Keys.RETURN)
        sleep(0.5)  # brief pause between entries
    editor.send_keys(Keys.RETURN)

In [60]:
# Write the deal text: per language a heading (only if there are several
# languages), then per main topic a heading line followed by its course list.
for language in languages:
    if len(languages) > 1:
        bold.click()
        editor.send_keys(language)
        bold.click()
        editor.send_keys(Keys.ENTER)

    val_lang = valuable[valuable.language == language]
    main_topics = val_lang.topic_0.unique()
    for main_topic in main_topics:
        editor.send_keys(main_topic)
        editor.send_keys(Keys.RETURN)
        # Build the mask from val_lang itself. A mask taken from `valuable`
        # has a different index, and pandas then warns that the boolean key
        # is reindexed to match the frame.
        list_courses(val_lang[val_lang.topic_0 == main_topic])
    editor.send_keys(Keys.RETURN)

  del sys.path[0]


In [62]:
# Print the link of every selected course, one per line.
for course in valuable.itertuples():
    print(course.link)

https://www.udemy.com/course/wordpressdeveloper/?couponCode=FREEAR
https://www.udemy.com/course/basic-bookkeeping/?couponCode=EAA7B1C697EC63217A3A
https://www.udemy.com/course/save-on-your-taxes/?couponCode=EA50BBCF78CB0A6ECE3F
https://www.udemy.com/course/success-forex/?couponCode=SUPERPROMO45F
https://www.udemy.com/course/vocal-presentation-skills-from-broadcasters-hollywood-legends/?couponCode=696FFF1A1E7CB3649B04
https://www.udemy.com/course/the-complete-udemy-instructor-mastermind-unofficial-course/?couponCode=061F7BF8E16B07D44537
https://www.udemy.com/course/scrum-case-studies/?couponCode=120220_FREE
https://www.udemy.com/course/eft-negotiation-success/?couponCode=065A2DA3BA771C112C6F
https://www.udemy.com/course/outsource-mastery/?couponCode=FEBFREEOUTSOURCE
https://www.udemy.com/course/aftereffectsmasterclass/?couponCode=FREELIMITED
https://www.udemy.com/course/ultra-speed-3d-animation-film-making-with-plotagon/?couponCode=1FA40E18F0F4470D1118
https://www.udemy.com/course/speed

## Subscribe to Courses
Log in manually in the opened browser window before running the enrollment loop.

In [133]:
# Open the Udemy start page and click the 'btn btn-quaternary' button
# (presumably the log-in button — confirm against the live page).
# find_element(By.XPATH, ...) works on Selenium 3 and 4; find_element_by_xpath
# was removed in Selenium 4.3.
driver.get('https://www.udemy.com')
driver.find_element(By.XPATH, "//button[@class='btn btn-quaternary']").click()

In [134]:
# Visit every selected course and click the enrol button where it is offered.
for link in valuable.link:
    driver.get(link)
    try:
        sleep(randint(4, 10))  # random wait so the page can render and requests are spaced out
        btn = driver.find_element(By.XPATH, "//button[@data-purpose='buy-this-course-button']")
        if btn.text == 'Jetzt einschreiben':
            btn.click()
            sleep(6)  # give the enrolment time to go through before the next page load
        else:
            print(link, 'already subscribed.')
    except NoSuchElementException:
        # No buy button on the page: reported as already subscribed, as before.
        print(link, 'already subscribed.')
    except Exception as exc:
        # Any other failure is reported as such instead of being mislabelled;
        # the loop still continues with the next course.
        print(link, 'could not be processed:', type(exc).__name__)

https://www.udemy.com/course/basic-bookkeeping/?couponCode=EAA7B1C697EC63217A3A already subscribed.
https://www.udemy.com/course/save-on-your-taxes/?couponCode=EA50BBCF78CB0A6ECE3F already subscribed.
https://www.udemy.com/course/success-forex/?couponCode=SUPERPROMO45F already subscribed.
https://www.udemy.com/course/vocal-presentation-skills-from-broadcasters-hollywood-legends/?couponCode=696FFF1A1E7CB3649B04 already subscribed.
https://www.udemy.com/course/the-complete-udemy-instructor-mastermind-unofficial-course/?couponCode=061F7BF8E16B07D44537 already subscribed.
https://www.udemy.com/course/scrum-case-studies/?couponCode=120220_FREE already subscribed.
https://www.udemy.com/course/eft-negotiation-success/?couponCode=065A2DA3BA771C112C6F already subscribed.
https://www.udemy.com/course/outsource-mastery/?couponCode=FEBFREEOUTSOURCE already subscribed.
https://www.udemy.com/course/aftereffectsmasterclass/?couponCode=FREELIMITED already subscribed.
https://www.udemy.com/course/ultra