In [11]:
import requests
import time
import json
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

In [26]:
def lessons_completed_by_date(gh_un, gh_pw, df, path):
    '''
    Function to add track, lessons completed, last lesson completed on, and what lessons the student completed 
    by date
    
    Args:
        un (str): GitHub username
        pw (str): GitHub password
        df (dataframe): input dataframe; should have learn url for the student.  Track, lessons completed,
                        last lessons completed will be added to this dataframe.  Dataframe needs to have 
                        a column named `name` for student name and a column named `learn_url` for the 
                        url for their learn account 
        path (str): path to save the csvs for the lessons the students completed
        
    Returns:
        Nothing directly; adds to the intput dataframe (df) and saves csvs to the specified path
        
    '''
    driver = webdriver.Chrome()
    
    # initial url to visit, used to initiate the logon
    driver.get('http://learn.co/superguy200')

    # sign into github 

    # click sign up with github button
    sign_in_gh = driver.find_element_by_xpath('//*[@id="js--region-main"]/div/div[1]/div/div/div[5]/form/div/div[6]/a')
    sign_in_gh.click()

    # enter github username 
    un = driver.find_element_by_xpath('//*[@id="login_field"]')
    un.send_keys(gh_un)

    # enter github password
    pw = driver.find_element_by_xpath('//*[@id="password"]')
    pw.send_keys(gh_pw)

    # click submit
    sn = driver.find_element_by_xpath('//*[@id="login"]/form/div[2]/input[8]')
    sn.click()

    for index, row in df.iterrows():

        # visit url
        driver.get(row['learn_url'])

        # let load
        time.sleep(2)

        # grab html
        my_html = driver.page_source
        soup = BeautifulSoup(my_html, 'html.parser')

        # check to see if they are in bootcamp prep
        curriculum = soup.find('h4', 
                  class_ = 'heading heading--level-6 heading--color-green heading--font-size-larger heading--weight-bolder').text
        print(f'{row["name"]} - {curriculum}')
        if curriculum == 'Bootcamp Prep':
            df.loc[index, 'curriculum'] = curriculum
            continue
        else:
            # add curriculum
            df.loc[index, 'curriculum'] = curriculum

            # grab lessons completed 
            les_completed = soup.find('span', 
                                       class_ = 'heading heading--level-1 heading--color-green heading--font-size-largest heading--weight-lighter').text
            df.loc[index, 'lessons_completed'] = les_completed

            # grab last lesson completed 
            las_completed = soup.find('h4', 
                                      class_ = 'heading heading--level-1 heading--color-blue heading--weight-lighter').text
            df.loc[index, 'last_lesson'] = las_completed

            # click load more a bunch
            while True:
                try:
                    lm = driver.find_element_by_xpath("//*[contains(text(), 'Load more')]")
                    lm.click()
                    time.sleep(2)
                except:
                    break

            # grab html
            time.sleep(2)
            my_html = driver.page_source
            soup = BeautifulSoup(my_html, 'html.parser')

            all_labs = soup.findAll('div', 
                              class_ = 'module module--flush-wings util--padding-tl util--padding-bl')

            dates_list = []
            lessons_list = []

            for d in all_labs:
                date = d.find('div', 
                              class_ = 'heading heading--level-2 heading--color-grey-light').text
                lessons = d.find('ul',
                                      class_ = 'list list--spacing-large list--separators-grey-faintest')
                for l_lesson in lessons.findAll('div', class_ = 'media-block__content'):
                    dates_list.append(date)
                    lessons_list.append(l_lesson.find('a').text)

            df_ls = pd.DataFrame({'dates': dates_list,
                              'lesson': lessons_list})

            df_ls.to_csv(f'{path}{row["url"].split("/")[-1]}.csv', index = False)

## Example Run 

#### Step 1 - Grab GitHub username and password 

In [27]:
def get_keys(path):
    with open(path) as f:
        return json.load(f)

In [28]:
cred = get_keys('/Users/jeffreyherman/.secret/github.json')
git_un = cred['github_username']
git_pw = cred['github_password']

#### Step 2 - Grab DataFrame of Students with learn url 
Dataframe needs to have a column named `name` and a column named `learn_url` to work 

In [29]:
df = pd.read_csv('canceled_sp.csv')
df.head()

Unnamed: 0,name,url,program,cohort,pacing,past_cohorts,ed_coach_used,ed_coach_available,learn_url,curriculum,lessons_completed,last_lesson
0,adam audycki,/staff/students/superguy200,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,1,1,http://learn.co/superguy200,Bootcamp Prep,0,
1,Alex Govea,/staff/students/govenfist,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,0,0,http://learn.co/govenfist,Data Science Career v1.1,165,Introduction To Probability - Lab
2,Alex Zawatsky,/staff/students/azawatsky,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,0,0,http://learn.co/azawatsky,Bootcamp Prep,0,
3,Ali Kafagy,/staff/students/kafagy,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,1,1,http://learn.co/kafagy,Data Science Bootcamp Prep,31,Calculating Distance Lab
4,Allison C,/staff/students/alcampbell25,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,0,0,http://learn.co/alcampbell25,Data Science Bootcamp Prep,61,Applying Gradient Descent Lab


#### Step 3 - Specify path to save csvs 

In [30]:
my_path = './lessons/canceled/'

#### Step 4 - Run the Function

In [31]:
lessons_completed_by_date(git_un, git_pw, df, my_path)

adam audycki - Bootcamp Prep
Alex Govea - Data Science Career v1.1
Alex Zawatsky - Bootcamp Prep
Ali Kafagy - Data Science Bootcamp Prep
Allison C - Data Science Bootcamp Prep
Anamaria Spulber - Bootcamp Prep
Carl Harrison - Data Science Bootcamp Prep
Dina Fomina-Yadlin - Bootcamp Prep
Emily McMillen - Bootcamp Prep
Esther Chan - Data Science Bootcamp Prep
Garrett Noble - Data Science Bootcamp Prep
hanan algurashy - Data Science Bootcamp Prep
ipbyrne - Bootcamp Prep
Jason - Bootcamp Prep
Jenna Bazner - Data Science Bootcamp Prep
Jeremy Kao - Data Science Bootcamp Prep
Johanna Kunin - Data Science Bootcamp Prep
Juan Lozada - Bootcamp Prep
Kai Luo - Data Science Career V2
Kelisabeth Domond - Data Science Bootcamp Prep
Kevin Nguyen - Bootcamp Prep
Kyle Lawrence - Data Science Bootcamp Prep
Luis Concepcion - Bootcamp Prep v2
Marie Liskaser - Bootcamp Prep
Mark Daniels - Bootcamp Prep
Matthew Mollinedo - Data Science Bootcamp Prep
mgn mnn - Bootcamp Prep
Natasha J McLeod - Bootcamp Prep
Ngh