In [1]:
import json
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver

In [157]:
def project_dates_grades(gh_un, gh_pw, df1):
    '''
    Add the completion date and grade of each completed portfolio project to a dataframe.

    Opens a Chrome session, signs in to GitHub and then to Learn, and for every row of
    the dataframe visits the student's instructor-app page, follows the portfolio
    projects link, and scrapes the date and grade of each project marked "Complete".
    The browser is always closed before the function returns or raises.

    Args:
        gh_un (str): GitHub username
        gh_pw (str): GitHub password
        df1 (dataframe): input dataframe; must have a column named `url` holding the path
                        of the student's instructor-app page (e.g. `/staff/students/<name>`).
                        The input dataframe is not modified.

    Returns:
        df (dataframe): copy of `df1` with added columns `Project_1`..`Project_5`
                        (completion date) and `Project_1_Grade`..`Project_5_Grade`
                        (grade received).  Cells are left as '' for projects that are
                        not complete.
    '''
    # CSS class strings identifying the elements scraped below
    project_div_class = 'util--padding-txl util--padding-bxl'
    status_class = 'heading heading--level-4 heading--color-grey heading--weight-lighter'
    date_class = 'heading--level-2 heading--weight-bolder heading--color-grey'
    grade_class = 'heading--level-4 heading--weight-lighter heading--color-grey util--padding-ts'

    # make driver object and visit initial website
    driver = webdriver.Chrome()
    try:
        # this page is expected to present the GitHub sign-in form
        driver.get('https://instruction.learn.co/staff/students/afeinberg22')
        time.sleep(2)

        # login with the credentials passed in (not with notebook-level globals);
        # find_element('xpath', ...) is available in both Selenium 3 and Selenium 4
        un = driver.find_element('xpath', '//*[@id="login_field"]')
        un.send_keys(gh_un)

        pw = driver.find_element('xpath', '//*[@id="password"]')
        pw.send_keys(gh_pw)

        # click submit
        sn = driver.find_element('xpath', '//*[@id="login"]/form/div[2]/input[8]')
        sn.click()
        time.sleep(2)

        # login to learn
        driver.get('http://learn.co/superguy200')
        time.sleep(2)

        sign_in_gh = driver.find_element(
            'xpath',
            '//*[@id="js--region-main"]/div/div[1]/div/div/div[5]/form/div/div[6]/a')
        sign_in_gh.click()
        # give the sign-in a moment to finish before navigating away, as after every other step
        time.sleep(2)

        # add new columns to a copy of the dataframe (dates first, then grades)
        df = df1.copy()
        for number in range(1, 6):
            df[f'Project_{number}'] = ''
        for number in range(1, 6):
            df[f'Project_{number}_Grade'] = ''

        for index, row in df.iterrows():
            # visit instructor app url; strip the leading slash so the url has no '//'
            student_path = str(row["url"]).lstrip("/")
            driver.get(f'https://instruction.learn.co/{student_path}')

            # grab html
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # grab link for portfolio projects (second-to-last custom badge link)
            badges = soup.find('div', class_='custom-badges')
            port_prof = badges.findAll('a', class_='custom-badge-icon')[-2]['href']

            # visit portfolio projects link and grab html
            driver.get(port_prof)
            time.sleep(2)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # the html adds another div if the project is completed, so the div right
            # after a completed project is not counted as a new project
            skip_indexes = set()
            project_number = 0
            for index1, project in enumerate(soup.findAll('div', class_=project_div_class)):
                if index1 not in skip_indexes:
                    project_number += 1

                status = project.find('span', class_=status_class)
                # a div without a status heading cannot be a completed project
                if status is None or status.text != 'Complete':
                    continue

                driver.get(f'https://learn.co{project.find("a")["href"]}')
                time.sleep(2)
                soup_learn = BeautifulSoup(driver.page_source, 'html.parser')
                date = soup_learn.find('h5', class_=date_class).text
                # the grade is the text after the last ':' in the last matching heading
                grade_heading = soup_learn.findAll('h5', class_=grade_class)[-1]
                grade = grade_heading.text.split(':')[-1].strip()
                df.at[index, f'Project_{project_number}'] = date
                df.at[index, f'Project_{project_number}_Grade'] = grade
                skip_indexes.add(index1 + 1)
    finally:
        # always release the browser, even when scraping fails part-way through
        driver.quit()
    return df
        

## Example Run

#### Step 1 - Grab GitHub username and password

In [158]:
def get_keys(path):
    '''
    Read the JSON file at `path` and return its parsed contents.

    Args:
        path (str): location of the JSON file

    Returns:
        the object decoded from the file's JSON
    '''
    with open(path) as json_file:
        contents = json.load(json_file)
    return contents

In [159]:
# Load the GitHub credentials from a JSON file in the current user's home directory.
# expanduser resolves `~`, so the path is not tied to one machine's username.
cred = get_keys(os.path.expanduser('~/.secret/github.json'))
git_un = cred['github_username']
git_pw = cred['github_password']

#### Step 2 - Grab dataframe with instructor url
The dataframe needs to have a column named `url` that holds the student's instructor app url.

In [160]:
# Load the student dataframe from active_df.csv (path is relative to the working directory)
df = pd.read_csv('active_df.csv')

In [161]:
# grab a small subset of the dataframe to test with: the exact name 'a a',
# plus every name containing 'Feinberg' or 'Igor Kirko'
df_test = df.loc[
    df['name'].eq('a a')
    | df['name'].str.contains('Feinberg')
    | df['name'].str.contains('Igor Kirko')
]
df_test

Unnamed: 0,name,url,program,cohort,pacing,past_cohorts,ed_coach_used,ed_coach_available,learn_url
0,a a,/staff/students/deannalinn,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,Past Cohorts:\n v-000 - left on 201...,2,2,http://learn.co/deannalinn
5,Aaron Feinberg,/staff/students/afeinberg22,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,3,3,http://learn.co/afeinberg22
926,Igor Kirko,/staff/students/maja-thurup,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,2,2,http://learn.co/maja-thurup


In [162]:
# Scrape project completion dates and grades for the test subset (opens a Chrome browser)
new_df = project_dates_grades(git_un, git_pw, df_test)

In [164]:
# Show the dataframe returned by project_dates_grades
new_df

Unnamed: 0,name,url,program,cohort,pacing,past_cohorts,ed_coach_used,ed_coach_available,learn_url,Project_1,Project_2,Project_3,Project_4,Project_5,Project_1_Grade,Project_2_Grade,Project_3_Grade,Project_4_Grade,Project_5_Grade
0,a a,/staff/students/deannalinn,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,Past Cohorts:\n v-000 - left on 201...,2,2,http://learn.co/deannalinn,,,,,,,,,,
5,Aaron Feinberg,/staff/students/afeinberg22,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,3,3,http://learn.co/afeinberg22,10/17/2019,,,,,7.0,,,,
926,Igor Kirko,/staff/students/maja-thurup,Program: Online Data Science Bootcamp,Cohort: online-ds-sp-000,Pacing: Self Paced,,2,2,http://learn.co/maja-thurup,12/23/2019,12/23/2019,,,,7.0,7.0,,,
