Before starting, make sure you run the `pip install -r requirements.txt` command to install the required dependencies.

In [None]:
from bs4 import BeautifulSoup
import pandas as pd
import time
import datetime
import os
from pathlib import Path
from selenium.webdriver.support.ui import Select

from helium import *

Define your username and password in the cell below.

In [None]:
# Khan Academy login credentials: both are typed into the login form, and
# USERNAME is also used as the name of the top-level reports folder.
# Replace the placeholders with your own values; avoid sharing or committing
# the notebook with real credentials filled in.
USERNAME = 'your-email@domain.com'
PASSWORD = 'your-password'

Let's define the required functions: extracting a table, converting an element into a BeautifulSoup object, and extracting tables that span multiple pages.

In [None]:
def get_soup(elem):
    """
    converts an element into a BeautifulSoup object
    elem: element exposing get_attribute (its 'innerHTML' is what gets parsed)
    returns: BeautifulSoup object built with the 'html.parser' parser
    """
    inner_html = elem.get_attribute('innerHTML')
    soup = BeautifulSoup(inner_html, 'html.parser')
    return soup

def get_datatable(cols, n_rows):
    """
    builds an empty pandas dataframe of the requested shape
    cols:List: column names
    n_rows:int: number of rows
    returns: dataframe indexed 0..n_rows-1 with no cell values filled in yet
    """
    row_index = range(n_rows)
    empty_frame = pd.DataFrame(index=row_index, columns=cols)
    return empty_frame

def get_row(elem, tag):
    """
    extracts the text of every `tag` cell found inside an element
    elem: selenium element to search in (a table head or a table row)
    tag:str: tag name of the cells to read, e.g. 'th' or 'td'
    returns:List[str]: text of each matching cell, in the order selenium returns them
    """
    # The find_elements_by_tag_name shortcut was removed in Selenium 4.3.
    # The generic find_elements(by, value) call works on Selenium 3 and 4;
    # 'tag name' is the locator strategy string behind By.TAG_NAME.
    cells = elem.find_elements('tag name', tag)
    return [get_soup(cell).getText() for cell in cells]

def get_link(row):
    """
    extracts link from the row
    row: selenium element for a table row
    returns:str: 'href' attribute of the first anchor ('a') element in the row
    """
    # The find_element_by_tag_name shortcut was removed in Selenium 4.3.
    # The generic find_element(by, value) call works on Selenium 3 and 4;
    # 'tag name' is the locator strategy string behind By.TAG_NAME.
    anchor = row.find_element('tag name', 'a')
    return anchor.get_attribute('href')

def get_table(t, link=None):
    """
    extracts the table from a given page - make sure you give a single table element if there are multiple tables.
    This will extract column names from the table head
    t: table element
    link:int: optional index of the column whose text is paired with the
              row's link; when None no links are collected
    returns: (dataframe, links) where dataframe holds the cell text of every
             body row and links is a list of (cell text, href) tuples
             (empty when link is None)
    """
    # The find_element(s)_by_tag_name shortcuts were removed in Selenium 4.3.
    # The generic find_element(s)(by, value) calls work on Selenium 3 and 4;
    # 'tag name' is the locator strategy string behind By.TAG_NAME.
    head = t.find_element('tag name', 'thead')
    body = t.find_element('tag name', 'tbody')

    columns = get_row(head, 'th')
    rows = body.find_elements('tag name', 'tr')

    # pre-sized frame: one line per body row, one column per header cell
    df = get_datatable(columns, len(rows))

    links = []
    for i, r in enumerate(rows):
        vals = get_row(r, 'td')
        if link is not None:
            links.append((vals[link], get_link(r)))
        for j, v in enumerate(vals):
            df.iloc[i, j] = v

    return df, links

        
def create_folder(username):
    """
    creates folder for saving reports
    the structure is as follows
    username -> date -> activity_log
                     -> assignments
    Missing folders are created and existing ones are left untouched, so a
    partially created structure is completed on the next call. Genuine
    filesystem errors (e.g. missing permissions) are raised instead of
    being reported as "folder already present".
    username:str: the username for creating folder
    returns:Path: Path variable for the dated folder
    """
    root = Path(username)
    folder = root / datetime.datetime.today().strftime("%Y-%m-%d")

    if root.is_dir():
        print('folder present already')
    if folder.is_dir():
        print('internal folder also present')

    # parents=True also creates root and the dated folder when needed;
    # exist_ok=True makes repeated runs on the same day harmless.
    for sub_folder in ('activity_log', 'assignments'):
        (folder / sub_folder).mkdir(parents=True, exist_ok=True)
    return folder


def multi_page_table(browser, wait_element, link=None):
    """
    Traverses multiple pages using next button to extract multipage reports/tables
    Every page is read before the 'Next' button is checked, so the last page
    (where 'Next' is disabled) and single-page tables (no usable 'Next'
    button) are both included.
    browser: webdriver that is controlling the browser
    wait_element: give some element which can determine if the table is present on the page
    link: links to be extracted (give index of the column from which the link needs to be extracted)
    returns: complete table, or (complete table, links) when link is given
    """
    wait_until(Text(wait_element).exists)

    next_button = Button('Next')
    table_list, links_list = [], []
    while True:
        wait_until(Text(wait_element).exists)
        # find_element_by_tag_name was removed in Selenium 4.3; the generic
        # find_element(by, value) works on Selenium 3 and 4 ('tag name' is
        # the locator strategy string behind By.TAG_NAME).
        table_element = browser.find_element('tag name', 'table')
        page_table, page_links = get_table(table_element, link=link)
        table_list.append(page_table)
        links_list.extend(page_links)

        # Stop only after the current page has been collected.
        if not (next_button.exists() and next_button.is_enabled()):
            break
        click(next_button)

    # A single page is returned as-is; several pages are stacked in order.
    table = table_list[0] if len(table_list) == 1 else pd.concat(table_list)

    if link is not None:
        return table, links_list
    return table

In [None]:
# Start a Chrome window; the returned driver object is later handed to the
# table helpers, which use it to locate the table on the page.
browser = start_chrome()
browser.maximize_window() # just to make sure that the table isn't divided into two to fit into the page

# login: open the login page, fill in the credentials defined above and submit
go_to("https://www.khanacademy.org/login")
write(USERNAME, into='Email or username')
write(PASSWORD, into='Password')
click('Log in')

In [None]:
# Change this to your class name; the same text is used both to wait for
# the page to show the class and as the target of the click.
class_name = "Example Class"
wait_until(Text(class_name).exists)
click(class_name)

In [None]:
# To handle updating of class information when new students join in.
# Best-effort: if there is nothing to click, carry on. Only ordinary errors
# are ignored, so interrupting the kernel still works.
try:
    click('Update')
except Exception:
    pass

We need to extract data from the activity overview, and we are considering today's data only.

In [None]:
# Open the activity overview once its entry is visible on the page.
wait_until(Text('Activity overview').exists)
click('Activity overview')

# Click the 'Last 7 days' control, then pick 'Today' to narrow the report.
wait_until(Text('Last 7 days').exists)
click('Last 7 days')
click('Today')

Create the folder in which the reports will be saved.

In [None]:
# folder is the Path <USERNAME>/<today's date>; the reports below are
# written inside it and its activity_log / assignments sub-folders.
folder = create_folder(USERNAME)

This will collect information about all the students in your class, as an overview.

In [None]:
# link=0 also collects, for every row, the text of the first column paired
# with the row's link; links is a list of those (text, href) tuples.
all_students_table, links = multi_page_table(browser, 'SKILLS LEVELED UP', link=0)
# index=None keeps the dataframe index out of the CSV file.
all_students_table.to_csv(folder/'Progress Report (All Students).csv', index=None)

This will collect information about each individual student by following their link and gathering their progress and assignments.

In [None]:
# Visit every student's page (name and link come from the overview table)
# and save the activity log and the assignment report as separate CSV files.
# Each step is best-effort so one student's failure does not stop the run;
# only ordinary errors are caught, so interrupting the kernel still works.
for name, l in links:
    go_to(l)
    try:
        if Text('No results').exists():
            print(f'{name} Has no records')
        else:
            st_activity = multi_page_table(browser, 'CORRECT/TOTAL PROBLEMS', None)
            st_activity.to_csv(folder/f'activity_log/{name}.csv', index=None)
    except Exception:
        print(f'{name} Has no records')

    try:
        click('Assignments')
        as_activity = multi_page_table(browser, 'DUE DATE & TIME', None)
        as_activity.to_csv(folder/f'assignments/{name}_assignment.csv', index=None)
    except Exception:
        print(f'{name} has no assignment record')