In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd
import os
import pickle
import getpass
from io import StringIO
import pickle

In [2]:
class Session:
    """Headless Chrome session logged in to the TULIP portal (tulip.liv.ac.uk).

    Constructing an instance is interactive: it prompts for MWS credentials,
    submits the portal login form, prints any DUO verification code shown on
    the page, waits a fixed window for the user to approve the login, and
    finally clicks the "trust browser" button.

    Attributes:
        username: MWS username typed at the prompt.
        password: MWS password read with ``getpass`` (not echoed).
        browser: The ``webdriver.Chrome`` instance driving the session.
        logged_in: ``True`` if the trust-browser button was found and clicked,
            ``False`` otherwise.
    """

    def __init__(self, driver_path="C:/Users/treharne/Documents/chromedriver.exe",
                 login_wait=5, duo_timeout=20):
        """Prompt for credentials, start Chrome and perform the login.

        Args:
            driver_path: Filesystem path of the chromedriver executable.
            login_wait: Seconds to sleep after submitting the login form
                before looking for the DUO verification code.
            duo_timeout: Length, in seconds, of the countdown during which
                the user is expected to approve the DUO prompt.

        Raises:
            Any exception raised by selenium while starting Chrome or while
            locating the login form elements. If the failure happens after
            Chrome has started, the browser is quit before re-raising.
        """
        # Get username and password from user
        self.username = input("Input your MWS username: ")
        self.password = getpass.getpass("Input your MWS password: ")
        self.logged_in = False

        # Configure webdriver
        service = Service(executable_path=driver_path)
        options = Options()
        options.add_argument('--ignore-certificate-errors')
        options.add_argument('--headless')
        self.browser = webdriver.Chrome(service=service, options=options)

        try:
            self._login(login_wait, duo_timeout)
        except BaseException:
            # The caller never receives the instance when __init__ raises, so
            # nobody else could close this headless Chrome process. The
            # exception (including Ctrl-C during the countdown) is re-raised.
            self.browser.quit()
            raise

    def _login(self, login_wait, duo_timeout):
        """Submit the login form and walk through the DUO approval step."""
        self.browser.get('https://tulip.liv.ac.uk/pls/new_portal/webwise.tul_bs_portal.home')

        username_input = self.browser.find_element(By.XPATH, "//input[@name='p_username']")
        password_input = self.browser.find_element(By.XPATH, "//input[@name='p_password']")

        # Get submit button by text "LOG IN"
        submit_button = self.browser.find_element(By.XPATH, "//button[contains(text(),'LOG IN')]")

        username_input.send_keys(self.username)
        password_input.send_keys(self.password)

        submit_button.click()

        # Give the page time to load before looking for the code
        print("wait ...")
        time.sleep(login_wait)

        # Print the text of every element with the class "verification-code"
        for element in self.browser.find_elements(By.CLASS_NAME, "verification-code"):
            print("Verification code for DUO")
            print(element.text)

        # Countdown loop; "\r" keeps the message on a single console line
        for i in range(duo_timeout, 0, -1):
            print("You have {} seconds to enter your verification code.".format(i), end="\r")
            time.sleep(1)

        # Confirm trust browser. A failure here is reported, not raised, so
        # the browser stays available for inspection. `Exception` (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit propagate.
        try:
            self.browser.find_element(By.ID, "trust-browser-button").click()
        except Exception:
            print("")
            print("Login Failure")
        else:
            self.logged_in = True
            print("")
            print("Login Success.")

In [3]:
# Interactive: prompts for MWS credentials and performs the portal login.
# The resulting module-level `start` is read by get_student_modules().
start = Session()

wait ...
Verification code for DUO
8375
You have 1 seconds to enter your verification code..
Login Success.


In [4]:
def get_student_modules(student_id):
    """Scrape one student's academic history and current modules.

    Drives the module-level ``start.browser`` through the ID enquiry form,
    opens the first "Select" result, then collects every HTML table found on
    the "Academic History" pages (stepping back with "Previous Term" until
    that is no longer possible) and on the "Current Modules" page.

    Args:
        student_id: Student id typed into the ``p_idno`` search field.

    Returns:
        ``None`` if no "Select" link could be clicked for the id (a message
        is printed). Otherwise a dict with keys ``"academic_history"`` and
        ``"current_modules"``, each a list of DataFrames as returned by
        ``pandas.read_html``.
    """
    browser = start.browser

    browser.get("https://tulip.liv.ac.uk/pls/new_portal/webwise.systgenq_new.p_liv_idenquiry")

    id_input = browser.find_element(By.XPATH, "//input[@name='p_idno']")
    id_input.send_keys(student_id)

    # Click the second of the `p_registered` inputs before searching.
    registered = browser.find_elements(By.XPATH, "//input[@name='p_registered']")[1]
    registered.click()

    browser.find_element(By.XPATH, "//input[@value='Search']").click()

    # Open the first search result via its "Select" link.
    # `Exception` (not a bare except) so Ctrl-C during a long scrape is not
    # misreported as a missing student.
    try:
        browser.find_element(By.XPATH, "//a[contains(text(),'Select')]").click()
    except Exception:
        print("Couldn't find student: ", student_id)
        return None

    # Open the "Academic History" tab
    browser.find_element(By.XPATH, "//font[contains(text(),'Academic History')]").click()

    academic_history = []

    while True:
        # Collect the tables on this page, then step to the previous term.
        # Any failure (no tables parsed, no "Previous Term" button, ...) ends
        # the walk with whatever has been collected so far; interrupts still
        # propagate.
        try:
            academic_history.extend(pd.read_html(StringIO(browser.page_source)))
            browser.find_element(By.XPATH, "//input[@value='Previous Term']").click()
        except Exception:
            break

    browser.find_element(By.XPATH, "//font[contains(text(),'Current Modules')]").click()

    current_modules = pd.read_html(StringIO(browser.page_source))

    return {
        "academic_history": academic_history,
        "current_modules": current_modules
    }

In [5]:
def read_pickle(fname):
    """Load the pickled object stored at ``fname``, creating the file if absent.

    Args:
        fname: Path of the pickle file.

    Returns:
        The unpickled object if ``fname`` exists. Otherwise an empty dict,
        after an empty dict has been pickled to ``fname``.
    """
    if not os.path.exists(fname):
        data = {}
        # Create the file at the requested path (previously this always wrote
        # to the literal "data.pickle", regardless of `fname`).
        with open(fname, "wb") as f:
            pickle.dump(data, f)
        return data

    # NOTE: pickle.load can execute arbitrary code from the file; only use
    # this on files written by this notebook, never on untrusted input.
    with open(fname, "rb") as f:
        return pickle.load(f)

In [6]:
def get_student_ids_from_enrollments(fname):
    """Extract the unique integer student ids from an enrollments CSV.

    The first nine characters of each ``sis_user_id`` value are parsed as an
    integer; values whose prefix is not a valid integer are skipped.

    Args:
        fname: Path of a CSV file with a ``sis_user_id`` column.

    Returns:
        A list of unique ``int`` ids, in no particular order.
    """
    df = pd.read_csv(fname)

    student_ids = set()
    for raw_id in df["sis_user_id"]:
        # str() makes missing (NaN) or numeric cells sliceable instead of
        # raising TypeError; "nan" then fails int() and is skipped below.
        prefix = str(raw_id)[:9]
        try:
            student_ids.add(int(prefix))
        except ValueError:
            # Not a numeric id; deliberately ignored.
            continue

    return list(student_ids)
            
                   

In [7]:
# Unique integer student ids parsed from the `sis_user_id` column of the CSV.
student_ids = get_student_ids_from_enrollments("enrollments.csv")

In [8]:
# How many unique student ids will be looked up.
len(student_ids)

3890

In [9]:
from tqdm import tqdm

# Checkpoint file holding {student_id: result of get_student_modules}.
CACHE_PATH = "data.pickle"

data = read_pickle(CACHE_PATH)

for student_id in tqdm(student_ids):
    # Ids already in the cache are skipped, so the cell can be re-run to
    # resume an interrupted scrape. Students that were not found are stored
    # as None and are therefore not retried either.
    if student_id not in data:
        data[student_id] = get_student_modules(student_id)
        # Checkpoint after every student. Write to a temporary file and swap
        # it in with os.replace so an interruption mid-dump cannot leave a
        # truncated, unreadable cache behind.
        tmp_path = CACHE_PATH + ".tmp"
        with open(tmp_path, "wb") as f:
            pickle.dump(data, f)
        os.replace(tmp_path, CACHE_PATH)

  7%|▋         | 270/3890 [04:18<42:06,  1.43it/s]  

Couldn't find student:  201769587


  7%|▋         | 272/3890 [04:27<57:57,  1.04it/s]

Couldn't find student:  201769636


 18%|█▊        | 686/3890 [08:13<31:27,  1.70it/s]  

Couldn't find student:  201590189


 22%|██▏       | 859/3890 [09:16<19:19,  2.62it/s]

Couldn't find student:  201639687


 23%|██▎       | 878/3890 [09:24<19:10,  2.62it/s]

Couldn't find student:  201746273


 33%|███▎      | 1278/3890 [12:12<19:21,  2.25it/s]

Couldn't find student:  201763660


 35%|███▍      | 1358/3890 [12:19<09:21,  4.51it/s]

Couldn't find student:  201763668


 42%|████▏     | 1646/3890 [15:37<37:50,  1.01s/it]  

Couldn't find student:  201764268


 53%|█████▎    | 2068/3890 [17:28<06:56,  4.38it/s]

Couldn't find student:  201617606


 55%|█████▍    | 2126/3890 [17:45<07:32,  3.89it/s]

Couldn't find student:  201675132


 56%|█████▋    | 2191/3890 [18:03<07:44,  3.66it/s]

Couldn't find student:  201765317


 58%|█████▊    | 2271/3890 [18:38<09:06,  2.96it/s]

Couldn't find student:  201749090


 60%|█████▉    | 2320/3890 [19:02<10:57,  2.39it/s]

Couldn't find student:  201634529


 67%|██████▋   | 2621/3890 [20:54<14:30,  1.46it/s]

Couldn't find student:  201659766


 76%|███████▋  | 2975/3890 [22:13<02:55,  5.21it/s]

Couldn't find student:  201635895


 77%|███████▋  | 3001/3890 [22:26<03:55,  3.78it/s]

Couldn't find student:  201635991


 78%|███████▊  | 3043/3890 [22:48<05:50,  2.42it/s]

Couldn't find student:  201562329


 79%|███████▉  | 3077/3890 [23:03<05:28,  2.48it/s]

Couldn't find student:  201759129


 80%|████████  | 3131/3890 [23:10<03:11,  3.96it/s]

Couldn't find student:  201767323


 85%|████████▍ | 3302/3890 [24:32<02:59,  3.27it/s]

Couldn't find student:  201636643


 86%|████████▌ | 3327/3890 [24:40<02:53,  3.24it/s]

Couldn't find student:  201661310


 87%|████████▋ | 3378/3890 [24:46<01:53,  4.50it/s]

Couldn't find student:  201644948


 89%|████████▉ | 3453/3890 [25:19<02:34,  2.82it/s]

Couldn't find student:  201645112


 93%|█████████▎| 3603/3890 [26:19<01:41,  2.82it/s]

Couldn't find student:  201588138


 93%|█████████▎| 3626/3890 [26:26<01:30,  2.90it/s]

Couldn't find student:  201768356


100%|██████████| 3890/3890 [29:24<00:00,  2.20it/s]


In [16]:
# Number of student ids with an entry in the cache dict (entries may be None
# for students that could not be found).
# NOTE(review): this cell's execution count (16) jumps ahead of the previous
# cell (9), so it was run out of order — confirm the value after a fresh
# Restart & Run All.
len(data)

1221