In [1]:
# import modules
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
import pandas as pd
import ethnicolr as ec
from time import sleep
from pathlib import Path
import re

# open browser and go to directory
# The driver path is assembled with pathlib instead of a "\Tools\..." string
# literal: "\T" and "\g" are invalid escape sequences that only work by
# accident and trigger warnings on newer Python versions.
driver = str(Path().resolve().parents[2] / "Tools" / "geckodriver.exe")
browser = webdriver.Firefox(executable_path=driver)
browser.get('https://www.lakeheadu.ca/faculty-and-staff/directory')

# find campus selection box and submit box
campus_select = Select(browser.find_element_by_id('loc_id'))
# the first option is skipped (presumably a placeholder entry -- confirm against the page)
campus_labels = [c.text for c in campus_select.options][1:]
submit_button = browser.find_element_by_id('directory-search')

# dictionary to hold data: one list per output column, filled in lockstep
data = {
    'First Name': [],
    'Last Name': [],
    'Email': [],
}

# scrape all campuses
# Element ids of the per-campus department drop-downs; any campus not listed
# here falls back to the "group_BARR" box.
department_box_ids = {
    "Thunder Bay": "group_TBAY",
    "Orillia": "group_ORIL",
}
# compile the patterns once instead of on every department / person
row_class_pattern = re.compile(r"^clearfix row views-row-")
mailto_pattern = re.compile(r"^mailto:")
# pause between requests in seconds (robots.txt asks for 10 second delay)
crawl_delay = 15

for campus_label in campus_labels:
    # select next campus
    campus_select.select_by_visible_text(campus_label)

    # find department selection box
    box_id = department_box_ids.get(campus_label, "group_BARR")
    department_select = Select(browser.find_element_by_id(box_id))
    # the first option is skipped, as with the campus list
    department_labels = [d.text for d in department_select.options][1:]

    # scrape all departments
    for department_label in department_labels:
        # select next department and click submit
        department_select.select_by_visible_text(department_label)
        submit_button.click()

        # use BeautifulSoup to parse raw code
        soup = BeautifulSoup(browser.page_source, 'lxml')

        # find data; a missing result container is treated as "no results"
        # rather than crashing on None
        result_box = soup.find("div", id="result")
        if result_box is None:
            results = []
        else:
            results = result_box.find_all("div", class_=row_class_pattern)

        # print heading
        print("CAMPUS: %s \t DEPARTMENT: %s" %(campus_label, department_label))

        # make sure results were found
        if not results:
            # print error message
            print("No results found!\n")
            sleep(crawl_delay)
            continue

        # iterate through each person
        for prof in results:
            # obtain contact info; an entry lacking the expected name span,
            # mailto link, or a second name token is skipped so that a single
            # malformed row does not abort the run and lose the data so far
            try:
                full_name = prof.find("div", class_="grid-3").find("span", class_="name").get_text().split(" ")
                fname = full_name[1]
                lname = full_name[-1]
                email = prof.find("div", class_="grid-4").find("span", class_="email").find("a", href=mailto_pattern).get_text()
            except (AttributeError, IndexError):
                # AttributeError: a find() returned None; IndexError: name too short
                print("Skipping entry with missing name or email")
                continue

            # store contact info in data dictionary
            data['First Name'].append(fname)
            data['Last Name'].append(lname)
            data['Email'].append(email)

            # print contact info
            print(fname, lname, email)

        # blank line after each department, then wait before the next request
        print()
        sleep(crawl_delay)
        
# Post-process the scraped data. The browser is closed in a ``finally`` so
# that a failure in the prediction or CSV step does not leave Firefox running.
try:
    # create pandas dataframe and remove any duplicate entries
    df = pd.DataFrame(data).drop_duplicates()

    # determine ethnicity (arguments: dataframe, last-name column, first-name column)
    df = ec.pred_wiki_name(df, 'Last Name', 'First Name')

    # print dataframe and write data to csv file
    print(df)
    df.to_csv("lakehead parsed.csv")
finally:
    # close browser
    browser.quit()

Using TensorFlow backend.


CAMPUS: Thunder Bay 	 DEPARTMENT: Aboriginal Cultural & Support Services
Sheryl O'Reilly aboriginalcounsellor@lakeheadu.ca
Sheila Pelletier-Demerah acss@lakeheadu.ca
Yolanda Twance acss1@lakeheadu.ca

CAMPUS: Thunder Bay 	 DEPARTMENT: Aboriginal Initiatives
Denise Baxter vp.ai@lakeheadu.ca
Anna Chief aboriginaloutreach@lakeheadu.ca
Ashley Dokuchie adokuchi@lakeheadu.ca
Lisa Harris amp@lakeheadu.ca
Jerri-Lynn Orr nap@lakeheadu.ca

CAMPUS: Thunder Bay 	 DEPARTMENT: Accounts Receivable
Kirsten Arnew karnew@lakeheadu.ca
Jennifer Caren jacaren@lakeheadu.ca
Christopher Gallinger clgallin@lakeheadu.ca
Jessica Gerlach jgerlach@lakeheadu.ca
Sylvia Griffiths sgriffit@lakeheadu.ca
Jessica Plemel-Sperry jessie.sperry@lakeheadu.ca
Yvonne Vennes-Kennedy yvonne.vennes-kennedy@lakeheadu.ca

CAMPUS: Thunder Bay 	 DEPARTMENT: Administration, Orillia Campus
Christopher Glover cdglover@lakeheadu.ca

CAMPUS: Thunder Bay 	 DEPARTMENT: Admissions (Undergraduate)
Susan Auld susan.auld@lakeheadu.ca
Nicholas Ch