In [None]:
# import modules
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
import pandas as pd
import ethnicolr as ec
from time import sleep
from pathlib import Path

# Scrape the Brock University staff directory one department at a time,
# collect first name / last name / email for every listed person, predict
# ethnicity from the names with ethnicolr, and write the result to a CSV file.

# Path to geckodriver: the "Tools" folder under the third ancestor of the
# current working directory.  Joining with pathlib avoids the invalid
# "\T" / "\g" escape sequences a hand-written backslash string relies on.
driver = str(Path().resolve().parents[2] / "Tools" / "geckodriver.exe")


def _cell_text(row, css_class):
    """Return the text of the ``td`` with *css_class* in *row*, or "" if absent."""
    cell = row.find("td", class_=css_class)
    return cell.get_text() if cell is not None else ""


# open browser and go to directory
browser = webdriver.Firefox(executable_path=driver)

# Everything that talks to the browser runs inside try/finally so the
# Firefox/geckodriver process is shut down even if scraping raises.
try:
    browser.get('https://brocku.ca/directory/')

    # find department selection box and submit button
    submit_button = browser.find_element_by_id('submit')
    departments_select = Select(browser.find_element_by_id('departments'))
    # The first option is skipped -- presumably a placeholder entry rather
    # than a real department (TODO confirm against the live page).
    departments_labels = [
        option.text for option in departments_select.options
    ][1:]

    # dictionary to hold data (one list per output column)
    data = {
        'First Name': [],
        'Last Name': [],
        'Email': [],
    }

    # scrape all departments
    for department_label in departments_labels:
        # select next department and click submit
        departments_select.select_by_visible_text(department_label)
        submit_button.click()

        # use BeautifulSoup to parse raw code
        soup = BeautifulSoup(browser.page_source, 'lxml')

        # Find the data table and its rows.  A page without the expected
        # table/tbody is treated as "no rows" instead of crashing with an
        # AttributeError on None.
        datatable = soup.find("table", id="datatable")
        tablebody = datatable.find("tbody") if datatable is not None else None
        rows = tablebody.find_all("tr") if tablebody is not None else []

        # iterate through each person
        for row in rows:
            # the table reports an empty department with a message row
            if row.find("td", string="Sorry, no results were found."):
                break

            # obtain contact info, substituting placeholders for empty
            # (or missing) table cells
            fname = _cell_text(row, "firstname") or "N.A."
            lname = _cell_text(row, "lastname") or "N.A."
            email = _cell_text(row, "email") or "No Email"

            # store contact info in data dictionary
            data['First Name'].append(fname)
            data['Last Name'].append(lname)
            data['Email'].append(email)

            print(fname, lname, email)

        # pause scraping for 10 seconds between department requests
        sleep(10)
finally:
    # close browser
    browser.quit()

# create pandas dataframe and remove any duplicate entries
df = pd.DataFrame(data)
df = df.drop_duplicates()

# determine ethnicity; the column arguments are passed last name first,
# then first name (the same columns the positional list(df)[1] / list(df)[0]
# lookups resolved to)
df = ec.pred_wiki_name(df, 'Last Name', 'First Name')

# print dataframe and write data to csv file
print(df)
df.to_csv("brock parsed.csv")

Using TensorFlow backend.


Kristen Atack katack@brocku.ca
Monica Drenth mdrenth@brocku.ca
Henry Gerbrandt hgerbrandt@brocku.ca
Laura Lane llane@brocku.ca
Philipp Lesmana plesmana@brocku.ca
Sarah Miller smiller2@brocku.ca
Allyson Miller amiller4@brocku.ca
Gail I Neff gneff@brocku.ca
Maggie Whitfield mwhitfield@brocku.ca
