In [1]:
# import modules
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd
import ethnicolr as ec
from pathlib import Path
import string
import sys
from time import sleep
from bs4 import BeautifulSoup
        
# dictionary to hold data
data = {
    'First Name' : [],
    'Last Name' : [],
    'Email' : []
}

# unwanted prefixes
prefixes = ["President",
            "Prof",
            "Dr",
            "Dean",
            "Dr.",
            "Professor",
            "Prof.",
            "Mr",
            "Mr.",
            "Miss",
            "Ms",
            "Ms.",
            "Mrs",
            "Mrs.",
            "P.Eng.",
            "Ophthalmologist",
            "Rev."]

# open browser and go to directory
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-extensions')
driver = str(Path().resolve().parents[2]) + "\Tools\chromedriver.exe"
browser = webdriver.Chrome(executable_path=driver, options=chrome_options)
timeout = 60
browser.get('https://www.uwo.ca/westerndir/')

# search nothing
search_button = browser.find_element_by_xpath('//*[@id="TabbedPanels1"]/div/div[1]/div/form/input[4]')
search_button.click()

# wait for page to load
try:
    element_present = EC.presence_of_element_located((By.ID, 'first_name_no-js'))
    WebDriverWait(browser, timeout).until(element_present)
except TimeoutException:
    sys.exit("Timed out waiting for page to load")

# iterate through all queries
for q in string.ascii_lowercase:
    # find first name field, search button
    fname_field = browser.find_element_by_id('first_name_no-js')
    search_button = browser.find_element_by_xpath("/html/body/div[4]/div[3]/div[1]/form/input[4]")
    
    # make next query
    fname_field.clear()
    fname_field.send_keys(q)
    search_button.click()
    
    # wait for page to load
    sleep(10)
    
    # use BeautifulSoup to parse raw code
    soup=BeautifulSoup(browser.page_source,'lxml')
    
    # find results
    datatable = soup.find("table", id="people_search_results")
        
    # iterate through each person
    for prof in datatable.findAll("tr")[1:]:
        # obtain contact info and clean it
        full_name = prof.findAll("td")[0].find("a").get_text().title().split(", ")
        full_name[0] = full_name[0][:len(full_name[0])-1]
        full_name.extend(full_name[1].split())
        del full_name[1]
        if full_name[1] in prefixes:
            del full_name[1]
        lname = full_name[0]
        fname = None
        if len(full_name) == 1:
            fname = "N.A."
        else:
            fname = ' '.join(full_name[1:])

        email =  prof.findAll("td")[1].find("a")
        if email == None:
            email = "No Email"
        else:
            email = email.get_text()

        # store contact info in data dictionary
        data['First Name'] += [fname]
        data['Last Name'] += [lname]
        data['Email'] += [email]
    
        # print contact info
        print(fname, lname, email)
        
# close browser
browser.quit()

# create pandas dataframe and remove any duplicate entries
df = pd.DataFrame(data)
df = df.drop_duplicates()
    
# determine ethnicity
df = ec.pred_wiki_name(df,list(df)[1],list(df)[0])
    
# print dataframe and write data to csv file
print(df)
df.to_csv("western parsed.csv")

Using TensorFlow backend.


Ahmed Abba aabbas24@uwo.ca
Alina Abbas aabbasi7@uwo.ca
Abdalla Abdalla Hassa ahass29@uwo.ca
Androu Abdalmala aabdalma@uwo.ca
Abdalla Abdelhad aabdal2@uwo.ca
Ayah Abusamah aabusam5@uwo.ca
Awatif Abuzgai aabuzgai@uwo.ca
Adriana Acimovi No Email
Alison Adai aadair@uwo.ca
Ailene Adam aadams@housing.uwo.ca
Aislinn Adam aadams59@uwo.ca
Alicia Adam aadams49@uwo.ca
Allison Adam aadams69@uwo.ca
Aiham Adaw aadawi2@uwo.ca
Akinade Adebowal aadebowa@uwo.ca
Abdalla Adlan Alhashm aalhash@uwo.ca
Angella Adr aadra@uwo.ca
Alok Agarwa aagarwa9@uwo.ca
Anita Aggarwa aaggar44@uwo.ca
Azmi Agh aagha7@uwo.ca
Alaina Aguann aaguann2@uwo.ca
Angie Lizet Aguilar Gonzale aaguila3@uwo.ca
Ahsan Ahma aahma67@uwo.ca
Abdelaziz Ahme aahmed53@uwo.ca
Ahmed Ahme aahme259@uwo.ca
Aly Ahme aahme243@uwo.ca
Ashfaque Ahme aahmed32@uwo.ca
Alice A zai2@uwo.ca
Aimaiti Aikerem aaikerem@uwo.ca
Aishajiang Aizezikal aeziz@ivey.ca
Alireza Akbar aakbari@uwo.ca
Azita Akbari Moazam aakbati@uwo.ca
Adeola Akinlaj aakinla@uwo.ca
Ayodeji Akinlaj