In [20]:
# import modules
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
import pandas as pd
import ethnicolr as ec
from pathlib import Path

# open browser and go to directory
# The geckodriver path is assembled with pathlib rather than the literal
# "\Tools\geckodriver.exe": "\T" and "\g" are invalid escape sequences that
# only work by accident (and emit a warning on current Python versions).
# The driver is expected two directories above the current working directory.
driver = str(Path().resolve().parents[2] / "Tools" / "geckodriver.exe")
# NOTE(review): `executable_path` is kept as in the original call; newer
# Selenium releases may expect a Service object instead -- confirm against
# the installed Selenium version.
browser = webdriver.Firefox(executable_path=driver)
try:
    browser.get('https://www1.ocadu.ca/directory/')

    # use BeautifulSoup to parse raw code
    soup = BeautifulSoup(browser.page_source, 'lxml')
finally:
    # close browser even if loading or parsing failed, so no Firefox /
    # geckodriver process is left running
    browser.quit()

# dictionary to hold data (one list per output column, filled row by row)
data = {
    'First Name': [],
    'Last Name': [],
    'Email': [],
}

# find data: the rows of the table with id="directory"
profs = soup.find("table", id="directory").find("tbody")

# iterate through each person
for prof in profs.find_all("tr"):
    # obtain contact info
    name_cell = prof.find("td", class_="name")
    name_span = name_cell.find("strong").find("span")

    # A name is either wrapped in a link (<a>) or, when there is no link,
    # in a nested <strong>. Entries without a link are recorded as having
    # no email.
    link_tag = name_span.find("a")
    if link_tag is not None:
        name_text = link_tag.get_text()
        # Look the email up separately so that a linked name without an
        # <em> element no longer falls into the "no link" branch and fails
        # there with an uncaught AttributeError.
        email_tag = name_cell.find("em")
        email = email_tag.get_text() if email_tag is not None else "No Email"
    else:
        name_text = name_span.find("strong").get_text()
        email = "No Email"

    full_name = name_text.split()
    print(full_name)

    # Names are listed as "Last, First". Split on the comma instead of on
    # whitespace positions so multi-word surnames ("Van Der Berg, John")
    # stay intact and the surname is not truncated when the comma is absent.
    family_part, comma, given_part = name_text.partition(",")
    if comma:
        lname = " ".join(family_part.split())
        given_tokens = given_part.split()
    else:
        # No comma: fall back to the original token order (surname first).
        lname = full_name[0] if full_name else ""
        given_tokens = full_name[1:]
    # Only the first given name is kept, as before.
    fname = given_tokens[0] if given_tokens else ""

    # store contact info in data dictionary
    data['First Name'].append(fname)
    data['Last Name'].append(lname)
    data['Email'].append(email)

    # print contact info
    print(fname, lname, email)

# blank line to separate the per-person log from the table below
print()

# build the dataframe from the collected columns, dropping repeated rows
df = pd.DataFrame(data).drop_duplicates()

# determine ethnicity: hand ethnicolr the dataframe plus the names of its
# second and first columns, in that order
# (presumably the last-name and first-name columns -- verify against `data`)
df = ec.pred_wiki_name(df, df.columns[1], df.columns[0])

# show the annotated dataframe, then save it as a csv file
print(df)
df.to_csv("ocad parsed.csv")

['Adams,', 'Debbie']
Debbie Adams dadams@faculty.ocadu.ca
['Adams,', 'Kathryn']
Kathryn Adams kadams@faculty.ocadu.ca
['Adatia,', 'Irfaan']
Irfaan Adatia iadatia@ocadu.ca
['Addeo,', 'Stephen']
Stephen Addeo saddeo@faculty.ocadu.ca
['Adler-Gitalis,', 'Sharyn']
Sharyn Adler-Gitalis sadlergitalis@faculty.ocadu.ca
['Ahrens-Embleton,', 'Anne']
Anne Ahrens-Embleton aahrensembleton@ocadu.ca
['Akbary,', 'Sayeda']
Sayeda Akbary sakbary@ocadu.ca
['Alber,', 'Beth']
Beth Alber No Email
['Aleong,', 'Tamara']
Tamara Aleong taleong@ocadu.ca
['Allen,', 'Gale']
Gale Allen gallen@ocadu.ca
['Allen,', 'Kristen']
Kristen Allen kallen@ocadu.ca
['Allen,', 'Lillian']
Lillian Allen lallen@faculty.ocadu.ca
['Allen,', 'Lisa']
Lisa Allen lisaallen@ocadu.ca
['Aloisi,', 'Paul']
Paul Aloisi paloisi@faculty.ocadu.ca
['Alvarez,', 'Francisco']
Francisco Alvarez falvarez@ocadu.ca
['Amani,', 'Golboo']
Golboo Amani gamani@faculty.ocadu.ca
['Amir,', 'Haider']
Haider Amir haideramir@faculty.ocadu.ca
['Anderson,', 'Midnight'