In [2]:
# required modules
import requests
from bs4 import BeautifulSoup
from collections import deque
import pandas as pd
import ethnicolr as ec
from time import sleep
import string
import re

# crawl queue, seeded with one people-search url per lowercase ascii letter
new_urls = deque(
    "https://www.queensu.ca/search/people/?search_text=" + letter
    for letter in string.ascii_lowercase
)

# urls that have already been taken off the queue
processed_urls = set()

# column-oriented store for the scraped names and email addresses;
# each column gets its own independent list
data = {column: [] for column in ('First Name', 'Last Name', 'Email')}

# Crawl every queued search page and record one (first name, last name,
# email) entry per row of the table inside the "people-results" container.
request_timeout_seconds = 30  # give up on a request that stalls this long
request_delay_seconds = 10    # pause between consecutive requests
mailto_href = re.compile(r"^mailto:")  # compiled once, reused for every row

# process urls one by one until we exhaust the queue
while new_urls:
    # move next url from the queue to the set of processed urls
    url = new_urls.popleft()
    processed_urls.add(url)

    # get url's content; a page that cannot be fetched is reported and
    # skipped so the records gathered so far are not lost
    print("Processing %s" % url)
    profs = []
    try:
        response = requests.get(url, timeout=request_timeout_seconds)
        response.raise_for_status()
    except requests.RequestException as err:
        print("Skipping %s (%s)" % (url, err))
    else:
        # create a beautiful soup for the html document
        soup = BeautifulSoup(response.text, 'lxml')

        # find results; any of these lookups returns None when the element
        # is missing, so walk down one level at a time
        results = soup.find("div", id="people-results")
        table = results.find("table") if results is not None else None
        tbody = table.find("tbody") if table is not None else None
        if tbody is not None:
            profs = tbody.find_all("tr")

    # iterate through each person
    for prof in profs:
        # obtain contact info: first and last whitespace-separated tokens
        # of the row's first cell
        name_cell = prof.find("td")
        full_name = name_cell.get_text().split() if name_cell is not None else []
        if not full_name:
            # a row without any name text cannot yield a record
            continue
        fname = full_name[0]
        lname = full_name[-1]

        email_link = prof.find("a", href=mailto_href)
        email = "No Email" if email_link is None else email_link.get_text()

        # store contact info in data dictionary
        data['First Name'].append(fname)
        data['Last Name'].append(lname)
        data['Email'].append(email)

        # print contact info
        print(fname, lname, email)

    # pause web scraping between requests; no need to wait after the last one
    if new_urls:
        sleep(request_delay_seconds)
        
# build the dataframe from the collected columns, drop repeated rows and
# renumber the index from zero
df = pd.DataFrame(data).drop_duplicates().reset_index(drop=True)

# determine ethnicity; the second column is passed first and the first
# column second, matching the original call order
df = ec.pred_wiki_name(df, df.columns[1], df.columns[0])

# show the resulting dataframe and save it as a csv file
print(df)
df.to_csv("queens parsed.csv")

Processing https://www.queensu.ca/search/people/?search_text=a
. Praphulla pp33@queensu.ca
A Wright acw@queensu.ca
A Ginsburg david.ginsburg@krcc.on.ca
A Birk birka@queensu.ca
A. Carson scott.carson@queensu.ca
Aaditya Bhatia ab312@queensu.ca
Aaliyan Khan No Email
Aamer Mahmud mahmud@queensu.ca
Aaron Aggarwal ana1@queensu.ca
Aaron Ball balla@queensu.ca
Aaron Campbell campbela@queensu.ca
Aaron Fernandes afernandes@queensu.ca
Aaron Rosenstein ahr1@queensu.ca
Aaron Holmberg aaron.holmberg@queensu.ca
Aaron Jackson aj81@queensu.ca
Aaron McGregor am345@queensu.ca
Aaron Best anb5@queensu.ca
Aaron Bailey arb11@queensu.ca
Aaron Ruberto ar133@queensu.ca
Aaron Rutter rutteraj@queensu.ca
Aaron Sander jgs3@queensu.ca
Aaron Vincent aaron.vincent@queensu.ca
Aaron Visser aaron.visser@queensu.ca
Aaron Wood-Lyons amwl@queensu.ca
Aaron Zolderdo ajz2@queensu.ca
Aazar Kashi kashi.a@queensu.ca
Abbey Lanzo No Email
Abbey Pender alp6@queensu.ca
Abbey Arnott ara4@queensu.ca
Abbie Walker saw13@queensu.ca
Abby Os