In [113]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [114]:
# Phonebook page to scrape (graduate-student listing, per the URL path).
url = "https://www.cs.umd.edu/people/phonebook/grad-student"

# Bound the wait so an unresponsive server cannot hang the notebook, and fail
# loudly on an HTTP error instead of silently parsing an error page into an
# empty table.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parsed document; the extraction cells below query it for table rows.
soup = BeautifulSoup(response.text, 'html.parser')

In [115]:
def _extract_contact(student):
    contact = student.find('td', {'class': 'views-field-contact'})
    contact_link = contact.find('a')
    if contact_link:
        contact_link = contact_link.get('href') if contact_link else ""
        email = 'https://www.cs.umd.edu' + contact_link
    else:
        user_element = contact.find('span', {'class': 'u'})
        user = user_element.text if user_element else ""
        domain_element = contact.find('span', {'class': 'd'})
        domain = domain_element.text.replace(' ', '') if domain_element else ""
        email = user + "@" + domain.replace('[.dot.]', '.') if user and domain else ""
    return email

In [116]:
def _extract_website(student):
    website = student.find('td', {'class': 'views-field-field-profile-website'}).find('a')
    website_link = website.get('href') if website else ""
    return website_link

In [117]:
def _extract_photo_link(student):
    photo = student.find('td', {'class': 'views-field views-field-field-person-photo'})
    photo_link = photo.find('img').get('src')
    return photo_link

In [118]:
# CSS classes of the cells whose stripped text is stored as-is, listed in the
# same order as the first five output columns.
text_cell_classes = (
    'views-field-field-person-last-name',
    'views-field-field-person-first-name',
    'views-field-field-degree',
    'views-field-field-profile-location',
    'views-field-field-profile-phone',
)
column_names = [
    'Last Name',
    'First Name',
    'Degree',
    'Location',
    'Phone',
    'Email',
    'Website',
    'Photo_link',
]

# One record per table row (rows carry either the "odd" or the "even" class).
data = []
for row in soup.select('tr.odd, tr.even'):
    record = [
        row.find('td', {'class': css_class}).text.strip()
        for css_class in text_cell_classes
    ]
    # Contact, website and photo need attribute lookups, so they go through
    # their dedicated extractors.
    record += [
        _extract_contact(row),
        _extract_website(row),
        _extract_photo_link(row),
    ]
    data.append(record)

df = pd.DataFrame(data, columns=column_names)

In [119]:
# Preview the first 60 scraped rows.
df.head(60)

Unnamed: 0,Last Name,First Name,Degree,Location,Phone,Email,Website,Photo_link
0,Abbaszadeh,Kasra,PhD Student,,,kasraz@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...
1,Abrar,Saad Mohammad,PhD Student,IRB 4104,,sabrar@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...
2,Acharya,Aditya,PhD Candidate,IRB 2112,,adach@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...
3,Acquaye,Christabel,PhD Student,,,cacquaye@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...
4,Agarwal,Vatsal,PhD Student,,,vatsalag@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...
5,Agrawal,Sweta,PhD Candidate,IRB 4108,,sweagraw@umd.edu,http://sweta20.github.io,https://www.cs.umd.edu/sites/default/files/sty...
6,Agrawal,Aakriti,PhD Student,IRB 5108,,agrawal5@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...
7,Agubuzo,Amby,Master's Student,,,aagubuzo@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...
8,Aguinaldo,Angeline,PhD Candidate,IRB 2116,,aaguinal@cs.umd.edu,https://aaguinal.github.io/,https://www.cs.umd.edu/sites/default/files/sty...
9,Ahmadi,Ali,PhD Student,,,ahmadia@umd.edu,,https://www.cs.umd.edu/sites/default/files/sty...


In [120]:
# Persist the scraped table; with pandas' default settings the row index is
# written as an extra, unnamed first column.
output_csv = 'UMD_CS_Phonebook_Graduate_Students.csv'
df.to_csv(output_csv)