In [16]:
class Faculty:
    """Plain record describing one faculty member.

    ``name``, ``title`` and ``profile_url`` are supplied by the caller. The
    remaining attributes start out empty so they can be assigned later.
    """

    def __init__(self, name, title, profile_url):
        # Values handed in by the caller.
        self.name, self.title, self.profile_url = name, title, profile_url

        # Placeholders: a fresh list per instance, and empty strings for the
        # text fields.
        self.research_areas = []
        self.email = self.website = self.biography = ""

In [96]:
import requests
from bs4 import BeautifulSoup
import csv

def get_professor_data(faculty, timeout=30):
    """Download a faculty profile page and fill in the detail fields.

    Fetches ``faculty.profile_url``, parses the HTML with BeautifulSoup and
    assigns ``research_areas``, ``email``, ``website`` and ``biography`` on
    the object that was passed in. ``name``, ``title`` and ``profile_url``
    are left untouched.

    Any element that cannot be found on the page yields a placeholder
    instead of an exception: ``"N/A"`` for email, website and biography, and
    an empty list for research areas.

    Note: the HTTP status code is not checked, so an error page is parsed
    like any other page and simply produces placeholders.

    Args:
        faculty: Object exposing a ``profile_url`` attribute; it is mutated
            in place.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot block the caller indefinitely.

    Returns:
        The same ``faculty`` object, for call chaining.

    Raises:
        Exceptions raised by ``requests.get`` (connection failures,
        timeouts) propagate to the caller.
    """
    page = requests.get(faculty.profile_url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    # Research areas: the label is a <strong> whose enclosing <p> holds the
    # value. Either element may be absent, in which case the empty list is kept.
    research_areas = []
    label_tag = soup.find('strong', string="Research Areas:")
    paragraph = label_tag.find_parent('p') if label_tag is not None else None
    if paragraph is not None:
        research_areas = paragraph.get_text(strip=True).replace('Research Areas:', '').strip()

    # Contact links live in the first card-block paragraph. Look it up once
    # and tolerate its absence instead of calling .find on None.
    email_tag = None
    website_tag = None
    contact_block = soup.find('p', class_='card-block__text')
    if contact_block is not None:
        email_tag = contact_block.find('a', href=lambda href: href and href.startswith('mailto:'))
        website_tag = contact_block.find('a', href=lambda href: href and href.startswith('http'))

    biography_tag = soup.find('div', class_='field--name-field-person-biography')

    faculty.research_areas = research_areas
    # The visible link text (not the href) is what gets stored.
    faculty.email = email_tag.get_text(strip=True) if email_tag is not None else "N/A"
    faculty.website = website_tag.get_text(strip=True) if website_tag is not None else "N/A"
    faculty.biography = biography_tag.get_text(strip=True) if biography_tag is not None else "N/A"

    return faculty

In [97]:
import requests
from bs4 import BeautifulSoup

# Crawl the A-Z faculty directory: one listing page per letter, then one
# profile page per faculty card found on that listing.
base_url = "https://scs.gatech.edu/people/faculty/"
site_root = "https://scs.gatech.edu"
request_timeout = 30  # seconds; keeps a stalled server from hanging the cell

faculty_list = []
# Loop through each letter (A to Z)
for letter in "abcdefghijklmnopqrstuvwxyz":
    url = base_url + letter
    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as exc:
        # Treat a network failure like a bad status: report it and move on
        # rather than discarding everything collected so far.
        print(f"Failed to retrieve the page for letter {letter}: {exc}")
        continue

    if response.status_code != 200:
        print(f"Failed to retrieve the page for letter {letter}. Status code: {response.status_code}")
        continue

    soup = BeautifulSoup(response.content, "html.parser")

    # Find all faculty blocks
    for card in soup.find_all("div", class_="profile-card__content"):
        name_tag = card.find("h4", class_="card-block__title")
        title_tag = card.find("h6", class_="card-block__subtitle")
        link_tag = card.find("a")
        href = link_tag.get("href") if link_tag is not None else None

        # A card without a name or a profile link cannot be turned into a
        # usable record; skip it instead of crashing on None.
        if name_tag is None or not href:
            print(f"Skipping malformed faculty card on page for letter {letter}.")
            continue

        name = name_tag.get_text(strip=True)
        title = title_tag.get_text(strip=True) if title_tag is not None else "N/A"
        # Relative links are anchored at the site root; absolute ones are
        # used unchanged.
        if href.startswith(("http://", "https://")):
            profile_url = href
        else:
            profile_url = site_root + href

        faculty_det = Faculty(name, title, profile_url)
        try:
            faculty_det = get_professor_data(faculty_det)
        except requests.RequestException as exc:
            # Keep the listing-level data (name, title, URL) even when the
            # profile page itself could not be fetched.
            print(f"Failed to retrieve profile {profile_url}: {exc}")
        faculty_list.append(faculty_det)

        # Print the details
        print(f"Name: {faculty_det.name}")
        print(f"Title: {faculty_det.title}")
        print(f"Profile URL: {faculty_det.profile_url}")
        print(f"Research Areas: {faculty_det.research_areas}")
        print(f"Email: {faculty_det.email}")
        print(f"Website: {faculty_det.website}")
        print("-" * 40)

Name: Jacob Abernethy
Title: Associate Professor
Profile URL: https://scs.gatech.edu/people/jacob-abernethy
Research Areas: Machine Learning
Email: prof@gatech.edu
Website: N/A
----------------------------------------
Name: Mustaque Ahamad
Title: USG Regents Entrepreneur Professor
Profile URL: https://scs.gatech.edu/people/mustaque-ahamad
Research Areas: Cybersecurity, Distributed Systems
Email: mustaque.ahamad@cc.gatech.edu
Website: N/A
----------------------------------------
Name: Mostafa Ammar
Title: Regents' Professor, Interim Chair of SCS
Profile URL: https://scs.gatech.edu/people/mostafa-ammar
Research Areas: Networking
Email: ammar@cc.gatech.edu
Website: N/A
----------------------------------------
Name: Joy Arulraj
Title: Associate Professor
Profile URL: https://scs.gatech.edu/people/joy-arulraj
Research Areas: Database systems, machine learning
Email: jarulraj3@gatech.edu
Website: N/A
----------------------------------------
Name: Ramin Ayanzadeh
Title: Postdoctoral Fellow
Pr

In [99]:
# Export every collected faculty record to faculty_details.csv, one row each.
with open('faculty_details.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Name", "Title", "Profile URL", "Research Areas", "Email", "Website", "Biography"])
    for faculty in faculty_list:
        # research_areas may be a string or a list (an empty list when nothing
        # was found). Flatten lists so the cell never contains a Python repr
        # such as "[]".
        areas = faculty.research_areas
        if not isinstance(areas, str):
            areas = ", ".join(map(str, areas))
        writer.writerow([faculty.name, faculty.title, faculty.profile_url, areas, faculty.email, faculty.website, faculty.biography])