In [44]:
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

# Define the URL of the faculty directory page
url = "https://www.cs.princeton.edu/people/faculty"

# Seconds to wait for the server before giving up; without a timeout,
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30

# (tag name, CSS class) of the element that starts each professor's entry
NAME_SELECTOR = ("h2", "person-name")

# (tag name, CSS class) of each optional detail element -> record key
DETAIL_SELECTORS = {
    ("div", "person-title"): "title",
    ("div", "person-links"): "links",
    ("p", "person-research-interests"): "research",
}


def _matching_selector(tag):
    """Return the (tag name, class) selector that `tag` matches, or None."""
    classes = tag.get("class") or []
    for selector in (NAME_SELECTOR, *DETAIL_SELECTORS):
        tag_name, css_class = selector
        if tag.name == tag_name and css_class in classes:
            return selector
    return None


def _group_by_professor(soup):
    """Group the name/title/links/research elements into one dict per professor.

    Collecting each kind of element into its own list and zipping the lists
    pairs rows purely by position, so one professor without (say) a research
    paragraph shifts every later row and truncates the table. Instead, walk
    all relevant elements once in document order: each name heading opens a
    new record and the detail elements that follow are attached to it.

    NOTE(review): assumes the name heading appears before the title, links and
    research elements of the same entry -- confirm against the page markup.
    """
    records = []
    relevant = soup.find_all(lambda tag: _matching_selector(tag) is not None)
    for tag in relevant:
        selector = _matching_selector(tag)
        if selector == NAME_SELECTOR:
            records.append({"name": tag})
        elif records:
            # Keep only the first element of each kind per professor.
            records[-1].setdefault(DETAIL_SELECTORS[selector], tag)
    return records


def _professor_name(name):
    """Return the professor's name, preferring the anchor text in the heading."""
    source = name.a if name.a is not None else name
    return source.text.strip()


def _professor_title(title):
    """Return the title text, or a placeholder when the element is missing."""
    if title is None:
        return "Title not available"
    return title.text.strip()


def _homepage_url(links):
    """Return the href of the first anchor in the links element, or a placeholder."""
    homepage_link = links.find('a', href=True) if links is not None else None
    if homepage_link:
        return homepage_link['href']
    return "Homepage link not available"


def _research_areas(research):
    """Return the text that follows the label <span>, or a placeholder."""
    span = research.span if research is not None else None
    sibling = span.next_sibling if span is not None else None
    if sibling is None:
        return "Research areas not available"
    # The sibling is normally a text node (a str subclass); if it happens to
    # be a tag, fall back to that tag's text content.
    text = sibling if isinstance(sibling, str) else sibling.get_text()
    return text.strip()


# Send an HTTP GET request to the faculty directory page
try:
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
except requests.RequestException as error:
    # Connection errors and timeouts are reported like a bad status code
    # instead of aborting with a traceback.
    print(f"Request error: {error}")
    response = None

# Check if the request was successful (status code 200)
if response is not None and response.status_code == 200:
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract professor names, titles, homepage links, and research areas
    professor_data = [
        (
            _professor_name(record["name"]),
            _professor_title(record.get("title")),
            _homepage_url(record.get("links")),
            _research_areas(record.get("research")),
        )
        for record in _group_by_professor(soup)
    ]

    # Create a table with headers and data
    table = tabulate(
        professor_data,
        headers=["Professor Name", "Professor Title", "Homepage URL", "Research Areas"],
        tablefmt="pipe",
    )

    # Save the table to a text file; an explicit encoding keeps non-ASCII
    # names from failing on platforms whose default encoding is not UTF-8.
    with open("professors_info.txt", "w", encoding="utf-8") as file:
        file.write(table)

    print("Data saved to professors_info.txt")
else:
    print(f"Failed to retrieve data from the faculty directory page at {url}")


Data saved to professors_info.txt
