In [1]:
import requests
from bs4 import BeautifulSoup
import csv

def extract_staff_info(url, timeout=10):
    """Scrape name/e-mail pairs from the ``mailto:`` links of a web page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled host.

    Returns:
        A list of ``{'Name': ..., 'Email': ...}`` dicts, one per ``mailto:``
        link found, in document order. ``Name`` is the visible link text
        (which may be empty, or the address itself, depending on the page).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Local import: only needed here to decode percent-escapes in addresses.
    from urllib.parse import unquote

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Drop CSS/JS so their contents can never leak into extracted link text.
    for tag in soup.find_all(['style', 'script']):
        tag.decompose()

    prefix = 'mailto:'
    staff_data = []
    # Only anchors carry href; restricting the search avoids visiting every
    # <p> and <span> on the page just to find they have no href attribute.
    for link in soup.find_all('a', href=True):
        href = link['href'].strip()
        # URI schemes are case-insensitive, so accept "MAILTO:" as well.
        if not href.lower().startswith(prefix):
            continue

        # Discard "?subject=..." style parameters and decode %-escapes so only
        # the bare address is stored.
        email = unquote(href[len(prefix):].split('?', 1)[0]).strip()
        if not email:
            continue

        # Collapse internal whitespace/newlines coming from HTML formatting.
        name = ' '.join(link.get_text().split())
        staff_data.append({'Name': name, 'Email': email})

    return staff_data

def save_to_csv(data, filename, fieldnames=None):
    """Write a sequence of dicts to ``filename`` as UTF-8 encoded CSV.

    Args:
        data: Iterable of dicts, one per output row. May be empty.
        filename: Path of the CSV file to create or overwrite.
        fieldnames: Optional explicit column order. When omitted, the columns
            are the union of all row keys in first-seen order, so a row with
            an extra key no longer makes ``DictWriter`` raise ``ValueError``
            and rows missing a key get an empty cell.

    With empty ``data`` the file is still (re)written: it contains only the
    header when ``fieldnames`` is given, and is empty otherwise. Previously
    this case raised ``IndexError`` on ``data[0]``.
    """
    rows = list(data)
    if fieldnames is None:
        # dict.fromkeys de-duplicates while preserving insertion order.
        fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        # Skip the header when there are no columns at all; writing it would
        # emit a lone blank line.
        if fieldnames:
            writer.writeheader()
        writer.writerows(rows)

def process_school_websites(urls, output_file='beautiful_soup_staff_info.csv'):
    """Scrape every URL in ``urls`` and save the combined results as CSV.

    Args:
        urls: Iterable of page URLs passed one by one to
            ``extract_staff_info``.
        output_file: Path of the CSV file to write. Defaults to the name the
            script has always used.

    A URL whose download fails is reported and skipped, so one unreachable
    site does not discard the data already collected from the others. If no
    entries were collected at all, nothing is written.
    """
    extracted_data = []
    for url in urls:
        print(f"Extracting data from: {url}")
        try:
            staff_info = extract_staff_info(url)
        except requests.RequestException as exc:
            # Network/HTTP problems are per-site; keep going with the rest.
            print(f"Skipping {url}: {exc}")
            continue
        extracted_data.extend(staff_info)

    # Guard clause: avoid producing a header-less, empty CSV (and avoid the
    # IndexError the writer historically raised on an empty list).
    if not extracted_data:
        print("No staff data found; nothing was written.")
        return

    save_to_csv(extracted_data, output_file)
    print(f"Data extracted and saved to: {output_file}")

# School website URLs to scrape for staff contact details.
# Every entry ends with a comma so that adding or re-enabling a line can never
# trigger implicit string concatenation with its neighbour.
school_urls = [
    "https://www.smore.com/3pdhf",
    "https://bedfordjfhs.sharpschool.net/our_staff/staff_directory",
    # "http://blcschs.ss19.sharpschool.com",
    # Add more URLs here
]

process_school_websites(school_urls)


Extracting data from: https://www.smore.com/3pdhf
Extracting data from: http://bedfordjfhs.sharpschool.net/
Extracting data from: http://blcschs.ss19.sharpschool.com
Data extracted and saved to: beautiful_soup_staff_info.csv
