# Music
## Symphony
### https://www.pittsburghsymphony.org/


In [15]:
###################################### Events ######################################

import json
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


class PittsburghSymphonyScraper:
    """Scrape event listings from the Pittsburgh Symphony calendar pages.

    Each calendar page is loaded in a headless Chrome session driven by
    Selenium, parsed with BeautifulSoup, and the extracted events are appended
    to ``result_file``.
    """

    def __init__(self):
        """Start a headless Chrome driver and set the calendar URL and output path."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        self.driver = webdriver.Chrome(options=chrome_options)
        # The page number is appended directly to this URL.
        self.base_url = "https://www.pittsburghsymphony.org/calendar?page="
        self.result_file = "../raw_documents/Pittsburgh_Symphony.json"

    def fetch_page(self, page_num):
        """Load calendar page ``page_num`` and return it parsed.

        Args:
            page_num: Page number appended to ``base_url``.

        Returns:
            A ``BeautifulSoup`` tree of the page source, or ``None`` if
            loading or parsing raised (the error is printed).
        """
        url = f"{self.base_url}{page_num}"
        try:
            self.driver.get(url)
            # Fixed pause to give the page time to load before reading its source.
            time.sleep(3)
            return BeautifulSoup(self.driver.page_source, "html.parser")
        except Exception as e:
            # Best-effort boundary: a page that fails to load is reported and skipped.
            print(f"Error fetching page {page_num}: {e}")
            return None

    def extract_event_info(self, soup):
        """Collect one dict per ``<article class="event">`` element in ``soup``.

        Args:
            soup: Parsed calendar page.

        Returns:
            A list of dicts with the keys ``event_name``, ``event_time``,
            ``venue`` and ``organization``. An event for which any of the four
            lookups fails is reported and left out of the list.
        """
        events = []
        for event in soup.find_all('article', class_='event'):
            try:
                # find() returns None for a missing element, so the chained
                # get_text() raises and the whole event is skipped.
                title = event.find('h3', class_='title').get_text(strip=True)
                # Named event_time so the imported ``time`` module is not shadowed.
                event_time = event.find('time', class_='range').get_text(strip=True)
                venue = event.find('div', class_='venue').get_text(strip=True)
                organization = event.find('div', class_='organization').get_text(strip=True)
            except Exception as e:
                # Best-effort: one malformed event must not drop the rest of the page.
                print(f"Error extracting event info: {e}")
                continue

            events.append({
                "event_name": title,
                "event_time": event_time,
                "venue": venue,
                "organization": organization,
            })
        return events

    def append_to_json(self, events):
        """Append each event in ``events`` to ``result_file``.

        Every event is written as its own indented JSON object followed by a
        newline, and the file is opened in append mode, so the file holds a
        sequence of concatenated objects rather than one JSON document.

        Args:
            events: Iterable of JSON-serializable event dicts.
        """
        try:
            with open(self.result_file, 'a', encoding="utf-8") as f:
                for event in events:
                    json.dump(event, f, indent=4)
                    f.write("\n")
        except (OSError, TypeError, ValueError) as e:
            # OSError: the file cannot be opened or written;
            # TypeError/ValueError: an event is not JSON-serializable.
            print(f"Error writing to JSON file: {e}")

    def scrape(self, first_page=1, last_page=5):
        """Fetch, parse and store every calendar page in an inclusive range.

        Args:
            first_page: First page number to scrape.
            last_page: Last page number to scrape (inclusive). The defaults
                cover pages 1 through 5.
        """
        for page_num in range(first_page, last_page + 1):
            print(f"Scraping page {page_num}...")
            soup = self.fetch_page(page_num)
            if soup:
                # Each page is written as soon as it is parsed, so earlier
                # pages are kept even if a later one fails.
                events = self.extract_event_info(soup)
                self.append_to_json(events)

    def close(self):
        """Quit the Chrome driver."""
        self.driver.quit()


if __name__ == "__main__":
    # Run the events scraper end to end.
    scraper = PittsburghSymphonyScraper()
    try:
        scraper.scrape()
    finally:
        # Quit the browser even when scraping raises, so no Chrome process is left behind.
        scraper.close()
    print("Pittsburgh Symphony Scraping completed.")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Pittsburgh Symphony Scraping completed.


In [14]:
###################################### Musicians ######################################
import json
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Pittsburgh Symphony Orchestra musicians page URL
url = "https://www.pittsburghsymphony.org/pso_home/web/musicians"

# Fetch the page. The timeout keeps the script from waiting forever on a
# stalled connection, and raise_for_status() stops the run before an HTTP
# error page is parsed as if it were the musicians list.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the page content using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Maps each section name to a list of musician dicts
musicians_data = {}

# Define the path of the existing JSON file
# NOTE(review): this is the same path the events scraper appends to. A file
# holding several concatenated JSON objects does not parse with json.load, in
# which case this script starts from an empty dict and later rewrites the file.
json_file_path = "../raw_documents/Pittsburgh_Symphony.json"

# If the file exists, load the existing data
if os.path.exists(json_file_path):
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        try:
            existing_data = json.load(json_file)
        except (json.JSONDecodeError, UnicodeDecodeError):
            existing_data = None
    # Only a JSON object can be extended by section name; any other document
    # (list, string, number) is ignored instead of breaking the loop below.
    if isinstance(existing_data, dict):
        musicians_data = existing_data

# Define a helper function to get musician introduction from subpages
def get_musician_introduction(subpage_url):
    """Return the biography text from a musician's subpage.

    Args:
        subpage_url: Absolute URL of the musician's page.

    Returns:
        The text of the page's ``<div class="bio-text">`` with pieces joined
        by single spaces, or ``None`` when the request fails, the server
        answers with an HTTP error status, or the page has no such element.
    """
    try:
        # Without a timeout a stalled connection would block the whole run.
        subpage_response = requests.get(subpage_url, timeout=30)
        # Do not parse an HTTP error page as if it were a biography.
        subpage_response.raise_for_status()
        subpage_soup = BeautifulSoup(subpage_response.content, "html.parser")
        bio_text_div = subpage_soup.find("div", class_="bio-text")
        if bio_text_div:
            return bio_text_div.get_text(strip=True, separator=" ")
    except Exception as e:
        # Deliberate best-effort boundary: one bad subpage is reported and
        # skipped so the remaining musicians are still collected.
        print(f"Error accessing {subpage_url}: {e}")
    return None

# Loop over sections such as First Violin, Second Violin, etc.
for section in soup.find_all("h3"):
    section_name = section.get_text().strip()
    # If this section already exists in the JSON, skip it to avoid duplicates
    if section_name in musicians_data:
        continue

    musicians_data[section_name] = []

    # Find the <p> tag containing musician list under each section
    musician_list = section.find_next("p")
    if musician_list is None:
        continue

    # The title is the text between the first and second '|' of the whole
    # paragraph, so it is computed once per section rather than per link.
    # NOTE(review): every musician in a section therefore gets the same
    # title; per-musician titles would need the page markup - confirm.
    section_text = musician_list.get_text(strip=True)
    musician_title = section_text.split('|')[1].strip() if '|' in section_text else ""

    # Each <a> inside the paragraph is treated as one musician
    for musician in musician_list.find_all("a"):
        musician_data = {
            "name": musician.get_text(strip=True),
            "title": musician_title,
        }

        # If musician has a subpage, fetch their introduction
        musician_subpage_url = musician.get("href")
        if musician_subpage_url:
            # urljoin resolves root-relative, relative and absolute hrefs
            # against the musicians page; plain string concatenation only
            # produced a valid URL for hrefs starting with "/".
            full_subpage_url = urljoin(url, musician_subpage_url)
            introduction = get_musician_introduction(full_subpage_url)
            if introduction:
                musician_data["introduction"] = introduction

        # Append each musician's data to the list under the section
        musicians_data[section_name].append(musician_data)

# Write the merged data back; mode "w" replaces the file's previous contents
with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(musicians_data, json_file, indent=4)
    json_file.write("\n")  # Optional newline for better readability

print("Musicians' data has been appended to the JSON file.")

Musicians' data has been appended to the JSON file.


## Opera
### https://pittsburghopera.org/

## Cultural Trust
### https://trustarts.org/

# Museum
## Carnegie Museum
### https://carnegiemuseums.org/

## Heinz History Center
### https://www.heinzhistorycenter.org/

## The Frick
### https://www.thefrickpittsburgh.org/

## More

# Food
## Food Festivals
### https://www.visitpittsburgh.com/events-festivals/food-festivals/

## Picklesburgh
### https://www.picklesburgh.com/

## Pittsburgh Taco Fest
### https://www.pghtacofest.com/

## Pittsburgh Restaurant Week
### https://pittsburghrestaurantweek.com/

## Little Italy Days
### https://littleitalydays.com/

## Banana Split Fest
### https://bananasplitfest.com/