## Import function information
### Writing all the import code here 

In [1]:
import time
import requests
import sys
from bs4 import BeautifulSoup
import csv
import pandas as pd

## Using BeautifulSoup APIs 
### Using BeautifulSoup APIs to extract the event information from "https://visitseattle.org/events"

In [3]:

def scrape_seattle_events(url, timeout=30):
    """Scrape event summaries from an event listing page.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of dicts with the keys ``'title'``, ``'date'`` and
        ``'location'``, one per ``div.event-info`` container found on the
        page. A field whose element is absent from a container is reported
        as ``"N/A"``. The list is empty when the response status is not 200
        or when no container matches.
    """

    def text_of(element):
        # find() returns None when nothing matches; calling .text on that
        # would raise AttributeError and abort the whole scrape.
        return element.text.strip() if element is not None else "N/A"

    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): the run recorded in this notebook printed an empty list,
    # so 'div.event-info' may not match the live markup -- confirm the selector.
    events = []
    for event_container in soup.find_all('div', class_='event-info'):
        events.append({
            'title': text_of(event_container.find('h2')),
            'date': text_of(event_container.find('span', class_='date')),
            'location': text_of(event_container.find('span', class_='location')),
        })

    return events

# Listing page to scrape.
url = "https://visitseattle.org/events"
seattle_events = scrape_seattle_events(url)
print("URL: ", url)

# Size in bytes of the list object itself (sys.getsizeof does not follow the
# references to the dicts it holds), followed by the list.
print(sys.getsizeof(seattle_events))
print(seattle_events)

# Report the first event, or say so when the scrape came back empty.
if not seattle_events:
    print("No events found.")
else:
    first_element = seattle_events[0]
    print(f"The first element in the list is:\nTitle: {first_element['title']}\nDate: {first_element['date']}\nLocation: {first_element['location']}")

# Preview at most five events, each followed by a '---' separator line.
preview = seattle_events[:5]
for event in preview:
    print(f"Title: {event['title']}\nDate: {event['date']}\nLocation: {event['location']}\n---")


URL:  https://visitseattle.org/events
56
[]
No events found.


## Using BeautifulSoup APIs 
### Using BeautifulSoup APIs to extract the event links from the first listing page, "https://visitseattle.org/events/page/1"

In [4]:
# First page of the paginated event listing.
url = "https://visitseattle.org/events/page/1"

# HTTP verbs line up with CRUD operations, and `requests` has one helper per verb:
#   Create -> POST    e.g. requests.post("https://concert.com/buy")
#   Read   -> GET     (the one used below)
#   Update -> PUT     e.g. requests.put("https://github.com/ianchen06")
#   Delete -> DELETE  e.g. requests.delete("https://facebook.com/ianchen06/post/1")
res = requests.get(url)

# Keep a copy of the downloaded HTML on disk.
html_file_name = "seattleevents.html"
with open(html_file_name, "w", encoding="utf-8") as html_file:
    html_file.write(res.text)

# Parse the page and select the anchor inside each search-result heading.
soup = BeautifulSoup(res.text, "html.parser")
selector = "div.search-result-preview > div > h3 > a"
a_eles = soup.select(selector)

# Last expression of the cell, so the notebook displays it:
# the link target of every matched anchor.
[anchor['href'] for anchor in a_eles]


['https://visitseattle.org/events/an-evening-with-lucia-micarelli/',
 'https://visitseattle.org/events/dylan-leblanc/',
 'https://visitseattle.org/events/eliades-ochoa/',
 'https://visitseattle.org/events/k-wayne-yang/',
 'https://visitseattle.org/events/an-evening-with-lucia-micarelli-a-valentines-celebration/',
 'https://visitseattle.org/events/juanes/',
 'https://visitseattle.org/events/black-dogs/',
 'https://visitseattle.org/events/black-history-month-keynote-with-dr-doretha-williams/',
 'https://visitseattle.org/events/hear-me-talkin-to-you-womxn-and-blues/']

## Retrieve all the events from the pages
### Task. This code goes through all 9 event listing pages and retrieves the events on each page. The URLs of the events on each page are printed, and all of them are collected in a global event list.

In [5]:
# Global list to store all event links across pages
global_event_list = []

def extract(page_num, last_page=9, timeout=30):
    """Collect event links from consecutive listing pages into ``global_event_list``.

    Starting at ``page_num``, each listing page is downloaded, saved as
    ``seattleevents_page_<n>.html`` in the working directory and scanned for
    event anchors; their ``href`` values are appended to the module-level
    ``global_event_list`` and printed. The page ``page_num`` is always
    fetched; later pages are fetched while the page number is below
    ``last_page``. The crawl stops at the first page whose response status
    is not 200.

    Args:
        page_num: Number of the first listing page to fetch.
        last_page: Number of the last listing page to fetch (inclusive).
            Defaults to 9, the page count this notebook was written for.
        timeout: Seconds to wait for each response before ``requests`` gives up.

    Returns:
        None. Results are accumulated in ``global_event_list``.
    """
    base_url = "https://visitseattle.org/events/page/"
    selector = "div.search-result-preview > div > h3 > a"

    # A plain loop replaces self-recursion: same page order, but no call-stack
    # growth when the number of pages is large.
    current_page = page_num
    while True:
        res = requests.get(f"{base_url}{current_page}", timeout=timeout)

        if res.status_code != 200:
            print(f"Failed to retrieve the webpage. Status code: {res.status_code}")
            return

        # Keep a copy of the downloaded HTML next to the notebook.
        html_file_name = f"seattleevents_page_{current_page}.html"
        with open(html_file_name, "w", encoding="utf-8") as html_file:
            html_file.write(res.text)

        soup = BeautifulSoup(res.text, "html.parser")
        event_links = [anchor['href'] for anchor in soup.select(selector)]

        # The list is mutated in place, so no `global` declaration is needed.
        global_event_list.extend(event_links)
        print(f"Event links on page {current_page}: {event_links}")

        if current_page >= last_page:
            return
        current_page += 1

# Crawl the listing pages, beginning with the first one.
start_page = 1
extract(start_page)

# Show every link gathered by the crawl.
print("Global Event List:", global_event_list)

Event links on page 1: ['https://visitseattle.org/events/an-evening-with-lucia-micarelli/', 'https://visitseattle.org/events/dylan-leblanc/', 'https://visitseattle.org/events/eliades-ochoa/', 'https://visitseattle.org/events/k-wayne-yang/', 'https://visitseattle.org/events/an-evening-with-lucia-micarelli-a-valentines-celebration/', 'https://visitseattle.org/events/juanes/', 'https://visitseattle.org/events/black-dogs/', 'https://visitseattle.org/events/black-history-month-keynote-with-dr-doretha-williams/', 'https://visitseattle.org/events/hear-me-talkin-to-you-womxn-and-blues/']
Event links on page 2: ['https://visitseattle.org/events/maddie-zahm/', 'https://visitseattle.org/events/my-dinner-with-sam-aaron-verzosa/', 'https://visitseattle.org/events/olympia-music-history-project/', 'https://visitseattle.org/events/radiolab/', 'https://visitseattle.org/events/silversun-pickups/', 'https://visitseattle.org/events/the-brodies/', 'https://visitseattle.org/events/cory-wong-winter-2024-tour

## Create a data store for all the events
### Task. This code goes through all the events in the global event list, retrieves the name, date, location, event type and region of each event, and stores them in the events.csv file.

In [6]:
def scrape_event_details(event_url, timeout=30):
    """Scrape name, date, location, type and region from one event detail page.

    The name is read from ``h1.page-title``, date and location from the first
    and second ``<span>`` inside the first ``<h4>``, and type and region from
    the first and second ``<a>`` whose class attribute is
    ``"button big medium black category"``.

    Args:
        event_url: Address of the event detail page; it is printed as a
            progress indicator.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A dict with the keys ``'Name'``, ``'Date'``, ``'Location'``,
        ``'Type'`` and ``'Region'``; any field whose element is missing from
        the page is ``"N/A"``. ``None`` when the response status is not 200.
    """

    def text_at(elements, index):
        # Sparse pages yield "N/A" rather than an IndexError.
        return elements[index].text.strip() if index < len(elements) else "N/A"

    # Send an HTTP GET request to the detail page URL
    res = requests.get(event_url, timeout=timeout)
    print(event_url)

    if res.status_code != 200:
        print(f"Failed to retrieve the event details. Status code: {res.status_code}")
        return None

    soup = BeautifulSoup(res.text, 'html.parser')

    # Look each container up once; find() returns None when it is absent.
    title = soup.find('h1', class_='page-title')
    heading = soup.find("h4")
    heading_spans = heading.find_all("span") if heading is not None else []
    categories = soup.find_all("a", class_="button big medium black category")

    return {
        'Name': title.text.strip() if title is not None else "N/A",
        'Date': text_at(heading_spans, 0),
        'Location': text_at(heading_spans, 1),
        'Type': text_at(categories, 0),
        'Region': text_at(categories, 1),
    }

# Every event URL collected by the listing crawl.
event_urls = global_event_list

# Destination file and its column order.
csv_file_path = "events.csv"
fieldnames = ['Name', 'Date', 'Location', 'Type', 'Region']

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()

    # Scrape the events one at a time and write each row as soon as it is
    # available; events whose scrape returned nothing are left out.
    for event_url in event_urls:
        event_details = scrape_event_details(event_url)
        if not event_details:
            continue
        writer.writerow(event_details)

print(f"Event details have been scraped and stored in '{csv_file_path}'.")

https://visitseattle.org/events/an-evening-with-lucia-micarelli/
https://visitseattle.org/events/dylan-leblanc/
https://visitseattle.org/events/eliades-ochoa/
https://visitseattle.org/events/k-wayne-yang/
https://visitseattle.org/events/an-evening-with-lucia-micarelli-a-valentines-celebration/
https://visitseattle.org/events/juanes/
https://visitseattle.org/events/black-dogs/
https://visitseattle.org/events/black-history-month-keynote-with-dr-doretha-williams/
https://visitseattle.org/events/hear-me-talkin-to-you-womxn-and-blues/
https://visitseattle.org/events/maddie-zahm/
https://visitseattle.org/events/my-dinner-with-sam-aaron-verzosa/
https://visitseattle.org/events/olympia-music-history-project/
https://visitseattle.org/events/radiolab/
https://visitseattle.org/events/silversun-pickups/
https://visitseattle.org/events/the-brodies/
https://visitseattle.org/events/cory-wong-winter-2024-tour/
https://visitseattle.org/events/left-on-tenth/
https://visitseattle.org/events/marlon-wayans

## Getting latitude and longitude information from the place information
### Task. This code goes through all the events in the global event list and retrieves location information. It then gets the latitude and longitude from the https://nominatim.openstreetmap.org/search service and a weather forecast for those coordinates from https://api.weather.gov. The results are stored in the events_data_test.csv file.

In [13]:
# Global list to store all event links across pages
global_event_list = []

def extract(page_num, last_page=9, timeout=30):
    """Collect event links from consecutive listing pages into ``global_event_list``.

    Starting at ``page_num``, each listing page is downloaded, saved as
    ``seattleevents_page_<n>.html`` in the working directory and scanned for
    event anchors; their ``href`` values are appended to the module-level
    ``global_event_list`` and printed. The page ``page_num`` is always
    fetched; later pages are fetched while the page number is below
    ``last_page``. The crawl stops at the first page whose response status
    is not 200.

    Args:
        page_num: Number of the first listing page to fetch.
        last_page: Number of the last listing page to fetch (inclusive).
            Defaults to 9, the page count this notebook was written for.
        timeout: Seconds to wait for each response before ``requests`` gives up.

    Returns:
        None. Results are accumulated in ``global_event_list``.
    """
    base_url = "https://visitseattle.org/events/page/"
    selector = "div.search-result-preview > div > h3 > a"

    # A plain loop replaces self-recursion: same page order, but no call-stack
    # growth when the number of pages is large.
    current_page = page_num
    while True:
        res = requests.get(f"{base_url}{current_page}", timeout=timeout)

        if res.status_code != 200:
            print(f"Failed to retrieve the webpage. Status code: {res.status_code}")
            return

        # Keep a copy of the downloaded HTML next to the notebook.
        html_file_name = f"seattleevents_page_{current_page}.html"
        with open(html_file_name, "w", encoding="utf-8") as html_file:
            html_file.write(res.text)

        soup = BeautifulSoup(res.text, "html.parser")
        event_links = [anchor['href'] for anchor in soup.select(selector)]

        # The list is mutated in place, so no `global` declaration is needed.
        global_event_list.extend(event_links)
        print(f"Event links on page {current_page}: {event_links}")

        if current_page >= last_page:
            return
        current_page += 1

# Function to look up the location using OpenStreetMap Nominatim API
def lookup_location(location, user_agent="seattle-events-notebook/1.0", timeout=30):
    """Geocode a free-text place description with the Nominatim search API.

    Args:
        location: Text to search for, sent as the ``q`` query parameter.
        user_agent: Value of the ``User-Agent`` request header. The Nominatim
            usage policy asks clients to identify their application instead
            of sending an HTTP library's stock agent string.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A dict with ``'latitude'``, ``'longitude'`` and ``'display_name'``
        taken from the first search result, or ``None`` when the response
        status is not 200 or the result list is empty.
    """
    nominatim_url = "https://nominatim.openstreetmap.org/search"
    params = {'q': location, 'format': 'json'}
    headers = {'User-Agent': user_agent}

    response = requests.get(nominatim_url, params=params, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to look up the location. Status code: {response.status_code}")
        return None

    data = response.json()
    if not data:
        # A successful request with no matches.
        return None

    best_match = data[0]
    return {
        'latitude': best_match['lat'],
        'longitude': best_match['lon'],
        'display_name': best_match['display_name']
    }

# Function to fetch weather data using National Weather Service API
def fetch_weather(latitude, longitude, user_agent="seattle-events-notebook/1.0", timeout=30):
    """Fetch the first daytime forecast period for a coordinate pair.

    The lookup takes two requests, the same flow the debugging cell of this
    notebook uses: ``/points/{lat},{lon}`` returns a document whose
    ``properties.forecast`` entry is the URL of the forecast for that
    location, and that URL returns the list of forecast periods.

    Args:
        latitude: Latitude of the location (number or numeric string).
        longitude: Longitude of the location (number or numeric string).
        user_agent: Value of the ``User-Agent`` request header; the NWS API
            documentation asks clients to identify their application.
        timeout: Seconds to wait for each response before ``requests`` gives up.

    Returns:
        A dict with ``'temperature'`` and ``'short_forecast'`` from the first
        period flagged ``isDaytime``, or ``None`` when either request fails,
        the expected keys are missing, or no daytime period is listed.
    """
    headers = {'User-Agent': user_agent}

    # Step 1: resolve the coordinates to their forecast URL.
    points_url = f"https://api.weather.gov/points/{latitude},{longitude}"
    response = requests.get(points_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch weather data. Status code: {response.status_code}")
        return None

    forecast_url = response.json().get('properties', {}).get('forecast')
    if not forecast_url:
        return None

    # Step 2: download the forecast periods.
    response = requests.get(forecast_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch weather data. Status code: {response.status_code}")
        return None

    periods = response.json().get('properties', {}).get('periods') or []

    # Period names such as "Tuesday" or "This Afternoon" do not contain the
    # substring "Day", so the explicit isDaytime flag is used to pick one.
    day_time_forecast = next((period for period in periods if period.get('isDaytime')), None)
    if day_time_forecast is None:
        return None

    return {
        'temperature': day_time_forecast['temperature'],
        'short_forecast': day_time_forecast['shortForecast']
    }

# Start the extraction process from page 1
extract(1)

# Create a CSV file to store the data
csv_file_path = "events_data_test.csv"

# Open the CSV file in write mode with a CSV writer
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['Event Link', 'Location', 'Latitude', 'Longitude', 'Weather Temperature', 'Weather Forecast']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write the header row to the CSV file
    writer.writeheader()

    # Loop through each event link, look up the location, fetch weather, and write to CSV
    for event_link in global_event_list:
        # The geocoder needs a place description, not the event's URL, so the
        # location text is read from the event page first. This relies on
        # scrape_event_details from the earlier details cell.
        event_details = scrape_event_details(event_link)
        if not event_details:
            continue

        event_location = event_details['Location']
        # "N/A" is the placeholder this notebook uses for missing values;
        # there is nothing meaningful to geocode in that case.
        if not event_location or event_location == "N/A":
            continue

        # Look up the location
        location_data = lookup_location(event_location)
        if not location_data:
            continue

        # Fetch weather data
        weather_data = fetch_weather(location_data['latitude'], location_data['longitude'])
        print(weather_data)

        # Write data to CSV; the Location column holds the geocoder's display name.
        writer.writerow({
            'Event Link': event_link,
            'Location': location_data['display_name'],
            'Latitude': location_data['latitude'],
            'Longitude': location_data['longitude'],
            'Weather Temperature': weather_data['temperature'] if weather_data else "N/A",
            'Weather Forecast': weather_data['short_forecast'] if weather_data else "N/A"
        })

print(f"Data has been scraped, and weather/location information stored in '{csv_file_path}'.")

Event links on page 1: ['https://visitseattle.org/events/an-evening-with-lucia-micarelli/', 'https://visitseattle.org/events/dylan-leblanc/', 'https://visitseattle.org/events/eliades-ochoa/', 'https://visitseattle.org/events/k-wayne-yang/', 'https://visitseattle.org/events/an-evening-with-lucia-micarelli-a-valentines-celebration/', 'https://visitseattle.org/events/juanes/', 'https://visitseattle.org/events/black-dogs/', 'https://visitseattle.org/events/black-history-month-keynote-with-dr-doretha-williams/', 'https://visitseattle.org/events/hear-me-talkin-to-you-womxn-and-blues/']
Event links on page 2: ['https://visitseattle.org/events/maddie-zahm/', 'https://visitseattle.org/events/my-dinner-with-sam-aaron-verzosa/', 'https://visitseattle.org/events/olympia-music-history-project/', 'https://visitseattle.org/events/radiolab/', 'https://visitseattle.org/events/silversun-pickups/', 'https://visitseattle.org/events/the-brodies/', 'https://visitseattle.org/events/cory-wong-winter-2024-tour

## Getting latitude and longitude information from the place information
### Task. Testing code. Debugging and testing. 

In [14]:
# location name to lat lon

# Option 1 (not used): build the query string by hand, e.g.
#   base_url + "?q=Wallingford%2C+Seattle&format=jsonv2"
base_url = "https://nominatim.openstreetmap.org/search.php"

# Option 2 (used): describe the query as a dict and let
#   requests.get(url, params=query_params) encode and attach it.
query_params = {"q": "Fremont, Seattle", "format": "jsonv2"}
print(query_params)
res = requests.get(base_url, params=query_params)

# The coordinates of the first geocoding hit select the weather.gov point.
first_hit = res.json()[0]
weather_api_url = f"https://api.weather.gov/points/{first_hit['lat']},{first_hit['lon']}"
print(weather_api_url)

res = requests.get(weather_api_url)
print(res)

# The point document carries the URL of the forecast for that location.
point_dict = res.json()
forcast_url = point_dict['properties']['forecast']

res_forecast = requests.get(forcast_url)
weather_data = res_forecast.json()
print(weather_data)
print(weather_data.keys())

# point_dict is rebound to the forecast document from here on.
point_dict = res_forecast.json()

# Detailed text of the first forecast period.
forecast = weather_data['properties']['periods'][0]['detailedForecast']
print(forecast)

{'q': 'Fremont, Seattle', 'format': 'jsonv2'}
https://api.weather.gov/points/47.6504529,-122.3499861
<Response [200]>
{'@context': ['https://geojson.org/geojson-ld/geojson-context.jsonld', {'@version': '1.1', 'wx': 'https://api.weather.gov/ontology#', 'geo': 'http://www.opengis.net/ont/geosparql#', 'unit': 'http://codes.wmo.int/common/unit/', '@vocab': 'https://api.weather.gov/ontology#'}], 'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-122.3812868, 47.652808], [-122.375051, 47.632287], [-122.3446098, 47.636483899999995], [-122.35083900000001, 47.65700509999999], [-122.3812868, 47.652808]]]}, 'properties': {'updated': '2024-02-12T23:04:25+00:00', 'units': 'us', 'forecastGenerator': 'BaselineForecastGenerator', 'generatedAt': '2024-02-13T01:21:30+00:00', 'updateTime': '2024-02-12T23:04:25+00:00', 'validTimes': '2024-02-12T17:00:00+00:00/P7DT11H', 'elevation': {'unitCode': 'wmoUnit:m', 'value': 60.0456}, 'periods': [{'number': 1, 'name': 'This Afternoon', 'startTim

## Getting latitude and longitude information from the place information and getting weather information
### Task. This code goes through all the events in the events.csv file and builds a location from each event's region. It then gets the latitude and longitude for that location from the https://nominatim.openstreetmap.org/search service. Using the latitude and longitude, it calls the "https://api.weather.gov/points/{lat},{lon}" weather API, parses the response, retrieves the relevant data and finally stores it in the "events_weather_data.csv" file.

In [15]:
def get_lat_lon(location, user_agent="seattle-events-notebook/1.0", timeout=30):
    """Return the latitude and longitude of the first Nominatim match for ``location``.

    Args:
        location: Text to search for, sent as the ``q`` query parameter.
        user_agent: Value of the ``User-Agent`` request header. The Nominatim
            usage policy asks clients to identify their application instead
            of sending an HTTP library's stock agent string.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A ``(lat, lon)`` tuple taken from the first search result, or
        ``(None, None)`` when the response status is not 200 or the result
        list is empty.
    """
    base_url = "https://nominatim.openstreetmap.org/search.php"
    query_params = {"q": location, "format": "jsonv2"}
    response = requests.get(
        base_url,
        params=query_params,
        headers={"User-Agent": user_agent},
        timeout=timeout,
    )

    # An error response may not contain JSON at all, so check the status
    # before decoding the body.
    if response.status_code != 200:
        print(f"Failed to look up the location. Status code: {response.status_code}")
        return None, None

    location_data = response.json()
    if not location_data:
        return None, None

    return location_data[0]["lat"], location_data[0]["lon"]

def get_weather_info(lat, lon, user_agent="seattle-events-notebook/1.0", timeout=30):
    """Return the detailed forecast text of the first period for a coordinate pair.

    The lookup takes two requests: ``/points/{lat},{lon}`` returns a document
    whose ``properties.forecast`` entry is the URL of the forecast for that
    location, and that URL returns the list of forecast periods.

    Args:
        lat: Latitude of the location (number or numeric string).
        lon: Longitude of the location (number or numeric string).
        user_agent: Value of the ``User-Agent`` request header; the NWS API
            documentation asks clients to identify their application.
        timeout: Seconds to wait for each response before ``requests`` gives up.

    Returns:
        The ``detailedForecast`` text of the first forecast period, or
        ``None`` when either request fails or the expected keys are missing.
    """
    headers = {"User-Agent": user_agent}

    # Build the point URL from the arguments. Reading a module-level URL left
    # behind by another cell would return the same forecast for every call.
    weather_url = f"https://api.weather.gov/points/{lat},{lon}"
    point_response = requests.get(weather_url, headers=headers, timeout=timeout)
    if point_response.status_code != 200:
        print(f"Failed to fetch weather data. Status code: {point_response.status_code}")
        return None

    forecast_url = point_response.json().get('properties', {}).get('forecast')
    if not forecast_url:
        return None

    forecast_response = requests.get(forecast_url, headers=headers, timeout=timeout)
    if forecast_response.status_code != 200:
        print(f"Failed to fetch weather data. Status code: {forecast_response.status_code}")
        return None

    periods = forecast_response.json().get('properties', {}).get('periods') or []
    if not periods:
        return None

    return periods[0].get('detailedForecast')


# Read the CSV file (replace with your actual file path)
csv_file = "events.csv"
df = pd.read_csv(csv_file)

output_file = "events_weather_data.csv"
# utf-8 matches the encoding the other CSV files in this notebook are written with.
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["Location", "Lat", "Lon", "Weather"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for index, row in df.iterrows():
        region = row["Region"]  # Assuming the column name is "Region"

        # pandas reads empty cells (and markers such as "N/A") as NaN, which
        # is a float and cannot be searched for "/"; skip those rows.
        if pd.isna(region):
            print(f"Row {index} has no region; skipping.")
            continue

        location_name = str(region)
        if "/" in location_name:
            location_name = location_name.split("/")[0].strip()  # Take the first part before "/"
        location_name += ", Seattle"

        lat, lon = get_lat_lon(location_name)
        if lat and lon:
            forecast = get_weather_info(lat, lon)
            writer.writerow({"Location": location_name, "Lat": lat, "Lon": lon, "Weather": forecast})
        else:
            print(f"Latitude and longitude not found for {location_name}.")
