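# Stadiums

Download MLB, NFL, MLS, NBA and NHL venue locations from public Esri feature services, tag each venue with its U.S. state, and export the combined table as JSON and CSV.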

In [1]:
# Might need to install pyesridump to get access to esri2geojson
# !pip install pyesridump

In [2]:
# Load Python tools and Jupyter config
import os
import json
import subprocess
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [3]:
# Create the output directories if they don't exist:
#   data/geo       - per-league GeoJSON layers
#   data/processed - combined JSON/CSV exports written at the end of the notebook
for output_dir in ("data/geo", "data/processed"):
    os.makedirs(output_dir, exist_ok=True)

In [4]:
# Esri FeatureServer layer endpoints, keyed by league abbreviation.
# Insertion order (MLB, NFL, MLS, NBA, NHL) is the order the layers are fetched in.
endpoint_dict = dict(
    MLB="https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/Major_League_Baseball_Stadiums/FeatureServer/0",
    NFL="https://services.arcgis.com/V6ZHFr6zdgNZuVG0/arcgis/rest/services/NFL_Stadiums_CSV/FeatureServer/0",
    MLS="https://services2.arcgis.com/I9cUOJUZvdGAJncI/arcgis/rest/services/FinalProject_gdb/FeatureServer/0",
    NBA="https://services1.arcgis.com/0MSEUqKaxRlEPj5g/arcgis/rest/services/NBA_Arenas/FeatureServer/0",
    NHL="https://services5.arcgis.com/yhL5dRej97QO0Sj3/arcgis/rest/services/NHL_Teams/FeatureServer/0",
)

In [5]:
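# # Optional alternative (disabled): dump each endpoint with the esri2geojson
# # command-line tool from pyesridump. Uncomment this cell to use it.
# # Iterate through endpoints and dump data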
# for name, url in endpoint_dict.items():
#     output_file = f"data/geo/{name}.geojson"
#     print(f"Processing {name}...")

#     try:
#         # Run the pyesridump command
#         subprocess.run(["esri2geojson", url, output_file], check=True, text=True)
#         print(f"Successfully saved {name} to {output_file}")
#     except subprocess.CalledProcessError as e:
#         print(f"Failed to process {name}: {e}")

In [6]:
# Alternatively, query each endpoint directly with requests. This is enough when
# a layer's features fit in a single response, so no pagination is needed.

In [7]:
# Query parameters
params = {
    'outFields': '*',
    'where': '1=1',
    'f': 'geojson',
    # Request WGS84 longitude/latitude explicitly instead of relying on
    # whatever each service would return by default
    'outSR': '4326',
}

# Seconds to wait on each endpoint, so one stalled server cannot hang the
# whole notebook run
request_timeout = 30

# Source-specific column names mapped onto the shared schema
column_aliases = {'MLS_TEAM':'team', 'NAME1': 'name', 'USER_Arena': 'name'}

# Columns every league layer must provide after normalization
keep_cols = ['name', 'city', 'team', 'sports_league', 'geometry']

# Process each endpoint: download, normalize column names, save one GeoJSON
# file per league, and collect the frames in `gdfs` for later concatenation.
# Failures are reported and skipped so one bad endpoint does not stop the rest.

gdfs = []
for name, url in endpoint_dict.items():
    output_file = f"data/geo/{name.lower()}_layer.geojson"
    print(f"Processing {name.replace('_', ' ')}")

    try:
        # Make the request
        response = requests.get(f"{url}/query", params=params, timeout=request_timeout)
        response.raise_for_status()

        # Parse response
        data = response.json()

        # A failed query can come back as an "error" object inside an
        # otherwise successful HTTP response; report it as an error rather
        # than as "no features"
        if 'error' in data:
            print(f"Error processing {name}: {data['error']}")
            continue

        features = data.get('features')
        if not features:
            print(f"Warning: No features found in response for {name}.")
            continue

        # This cell makes a single, unpaginated request, so warn if the
        # server says it cut the result short.
        # NOTE(review): the flag is looked up both at the top level and under
        # "properties" because its location is not confirmed for every
        # server version - TODO confirm against the service responses.
        truncated = data.get('exceededTransferLimit') or (data.get('properties') or {}).get('exceededTransferLimit')
        if truncated:
            print(f"Warning: Response for {name} was truncated by the server; results are incomplete.")

        # Load data into GeoDataFrame and normalize the column names
        src = gpd.GeoDataFrame.from_features(features).rename(columns=column_aliases)
        src['sports_league'] = name
        src.columns = src.columns.str.lower().str.replace('user_', '', regex=False)

        # Fail with a readable message when a layer lacks an expected column
        missing_cols = [col for col in keep_cols if col not in src.columns]
        if missing_cols:
            print(f"Error processing {name}: missing expected columns {missing_cols}")
            continue

        # Save GeoDataFrame as GeoJSON and append to a list
        gdf = src[keep_cols].copy()
        gdf.to_file(output_file, driver='GeoJSON')
        gdfs.append(gdf)
    except Exception as e:
        # Deliberate best effort: report the failure and move on to the next league
        print(f"Error processing {name}: {e}")

Processing MLB
Processing NFL
Processing MLS
Processing NBA
Processing NHL


In [8]:
combined_gdf = gpd.GeoDataFrame(pd.concat(gdfs)).set_crs('EPSG: 3857')
combined_gdf = combined_gdf.to_crs('EPSG: 3857')

In [9]:
combined_gdf["lng"] = combined_gdf.geometry.x
combined_gdf["lat"] = combined_gdf.geometry.y

In [10]:
states_gdf = gpd.read_file('https://stilesdata.com/gis/usa_states_esri_simple.json').to_crs('EPSG: 3857')
states_gdf.columns = states_gdf.columns.str.lower()

In [11]:
final_gdf = gpd.sjoin(combined_gdf, states_gdf[['state_name', 'state_abbr', 'geometry']], predicate='within', how='left').drop('index_right', axis=1)

In [12]:
# Export the attribute table (geometry column removed) as JSON records and as CSV
stadiums_table = final_gdf.drop(columns='geometry')
stadiums_table.to_json('data/processed/pro_stadiums.json', orient='records', indent=4)
stadiums_table.to_csv('data/processed/pro_stadiums.csv', index=False)