In [None]:
import math
import os

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

Load the MLB stadium data into a DataFrame, then convert it to a GeoDataFrame

In [None]:
# Load the stadium data.
# NOTE(review): this is a machine-specific absolute path, so the notebook only
# runs on the original author's machine; consider pointing it at a location
# relative to the project's data directory before sharing.
stadium_csv_path = '/Users/mitchellhamilton/m-r-ham.github.io/mitchymaps.github.io/projects/mlb-analysis/data/mlb_stadiums_geocoded.csv'
stadiums = pd.read_csv(stadium_csv_path)

# Convert to a GeoDataFrame. points_from_xy builds every point in one
# vectorized call (x = longitude, y = latitude) instead of one Python-level
# Point() call per row, and passing crs= labels the coordinates as WGS84
# lon/lat at construction time rather than mutating the frame afterwards.
stadiums_gdf = gpd.GeoDataFrame(
    stadiums,
    geometry=gpd.points_from_xy(stadiums['Longitude'], stadiums['Latitude']),
    crs='EPSG:4326',
)

Load the census tract shapefiles for each state and merge them into a single GeoDataFrame

In [None]:
# Directory where all state shapefiles are extracted
shapefile_dir = '/path/to/shapefiles/'  # Update with your actual directory

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the merged frame reproducible between runs and machines.
shapefile_names = sorted(
    name for name in os.listdir(shapefile_dir) if name.endswith('.shp')
)
if not shapefile_names:
    raise FileNotFoundError(f"No .shp files found in {shapefile_dir!r}")

# Load every shapefile, bring each one into WGS84 lon/lat, then merge once.
# (DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated frame on every iteration.)
state_gdfs = []
for state_shapefile in shapefile_names:
    state_gdf = gpd.read_file(os.path.join(shapefile_dir, state_shapefile))
    if state_gdf.crs is None:
        # No CRS declared in the file: keep the original assumption that the
        # coordinates already are EPSG:4326 and simply label them.
        state_gdf = state_gdf.set_crs(epsg=4326)
    else:
        # A CRS is declared: actually reproject. set_crs would only relabel
        # the coordinates and raises when the declared CRS differs.
        state_gdf = state_gdf.to_crs(epsg=4326)
    state_gdfs.append(state_gdf)

all_tracts_gdf = pd.concat(state_gdfs, ignore_index=True)

# Calculate centroids of tracts. geopandas warns that centroids computed on
# geographic (lon/lat) coordinates are likely incorrect, so compute them in a
# projected CRS (EPSG:5070, CONUS Albers) and convert the resulting points back
# to lon/lat, which is what the rest of the notebook expects.
all_tracts_gdf['centroid'] = (
    all_tracts_gdf.to_crs(epsg=5070).centroid.to_crs(epsg=4326)
)


Calculate proximity

In [None]:
from shapely.ops import nearest_points

def calculate_nearest(row, destination_gdf, point_column='centroid', dest_point_column='geometry'):
    """Return the destination point closest to ``row[point_column]``.

    Closeness is ranked by great-circle (haversine) distance rather than by
    planar distance in degrees: a degree of longitude is shorter than a degree
    of latitude away from the equator, so a planar comparison on lon/lat
    coordinates can select the wrong destination.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``row[point_column]``, a point exposing ``.x``
        (longitude, degrees) and ``.y`` (latitude, degrees).
    destination_gdf : mapping (e.g. a GeoDataFrame)
        ``destination_gdf[dest_point_column]`` must be an iterable of points
        exposing ``.x`` / ``.y`` in degrees. ``None`` entries are skipped.
    point_column : str
        Key of the origin point in ``row``.
    dest_point_column : str
        Key of the candidate points in ``destination_gdf``.

    Returns
    -------
    The element of ``destination_gdf[dest_point_column]`` nearest to the origin.

    Raises
    ------
    ValueError
        If there are no candidate points to compare against.
    """
    origin = row[point_column]
    origin_lat = math.radians(origin.y)
    origin_lon = math.radians(origin.x)
    cos_origin_lat = math.cos(origin_lat)

    def haversine_term(candidate):
        # The haversine "a" term grows monotonically with great-circle
        # distance, so it is enough for ranking (no asin/sqrt/radius needed).
        lat = math.radians(candidate.y)
        lon = math.radians(candidate.x)
        return (
            math.sin((lat - origin_lat) / 2) ** 2
            + cos_origin_lat * math.cos(lat) * math.sin((lon - origin_lon) / 2) ** 2
        )

    candidates = [geom for geom in destination_gdf[dest_point_column] if geom is not None]
    if not candidates:
        raise ValueError("destination_gdf contains no geometries to compare against")
    return min(candidates, key=haversine_term)

EARTH_RADIUS_KM = 6371.0088  # mean Earth radius


def great_circle_km(origin, destination):
    """Haversine distance in kilometres between two lon/lat points.

    Both arguments must expose ``.x`` (longitude) and ``.y`` (latitude) in
    degrees, as shapely points in EPSG:4326 do.
    """
    lat_a, lat_b = math.radians(origin.y), math.radians(destination.y)
    half_dlat = (lat_b - lat_a) / 2
    half_dlon = math.radians(destination.x - origin.x) / 2
    a = (
        math.sin(half_dlat) ** 2
        + math.cos(lat_a) * math.cos(lat_b) * math.sin(half_dlon) ** 2
    )
    # Clamp to 1.0 so floating-point rounding cannot push asin out of domain.
    return 2 * EARTH_RADIUS_KM * math.asin(math.sqrt(min(1.0, a)))


# Add nearest stadium geometry to tracts GeoDataFrame
all_tracts_gdf['nearest_stadium'] = all_tracts_gdf.apply(calculate_nearest, destination_gdf=stadiums_gdf, axis=1)

# Planar distance in *degrees* between centroid and nearest stadium. Kept for
# backward compatibility only; it is not a physical distance.
all_tracts_gdf['distance_to_stadium'] = all_tracts_gdf.apply(
    lambda row: row['centroid'].distance(row['nearest_stadium']),
    axis=1
)

# Distance in kilometres. Multiplying degrees by 111 is only valid along a
# meridian: a degree of longitude shrinks with cos(latitude), so east-west
# separations were overstated. Use the great-circle distance instead.
all_tracts_gdf['distance_to_stadium_km'] = [
    great_circle_km(centroid, stadium)
    for centroid, stadium in zip(all_tracts_gdf['centroid'], all_tracts_gdf['nearest_stadium'])
]

In [None]:
import folium

# Base map, centred on the geographic centre of the contiguous USA
map_center = [39.8283, -98.5795]
m = folium.Map(location=map_center, zoom_start=4)

# Store each tract centroid's latitude / longitude as plain columns
tract_centroids = all_tracts_gdf['centroid']
all_tracts_gdf['lat'] = tract_centroids.apply(lambda pt: pt.y)
all_tracts_gdf['lon'] = tract_centroids.apply(lambda pt: pt.x)

# One circle per tract: blue when within 10 km of a stadium, red otherwise
tract_columns = ['lat', 'lon', 'GEOID', 'distance_to_stadium_km']
for lat, lon, geoid, distance_km in all_tracts_gdf[tract_columns].itertuples(index=False, name=None):
    marker_color = 'blue' if distance_km <= 10 else 'red'
    tract_marker = folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color=marker_color,
        fill=True,
        fill_opacity=0.6,
        popup=f"Tract: {geoid}<br>Distance to stadium: {distance_km:.2f} km",
    )
    tract_marker.add_to(m)

# One pin per stadium, labelled with team and stadium name
stadium_columns = ['Latitude', 'Longitude', 'Team', 'Stadium']
for lat, lon, team, stadium in stadiums_gdf[stadium_columns].itertuples(index=False, name=None):
    stadium_marker = folium.Marker(
        location=[lat, lon],
        popup=f"{team}<br>{stadium}",
    )
    stadium_marker.add_to(m)

# Write the finished map out as a standalone HTML page
m.save('tracts_stadium_proximity_map.html')

Quick analysis

In [None]:
# Headline numbers: mean distance and how many tracts sit within 10 km
distance_km = all_tracts_gdf['distance_to_stadium_km']
average_distance = distance_km.mean()
tracts_within_10km = len(all_tracts_gdf.loc[distance_km <= 10])

print(f"Average distance to nearest stadium: {average_distance:.2f} km")
print(f"Number of tracts within 10 km of a stadium: {tracts_within_10km}")


Full code in a single cell. TODO: split into the sections above and update later.

In [None]:
import geopandas as gpd
import os
import pandas as pd
from shapely.geometry import Point
from shapely.ops import nearest_points
import folium

# Directory where all state shapefiles are extracted
shapefile_dir = '/path/to/shapefiles/'  # Update with your actual directory

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the merged frame reproducible between runs and machines.
shapefile_names = sorted(
    name for name in os.listdir(shapefile_dir) if name.endswith('.shp')
)
if not shapefile_names:
    raise FileNotFoundError(f"No .shp files found in {shapefile_dir!r}")

# Load every shapefile, bring each one into WGS84 lon/lat, then merge once.
# (DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated frame on every iteration.)
state_gdfs = []
for state_shapefile in shapefile_names:
    state_gdf = gpd.read_file(os.path.join(shapefile_dir, state_shapefile))
    if state_gdf.crs is None:
        # No CRS declared in the file: keep the original assumption that the
        # coordinates already are EPSG:4326 and simply label them.
        state_gdf = state_gdf.set_crs(epsg=4326)
    else:
        # A CRS is declared: actually reproject. set_crs would only relabel
        # the coordinates and raises when the declared CRS differs.
        state_gdf = state_gdf.to_crs(epsg=4326)
    state_gdfs.append(state_gdf)

all_tracts_gdf = pd.concat(state_gdfs, ignore_index=True)

# Calculate centroids of tracts. geopandas warns that centroids computed on
# geographic (lon/lat) coordinates are likely incorrect, so compute them in a
# projected CRS (EPSG:5070, CONUS Albers) and convert the resulting points back
# to lon/lat, which is what the rest of the cell expects.
all_tracts_gdf['centroid'] = (
    all_tracts_gdf.to_crs(epsg=5070).centroid.to_crs(epsg=4326)
)

# Load the stadium data
stadium_csv_path = '/path/to/your/output_csv_file.csv'
stadiums = pd.read_csv(stadium_csv_path)

# Convert to a GeoDataFrame. points_from_xy builds every point in one
# vectorized call (x = longitude, y = latitude) instead of one Python-level
# Point() call per row, and passing crs= labels the coordinates as WGS84
# lon/lat at construction time rather than mutating the frame afterwards.
stadiums_gdf = gpd.GeoDataFrame(
    stadiums,
    geometry=gpd.points_from_xy(stadiums['Longitude'], stadiums['Latitude']),
    crs='EPSG:4326',
)

# Calculate the nearest stadium
def calculate_nearest(row, destination_gdf, point_column='centroid', dest_point_column='geometry'):
    """Return the destination point closest to ``row[point_column]``.

    Closeness is ranked by great-circle (haversine) distance rather than by
    planar distance in degrees: a degree of longitude is shorter than a degree
    of latitude away from the equator, so a planar comparison on lon/lat
    coordinates can select the wrong destination.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``row[point_column]``, a point exposing ``.x``
        (longitude, degrees) and ``.y`` (latitude, degrees).
    destination_gdf : mapping (e.g. a GeoDataFrame)
        ``destination_gdf[dest_point_column]`` must be an iterable of points
        exposing ``.x`` / ``.y`` in degrees. ``None`` entries are skipped.
    point_column : str
        Key of the origin point in ``row``.
    dest_point_column : str
        Key of the candidate points in ``destination_gdf``.

    Returns
    -------
    The element of ``destination_gdf[dest_point_column]`` nearest to the origin.

    Raises
    ------
    ValueError
        If there are no candidate points to compare against.
    """
    origin = row[point_column]
    origin_lat = math.radians(origin.y)
    origin_lon = math.radians(origin.x)
    cos_origin_lat = math.cos(origin_lat)

    def haversine_term(candidate):
        # The haversine "a" term grows monotonically with great-circle
        # distance, so it is enough for ranking (no asin/sqrt/radius needed).
        lat = math.radians(candidate.y)
        lon = math.radians(candidate.x)
        return (
            math.sin((lat - origin_lat) / 2) ** 2
            + cos_origin_lat * math.cos(lat) * math.sin((lon - origin_lon) / 2) ** 2
        )

    candidates = [geom for geom in destination_gdf[dest_point_column] if geom is not None]
    if not candidates:
        raise ValueError("destination_gdf contains no geometries to compare against")
    return min(candidates, key=haversine_term)

EARTH_RADIUS_KM = 6371.0088  # mean Earth radius


def great_circle_km(origin, destination):
    """Haversine distance in kilometres between two lon/lat points.

    Both arguments must expose ``.x`` (longitude) and ``.y`` (latitude) in
    degrees, as shapely points in EPSG:4326 do.
    """
    lat_a, lat_b = math.radians(origin.y), math.radians(destination.y)
    half_dlat = (lat_b - lat_a) / 2
    half_dlon = math.radians(destination.x - origin.x) / 2
    a = (
        math.sin(half_dlat) ** 2
        + math.cos(lat_a) * math.cos(lat_b) * math.sin(half_dlon) ** 2
    )
    # Clamp to 1.0 so floating-point rounding cannot push asin out of domain.
    return 2 * EARTH_RADIUS_KM * math.asin(math.sqrt(min(1.0, a)))


# Add nearest stadium geometry to tracts GeoDataFrame
all_tracts_gdf['nearest_stadium'] = all_tracts_gdf.apply(calculate_nearest, destination_gdf=stadiums_gdf, axis=1)

# Calculate distance to nearest stadium in km. Multiplying a distance in
# degrees by 111 is only valid along a meridian: a degree of longitude shrinks
# with cos(latitude), so east-west separations were overstated. Use the
# great-circle distance instead.
all_tracts_gdf['distance_to_stadium_km'] = [
    great_circle_km(centroid, stadium)
    for centroid, stadium in zip(all_tracts_gdf['centroid'], all_tracts_gdf['nearest_stadium'])
]

# Convert distance to miles
all_tracts_gdf['distance_to_stadium_miles'] = all_tracts_gdf['distance_to_stadium_km'] * 0.621371

# Summary statistics
average_distance_miles = all_tracts_gdf['distance_to_stadium_miles'].mean()
tracts_within_10_miles = all_tracts_gdf[all_tracts_gdf['distance_to_stadium_miles'] <= 10].shape[0]

print(f"Average distance to nearest stadium: {average_distance_miles:.2f} miles")
print(f"Number of tracts within 10 miles of a stadium: {tracts_within_10_miles}")

# Visualization
# Base map, centred on the geographic centre of the contiguous USA
map_center = [39.8283, -98.5795]
m = folium.Map(location=map_center, zoom_start=4)

# Store each tract centroid's latitude / longitude as plain columns
tract_centroids = all_tracts_gdf['centroid']
all_tracts_gdf['lat'] = tract_centroids.apply(lambda pt: pt.y)
all_tracts_gdf['lon'] = tract_centroids.apply(lambda pt: pt.x)

# One circle per tract: blue when within 10 miles of a stadium, red otherwise
tract_columns = ['lat', 'lon', 'GEOID', 'distance_to_stadium_miles']
for lat, lon, geoid, distance_miles in all_tracts_gdf[tract_columns].itertuples(index=False, name=None):
    marker_color = 'blue' if distance_miles <= 10 else 'red'
    tract_marker = folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color=marker_color,
        fill=True,
        fill_opacity=0.6,
        popup=f"Tract: {geoid}<br>Distance to stadium: {distance_miles:.2f} miles",
    )
    tract_marker.add_to(m)

# One pin per stadium, labelled with team and stadium name
stadium_columns = ['Latitude', 'Longitude', 'Team', 'Stadium']
for lat, lon, team, stadium in stadiums_gdf[stadium_columns].itertuples(index=False, name=None):
    stadium_marker = folium.Marker(
        location=[lat, lon],
        popup=f"{team}<br>{stadium}",
    )
    stadium_marker.add_to(m)

# Write the finished map out as a standalone HTML page
m.save('tracts_stadium_proximity_map.html')