MLB attendance analysis and correlation with household income & population density

In [36]:
import geopandas as gpd
from pygris import tracts
import pandas as pd

# Define the FIPS codes for the states to exclude Alaska and Hawaii
fips = [state for state in tracts.states_2021()['STUSPS'] if state not in ['AK', 'HI']]

# Initialize an empty GeoDataFrame for all census tracts
all_census_tracts = gpd.GeoDataFrame()

# Loop through each state and fetch the census tracts
for state in fips:
    state_tracts = tracts(state=state, year=2021, cb=True)
    all_census_tracts = pd.concat([all_census_tracts, state_tracts], ignore_index=True)

# Ensure the CRS is set correctly
all_census_tracts = all_census_tracts.set_crs(epsg=4326, allow_override=True)

# Load MLB stadiums data
stadium_csv_path = '/Users/mitchellhamilton/m-r-ham.github.io/mitchymaps.github.io/projects/mlb-analysis/data/mlb_stadiums_geocoded_logos.csv'
stadiums_gdf = gpd.read_file(stadium_csv_path)
stadiums_gdf = gpd.GeoDataFrame(stadiums_gdf, geometry=gpd.points_from_xy(stadiums_gdf.Longitude, stadiums_gdf.Latitude), crs="EPSG:4326")

print("Loaded census tracts and MLB stadiums data.")


AttributeError: 'function' object has no attribute 'states_2021'

In [None]:
# Ensure the stadiums GeoDataFrame is in the correct CRS and project to a suitable CRS
stadiums_gdf = stadiums_gdf.to_crs(epsg=3857)

# Create a 32 km (20-mile) buffer around each stadium
buffer_distance = 32000  # in meters
stadiums_gdf['buffer'] = stadiums_gdf.buffer(buffer_distance)

# Transform the buffers back to the original geographic CRS
stadiums_gdf = stadiums_gdf.set_geometry('buffer').to_crs(epsg=4326)

# Ensure all census tracts are in the correct CRS
all_census_tracts = all_census_tracts.to_crs(epsg=4326)

# Find census tracts within the buffers
all_tracts_in_buffers = gpd.sjoin(all_census_tracts, stadiums_gdf[['buffer']], how='inner', op='intersects')

In [None]:
from pygris.data import get_census

# Define the variables to fetch
variables = [
    "B01003_001E",  # Total Population
    "B19013_001E",  # Median Household Income
    "B25077_001E",  # Median Home Value
    "B02001_002E",  # White Population
    "B02001_003E",  # Black Population
    "B02001_004E",  # American Indian and Alaska Native Population
    "B02001_005E",  # Asian Population
    "B02001_006E",  # Native Hawaiian and Other Pacific Islander Population
    "B02001_007E",  # Some Other Race Population
    "B02001_008E"   # Two or More Races Population
]

# Fetch census data for the required variables
census_data = get_census(
    dataset='acs/acs5',
    variables=variables,
    year=2021,
    params={
        "for": "tract:*",
        "in": f"state:*"
    },
    guess_dtypes=True,
    return_geoid=True
)

# Convert to GeoDataFrame and merge with census tracts
census_gdf = gpd.GeoDataFrame(census_data)
census_gdf = census_gdf.set_geometry('geometry')
all_tracts_in_buffers = all_tracts_in_buffers.merge(census_gdf, on='GEOID')

# Calculate the area of each tract in square kilometers
all_tracts_in_buffers['area_km2'] = all_tracts_in_buffers.geometry.area / 1e6

# Calculate population density (people per square kilometer)
all_tracts_in_buffers['population_density'] = all_tracts_in_buffers['B01003_001E'] / all_tracts_in_buffers['area_km2']

# Calculate population density (people per square mile)
all_tracts_in_buffers['population_density_sq_mi'] = all_tracts_in_buffers['B01003_001E'] / (all_tracts_in_buffers['area_km2'] * 0.386102)

In [None]:
import matplotlib.pyplot as plt

# Define the color scale range based on actual data
vmin = all_tracts_in_buffers['population_density_sq_mi'].min()
vmax = all_tracts_in_buffers['population_density_sq_mi'].max()

# Plot Population Density within buffers
fig, ax = plt.subplots(figsize=(10, 6))
all_tracts_in_buffers.plot(column='population_density_sq_mi', cmap='viridis', legend=True, ax=ax, vmin=vmin, vmax=vmax)
stadiums_gdf.set_geometry('geometry').plot(ax=ax, color='red', markersize=50, label='Stadiums')
plt.title('Population Density (people per sq mile) within 20-Mile Buffers Around MLB Stadiums')
plt.legend()
plt.show()

# Save the plot
plt.savefig('/path/to/save/density_mlb_stadiums.png', dpi=300, bbox_inches='tight')