## 1. Import Required Libraries

In [46]:
# Core data manipulation and analysis
from time import sleep  # used to pause between per-way OSM node look-ups
import pandas as pd
import numpy as np
import warnings
# NOTE(review): with no category/module argument this ignores *every* warning for the
# rest of the session (deprecations and CRS notices from the imports below included).
# Consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

# Geospatial libraries
import geopandas as gpd
from shapely.geometry import Point, Polygon

import overpy  # For OpenStreetMap data
# NOTE(review): urlencode is not referenced in the cells visible here; confirm it is
# still needed further down the notebook.
from urllib.parse import urlencode

from pathlib import Path



## 2. Load Parcel Data

In [47]:
# Dataset locations, all resolved relative to the notebook's working directory.
DATA_DIR = Path("../datasets")
PARCELS_SHP = DATA_DIR.joinpath("Westchester_County_Parcels", "Westchester_Parcels.shp")
PARCELS_CSV = DATA_DIR.joinpath("Westchester_County_Parcels_3053716610941103405.csv")

# Read the shapefile rather than the CSV so the parcel geometries are available
# for the spatial operations that follow.
parcels_gdf = gpd.read_file(PARCELS_SHP)


### Filter for Bedford only

In [48]:
# Keep only the parcels whose municipality name is exactly 'Bedford'.
parcels_gdf = parcels_gdf[parcels_gdf["MUNI_NAME"].eq('Bedford')]

In [None]:
# Work from an EPSG:4326 version of the parcels: reproject when the source CRS
# differs, otherwise take a plain copy so the original frame is left untouched.
parcels_wgs84 = (
    parcels_gdf.copy()
    if parcels_gdf.crs == 'EPSG:4326'
    else parcels_gdf.to_crs('EPSG:4326')
)

# Overall extent of the selected parcels as [minx, miny, maxx, maxy];
# these values are interpolated into the Overpass query in the next cell.
bounds = parcels_wgs84.total_bounds

# Optional: uncomment to shrink the extent to 10% of its width and height
# (anchored at the SW corner) for a faster test run.
# bounds = [
#     bounds[0], bounds[1],  # SW corner
#     bounds[0] + (bounds[2] - bounds[0]) * 0.1,  # Reduced area for testing
#     bounds[1] + (bounds[3] - bounds[1]) * 0.1
# ]

In [None]:

api = overpy.Overpass()

# Ask Overpass for every way and relation tagged "building" inside the parcel extent.
# `bounds` is [minx, miny, maxx, maxy], whereas the Overpass bounding-box filter is
# written (south, west, north, east), hence the re-ordering below.
query = """
[out:json][timeout:60];
(
    way["building"]({south},{west},{north},{east});
    relation["building"]({south},{west},{north},{east});
);
out geom;
""".format(south=bounds[1], west=bounds[0], north=bounds[3], east=bounds[2])
result = api.query(query)

# Accumulator filled by the next cell; re-running this cell starts it from scratch.
buildings = list()

In [None]:

# Convert each OSM way in `result` into a building footprint record appended to
# `buildings`. The cell is safe to re-run after an interruption: ways whose id is
# already present in `buildings` are skipped.
found_ids = [building['osm_id'] for building in buildings]
# Set copy of the ids so the per-way membership test is O(1) instead of a list scan.
already_loaded = set(found_ids)

for way in result.ways:
    if way.id in already_loaded:
        continue

    print(f'trying to load: {way.id}')
    # Pause before each look-up; presumably to stay under the Overpass rate limit,
    # since resolve_missing=True may trigger an extra API request for absent nodes.
    sleep(0.5)

    # Resolve the way's nodes once and reuse them (previously fetched twice per way).
    nodes = way.get_nodes(resolve_missing=True)
    if len(nodes) <= 2:
        # Too few vertices to describe an area.
        continue

    coords = [(float(node.lon), float(node.lat)) for node in nodes]
    if coords[0] != coords[-1]:
        # Close the ring so the first and last vertex coincide.
        coords.append(coords[0])

    # A closed ring needs at least 4 coordinates (3 distinct vertices + closing point).
    if len(coords) >= 4:
        buildings.append({
            'geometry': Polygon(coords),
            'osm_id': way.id,
            'building_type': way.tags.get('building', 'yes'),
            'source': 'osm'
        })


trying to load: 60207229
trying to load: 70262091


140

13

In [None]:
# Assemble the collected footprint records into a GeoDataFrame in WGS84.
if buildings:
    buildings_gdf = gpd.GeoDataFrame(buildings, crs='EPSG:4326')
else:
    # With no records there is no 'geometry' column to attach the CRS to, so build
    # an empty frame with the expected columns and an explicit geometry column.
    buildings_gdf = gpd.GeoDataFrame(
        columns=['geometry', 'osm_id', 'building_type', 'source'],
        geometry='geometry',
        crs='EPSG:4326',
    )

{'geometry': <POLYGON ((-73.961 40.887, -73.961 40.887, -73.961 40.887, -73.961 40.887, -...>,
 'osm_id': 371517665,
 'building_type': 'school',
 'source': 'osm'}

In [None]:

# Function to get OpenStreetMap buildings
# NOTE(review): this cell looks like the remains of a helper function whose `def`
# line and body were removed. The surviving statement was left indented with no
# enclosing block, which made the whole cell fail with an IndentationError; it is
# dedented here so the cell runs. The building download itself is performed by the
# Overpass query and way-processing cells above.
print("Querying OpenStreetMap for buildings...")

# NOTE(review): no function is defined in this cell any more, so the message below
# is stale; confirm whether this cell should be deleted or the helper restored.
print("Building data access functions defined successfully!")
# Convert to WGS84 for web mapping if needed


Building data access functions defined successfully!

Converting from EPSG:3857 to WGS84...
Conversion complete

=== Loading OpenStreetMap Buildings ===
Querying OpenStreetMap for buildings...
<overpy.Way id=307005597 nodes=[3120469901, 9947748772, 9947748488, 3120469913, 3120469875, 9947748489, 9947748491, 9947748492, 9947748490, 3120469920, 9947748580, 3120469898, 3120469910, 9947748290, 3120469873, 3120469901]>
Error retrieving OSM buildings: 'Way' object has no attribute 'nd'



=== Loading OpenStreetMap Buildings ===
Querying OpenStreetMap for buildings...
Error retrieving OSM buildings: 'Way' object has no attribute 'nd'


## 4. Data Exploration and Preprocessing

## 5. Spatial Join Operations

Perform spatial joins to determine which buildings fall within each parcel boundary.