In [None]:
# Core data manipulation and analysis
from pathlib import Path
import pickle
from time import sleep
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Geospatial libraries
import geopandas as gpd
from shapely.geometry import Point, Polygon

import overpy  # For OpenStreetMap data
from urllib.parse import urlencode


import os
os.chdir(r'C:\code\bedford-ubid')

from pymodule.file_ops import read_pickle, write_pickle



In [None]:

bounds = [-73.74815136, 41.16565413, -73.59163084, 41.28549615]

In [4]:

api = overpy.Overpass()

# Construct Overpass query for buildings
query = f"""
[out:json][timeout:60];
(
    way["building"]({bounds[1]},{bounds[0]},{bounds[3]},{bounds[2]});
    relation["building"]({bounds[1]},{bounds[0]},{bounds[3]},{bounds[2]});
);
out geom;
"""
result = api.query(query)




In [5]:
# ids to be loaded
ids_to_process = [x.id for x in result.ways]

ids_to_process

total_id_length = len(ids_to_process)


In [6]:
def try_get_record(way: overpy.Way):
    if len(way.get_nodes(resolve_missing=True)) > 2:  # Valid polygon
        coords = [(float(node.lon), float(node.lat)) for node in way.get_nodes(resolve_missing=True)]
        if coords[0] != coords[-1]:  # Close polygon if needed
            coords.append(coords[0])

        if len(coords) >= 4:  # Valid polygon needs at least 4 points
            polygon = Polygon(coords)
            return {
                'coordinates': coords,
                'geometry': polygon,
                'osm_id': way.id,
                'building_type': way.tags.get('building', 'yes'),
                'source': 'osm',
                # 'way': way
            }
        
    return None


In [None]:

cached_results_file = './datasets/openmap_cache/openmap_cache.pickle'

if not Path(cached_results_file).exists():
    # write_pickle([], cached_id_list_file)
    write_pickle([], cached_results_file)

# cached_id_list = read_pickle(cached_id_list_file)
cached_results = read_pickle(cached_results_file)
cached_ids_list = [x['osm_id'] for x in cached_results]

for w in result.ways:
    id = w.id
    try:
        if id not in cached_ids_list:
            sleep(0.1)
            record = try_get_record(w)
            cached_results.append(record)
            write_pickle(cached_results, cached_results_file)
            # print(f'wrote id: {id}')
    except:
        sleep(1)
        pass


In [None]:
cached_results = read_pickle(cached_results_file)
len(cached_results) / len(result.ways)

cached_results[0]

{'coordinates': [(-73.6101177, 41.273537),
  (-73.6101767, 41.2736358),
  (-73.6101794, 41.2736922),
  (-73.6101687, 41.2737325),
  (-73.6101902, 41.2737991),
  (-73.6101258, 41.2738999),
  (-73.6101767, 41.2739482),
  (-73.6102948, 41.2740248),
  (-73.6104423, 41.274049),
  (-73.6105227, 41.2741075),
  (-73.6106595, 41.2741841),
  (-73.6108017, 41.2742587),
  (-73.611263, 41.2743454),
  (-73.6114213, 41.2742889),
  (-73.6115822, 41.2742123),
  (-73.6118907, 41.2739785),
  (-73.6119604, 41.2738192),
  (-73.6120194, 41.2735672),
  (-73.6119524, 41.2733657),
  (-73.6118129, 41.2731963),
  (-73.611711, 41.2730592),
  (-73.6114159, 41.2727286),
  (-73.6111826, 41.2724666),
  (-73.6110297, 41.2723073),
  (-73.6108768, 41.2721158),
  (-73.6105818, 41.2719062),
  (-73.610178, 41.2718996),
  (-73.6098373, 41.272077),
  (-73.6096308, 41.2722948),
  (-73.6092567, 41.2726601),
  (-73.609238, 41.2728234),
  (-73.6092863, 41.2729665),
  (-73.6097717, 41.2732508),
  (-73.6099085, 41.2733435),
  (-73

In [10]:

# def try_load(blist, result):
#     found_ids = [x['osm_id'] for x in blist]
#     for way in result.ways:
#         if way.id not in found_ids:
#             sleep(0.1)
#             if len(way.get_nodes(resolve_missing=True)) > 2:  # Valid polygon
#                 coords = [(float(node.lon), float(node.lat)) for node in way.get_nodes(resolve_missing=True)]
#                 if coords[0] != coords[-1]:  # Close polygon if needed
#                     coords.append(coords[0])

#                 if len(coords) >= 4:  # Valid polygon needs at least 4 points
#                     polygon = Polygon(coords)
#                     blist.append({
#                         'coordinates': coords,
#                         'geometry': polygon,
#                         'osm_id': way.id,
#                         'building_type': way.tags.get('building', 'yes'),
#                         'source': 'osm'
#                     })

In [11]:
def try_load(blist, result):
    found_ids = [x['osm_id'] for x in blist]
    for way in result.ways:
        if way.id not in found_ids:
            sleep(0.1)
            if len(way.get_nodes(resolve_missing=True)) > 2:  # Valid polygon
                coords = [(float(node.lon), float(node.lat)) for node in way.get_nodes(resolve_missing=True)]
                if coords[0] != coords[-1]:  # Close polygon if needed
                    coords.append(coords[0])

                if len(coords) >= 4:  # Valid polygon needs at least 4 points
                    polygon = Polygon(coords)
                    blist.append({
                        'coordinates': coords,
                        'geometry': polygon,
                        'osm_id': way.id,
                        'building_type': way.tags.get('building', 'yes'),
                        'source': 'osm'
                    })

                    

In [12]:
# try_bldg = []
# try_load(try_bldg, result)



In [13]:
buildings = []
current_length = 0
missing_ids_length = total_length - current_length

NameError: name 'total_length' is not defined

In [None]:

while missing_ids_length != 0:
    current_length = len(buildings)
    missing_ids_length = total_length - current_length
    print('trying...')
    print(f'{missing_ids_length} missing')
    try:
        try_load(buildings, result)
    except:
        pass

buildings

NameError: name 'total_length' is not defined

In [None]:
buildings

NameError: name 'buildings' is not defined

In [None]:

# found_ids = [x['osm_id'] for x in buildings]

# len_pre = len(buildings)
# for way in result.ways:

#     if way.id not in found_ids:
#         print(f'trying to load: {way.id}')
#         sleep(1)
#         if len(way.get_nodes(resolve_missing=True)) > 2:  # Valid polygon
#             coords = [(float(node.lon), float(node.lat)) for node in way.get_nodes(resolve_missing=True)]
#             if coords[0] != coords[-1]:  # Close polygon if needed
#                 coords.append(coords[0])
            
#             if len(coords) >= 4:  # Valid polygon needs at least 4 points
#                 polygon = Polygon(coords)
#                 buildings.append({
#                     'geometry': polygon,
#                     'osm_id': way.id,
#                     'building_type': way.tags.get('building', 'yes'),
#                     'source': 'osm'
#                 })
    

# len_post = len(buildings)
# print(f'{len_post - len_pre} added. total length: {len_post}')
            


In [None]:
buildings_gdf = gpd.GeoDataFrame(buildings, crs='EPSG:4326')

In [None]:

# Function to get OpenStreetMap buildings

    print("Querying OpenStreetMap for buildings...")
    
    
    
    # Convert to GeoDataFrame
    
            

print("Building data access functions defined successfully!")
# Convert to WGS84 for web mapping if needed


IndentationError: unexpected indent (260018951.py, line 3)


=== Loading OpenStreetMap Buildings ===
Querying OpenStreetMap for buildings...
Error retrieving OSM buildings: 'Way' object has no attribute 'nd'


## 4. Data Exploration and Preprocessing

## 5. Spatial Join Operations

Perform spatial joins to determine which buildings fall within each parcel boundary.