In [39]:
import requests
import random
import geopandas as gpd
from shapely.geometry import Polygon
import urllib3

# Silence urllib3's InsecureRequestWarning; the requests.get calls below pass
# verify=False, which would otherwise emit this warning on every request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [40]:
# Query endpoint for layer 0 of the public/public_query MapServer on HCAD's REST server.
# Every request in this notebook is sent to this URL.
url = "https://arcweb.hcad.org/server/rest/services/public/public_query/MapServer/0/query"

In [41]:
# Get all ObjectIDs
id_params = {
    "where": "1=1",
    "returnIdsOnly": "true",
    "f": "json"
}
# A timeout keeps the cell from hanging forever if the server stops responding.
id_response = requests.get(url, params=id_params, verify=False, timeout=60)
id_response.raise_for_status()  # surface HTTP errors instead of parsing an error page
# If the response carries "objectIds": null, fall back to an empty list so the
# len() call below does not fail on None.
all_ids = id_response.json().get("objectIds") or []

# Sample up to 500 records (fewer when the layer has fewer ObjectIDs)
sample_ids = random.sample(all_ids, min(500, len(all_ids)))


In [42]:
## Feature Query Parameters
# Request every attribute plus geometry for the sampled ObjectIDs (outSR=4326).
query_params = dict(
    objectIds=",".join(str(object_id) for object_id in sample_ids),
    outFields="*",
    returnGeometry="true",
    f="json",
    outSR="4326",
)

In [43]:
## Feature Query
# A timeout keeps the cell from hanging forever if the server stops responding.
query_response = requests.get(url, params=query_params, verify=False, timeout=60)
query_response.raise_for_status()  # surface HTTP errors instead of parsing an error page
# A response without a "features" key (or with a null value) yields an empty list.
features = query_response.json().get("features") or []


In [44]:
# Step 4: Build attribute + polygon list
# Two independent, initially empty lists that are filled in parallel:
# one attribute dict and one polygon per parsed feature.
records, geometries = [], []

In [45]:
## Parsing Polygon Geometry
# Start from empty lists so that re-running this cell does not append the
# same features a second time.
records = []
geometries = []

for feature in features:
    attrs = feature.get("attributes", {})
    # "geometry" may be absent or null for a feature; treat both as "no rings"
    # instead of calling .get() on None.
    rings = (feature.get("geometry") or {}).get("rings") or []
    try:
        if rings and isinstance(rings, list) and len(rings[0]) > 2:
            # Only the first ring is used for now; any further rings are ignored.
            polygon = Polygon(rings[0])
            records.append(attrs)
            geometries.append(polygon)
    except Exception:
        # Best-effort parse: skip features whose first ring cannot be turned
        # into a Polygon rather than aborting the whole batch.
        continue

In [46]:
# Step 5: Construct GeoDataFrame
# Bail out early when parsing produced nothing; otherwise pair each attribute
# record with its polygon in EPSG:4326.
if not geometries:
    raise ValueError("No valid geometries parsed from 'rings'.")

gdf = gpd.GeoDataFrame(records, geometry=geometries, crs="EPSG:4326")

gdf.head()

Unnamed: 0,OBJECTID,HCAD_NUM,owner,subdivision,address,city,zip,parcel_type,state_class,appr_val,mkt_val,Shape.STArea(),Shape.STLength(),legal_lines,geometry
0,201,432130000025,DUNHAM POINTE DEVELOPMENT LLC,CY FAIR WEST M/R,0 NORTHWEST (OFF) FWY # OFF,CYPRESS,77433,,D2,,,4084331.0,9670.951613,TR 4|ABST 547 J W MOODY,"POLYGON ((-95.72775 29.98211, -95.72762 29.982..."
1,1515,1475440010001,PASADENA ISD,STRAWBERRY ROAD,2707 LAFFERTY RD,PASADENA,77502,,XV,,,419071.2,2680.22653,RES A BLK 1|(SCHOOL & RELATED USES)|PASADENA I...,"POLYGON ((-95.19551 29.67214, -95.19548 29.670..."
2,1716,1188880040126,WEST HARRIS COUNTY MUD 11,WESTBRIDGE SEC 1 & 2,0 WESTWILLOW DR,HOUSTON,77064,,XV,0.0,0.0,409792.6,2791.531547,RES A BLK 4|WESTBRIDGE SEC 2,"POLYGON ((-95.5527 29.89793, -95.55263 29.8979..."
3,4544,451750020240,RANGER II SILBER RD LP,NORTH POST OAK INDUSTRIAL II - ISD 01,1200 SILBER RD,HOUSTON,77055,,F1,11688911.0,11688911.0,372611.0,2749.38261,TR 30A|ABST 871 J WHARTON,"POLYGON ((-95.46176 29.79129, -95.46493 29.791..."
4,4722,1404180010001,WASTE CORPORATION OF,BEECHNUT / BISSONNET INT COMM,14515 AUTO PKY,HOUSTON,77083,,F1,2098311.0,2098311.0,371080.3,2710.194283,RES A BLK 1|MILLWORK SEC 2,"POLYGON ((-95.63818 29.6861, -95.63843 29.6860..."


In [48]:
# Load METRO MTA service area once
# Forward slashes resolve on both Windows and POSIX and avoid backslash
# sequences such as "\R" and "\M" being parsed as (invalid) string escapes.
metro_gdf = gpd.read_file("../REF/Metro_MTA_Tax_Area.geojson").to_crs(epsg=4326)
# Merge all service-area features into one geometry for the intersection tests.
metro_union = metro_gdf.union_all()

# Container to hold unique intersecting records
final_records = []
final_geometries = []
used_ids = set()
target_count = 1000
batch_size = 500
max_attempts = 20  # avoid infinite loops

In [50]:
# resampling logic
def fetch_and_filter(batch_size=500):
    """Fetch one random batch of parcels and keep those intersecting METRO.

    Draws up to ``batch_size`` ObjectIDs from ``all_ids`` that are not yet in
    ``used_ids``, requests their attributes and geometry from ``url``, and
    appends every parcel whose first ring intersects ``metro_union`` to the
    module-level ``final_records`` / ``final_geometries`` lists.

    Every feature the server returns is recorded in ``used_ids`` whether or
    not it is kept, so later batches do not download an already evaluated
    parcel again.

    Args:
        batch_size: Maximum number of ObjectIDs to request in this call.

    Returns:
        The number of records appended by this call (0 when no unused
        ObjectIDs remain).

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
    """
    remaining_ids = list(set(all_ids) - used_ids)
    if not remaining_ids:
        return 0

    sample_ids = random.sample(remaining_ids, min(batch_size, len(remaining_ids)))

    query_params = {
        "objectIds": ",".join(map(str, sample_ids)),
        "outFields": "*",
        "returnGeometry": "true",
        "f": "json",
        "outSR": "4326"
    }

    # A timeout keeps the call from hanging forever on an unresponsive server.
    response = requests.get(url, params=query_params, verify=False, timeout=60)
    response.raise_for_status()
    features = response.json().get("features") or []

    added = 0
    for feature in features:
        attrs = feature.get("attributes") or {}
        oid = attrs.get("OBJECTID")
        if oid in used_ids:
            continue
        # Mark the parcel as evaluated up front, even if it is rejected below,
        # so it is excluded from every later draw.
        if oid is not None:
            used_ids.add(oid)

        # "geometry" may be absent or null; treat both as "no rings".
        rings = (feature.get("geometry") or {}).get("rings") or []
        if not (rings and isinstance(rings, list) and len(rings[0]) > 2):
            continue

        try:
            # Only the first ring is used; any further rings are ignored.
            polygon = Polygon(rings[0])
            if not polygon.intersects(metro_union):
                continue
        except Exception:
            # Best-effort: skip rings that cannot be built or tested.
            continue

        final_records.append(attrs)
        final_geometries.append(polygon)
        added += 1

    return added

In [51]:
# Keep requesting batches until enough records are collected or the attempt
# budget (max_attempts) is spent, reporting the running total after each batch.
attempts = 0
while attempts < max_attempts and len(final_records) < target_count:
    fetch_and_filter(batch_size)
    attempts = attempts + 1
    print(f"Collected {len(final_records)} valid records (attempt {attempts})")

Collected 402 valid records (attempt 1)
Collected 804 valid records (attempt 2)
Collected 1213 valid records (attempt 3)


In [52]:
# Pair each collected attribute record with its polygon, in EPSG:4326.
final_gdf = gpd.GeoDataFrame(
    data=final_records,
    geometry=final_geometries,
    crs="EPSG:4326",
)

# MAP
Randomly Selected HCAD Parcels Intersecting the METRO Service Area

In [54]:
import folium
from folium import GeoJson

In [55]:
# Base map centred on (29.76, -95.37).
m = folium.Map(location=[29.76, -95.37], zoom_start=11, tiles="cartodbpositron")

# Add all parcels as a single GeoJSON layer. The geometry column exposes a
# FeatureCollection through __geo_interface__, so there is no need to build a
# temporary GeoSeries and a separate map layer for every row.
if not final_gdf.empty:
    folium.GeoJson(final_gdf.geometry.__geo_interface__).add_to(m)

m

# Exporting to GeoJSON

In [56]:
from datetime import datetime
import os
import json

In [67]:
# Ensure OUTPUT directory exists
# "OUTPUT" is a relative path, so it is created under the current working
# directory; exist_ok=True makes re-running this cell a no-op.
os.makedirs("OUTPUT", exist_ok=True)

In [64]:
# Metadata
# Run details that are attached to both exports.
metadata = dict(
    timestamp=f"{datetime.now():%Y-%m-%d %H:%M:%S}",
    sampling_batch_size=batch_size,
    total_properties_sampled=len(final_gdf),  # number of rows in final_gdf
)

In [65]:
# File naming
# Derive the file-name stamp from metadata["timestamp"] instead of calling
# datetime.now() a second time, so the name and the recorded metadata always
# refer to the same moment.
timestamp_str = datetime.strptime(
    metadata["timestamp"], "%Y-%m-%d %H:%M:%S"
).strftime("%Y%m%d_%H%M%S")
filename = f"metro_intersecting_parcels_{timestamp_str}.geojson"
filepath = os.path.join("OUTPUT", filename)

In [72]:
# Save GeoJSON with metadata in a FeatureCollection
# Round-trip through to_json() to obtain a plain dict, then attach the run
# metadata under its own top-level key.
geojson_obj = {**json.loads(final_gdf.to_json()), "metadata": metadata}

# Write to file
with open(filepath, "w") as f:
    f.write(json.dumps(geojson_obj))

print(f"GeoJSON exported to: {filepath}")

GeoJSON exported to: OUTPUT\metro_intersecting_parcels_20250505_220658.geojson


In [69]:
import pandas as pd

In [71]:
# Extract HCAD_NUM values
# One row per parcel; the three metadata values are repeated on every row.
hcad_df = final_gdf[["HCAD_NUM"]].assign(
    timestamp=metadata["timestamp"],
    sampling_batch_size=metadata["sampling_batch_size"],
    total_properties_sampled=metadata["total_properties_sampled"],
)

# Define CSV path
csv_filename = f"metro_intersecting_parcels_HCAD_{timestamp_str}.csv"
csv_path = os.path.join("OUTPUT", csv_filename)

# Export to CSV
hcad_df.to_csv(csv_path, index=False)

print(f"HCAD_NUM list exported to: {csv_path}")

HCAD_NUM list exported to: OUTPUT\metro_intersecting_parcels_HCAD_20250505_220658.csv


# Real Property

HCAD has over 1.4 million parcels. This exercise scrapes the web search portion of HCAD's website to research real property data for the randomly selected batch.