In [1]:
import pandas as pd
import json
import re
from difflib import SequenceMatcher

In [3]:
# === Strip ordinal suffixes like 103RD => 103 ===
def strip_ordinals(text):
    """Return *text* with ordinal suffixes removed from numbers.

    A run of digits immediately followed by ST, ND, RD or TH (in any letter
    case) and then a word boundary is replaced by the digits alone, e.g.
    ``"103RD ST"`` -> ``"103 ST"`` and ``"5th Ave"`` -> ``"5 Ave"``.
    Everything else in the string is left untouched.
    """
    # IGNORECASE so the helper also works on text that has not been
    # upper-cased yet; upper-case input behaves exactly as before.
    return re.sub(r'\b(\d+)(?:ST|ND|RD|TH)\b', r'\1', text, flags=re.IGNORECASE)

# === Normalize Address Function ===
def normalize_address(addr):
    """Return a canonical upper-case form of a street address for matching.

    Steps, in order: upper-case the text, strip ordinal suffixes from
    numbers (via ``strip_ordinals``), drop a unit designator together with
    the token that follows it (APT, UNIT, FL, FLOOR, STE or ``#``),
    abbreviate STREET/AVENUE/PLACE to ST/AVE/PL, and collapse runs of
    whitespace.

    Returns an empty string when *addr* is not a ``str`` (e.g. ``None`` or
    a NaN float).
    """
    if not isinstance(addr, str):
        return ""
    addr = addr.upper()
    addr = strip_ordinals(addr)
    # Remove apartment/unit/etc. The keyword must be followed by whitespace
    # or directly by a digit so that street names which merely start with a
    # keyword (FLATBUSH, FLUSHING, STERLING, UNITED ...) are left intact.
    # FLOOR is listed before FL so "FLOOR 2" is consumed as a whole.
    addr = re.sub(
        r'\s+(?:(?:APT|UNIT|FLOOR|FL|STE)(?:\s+|(?=\d))|#\s*)\w+',
        '',
        addr,
    )
    addr = re.sub(r'\bSTREET\b', 'ST', addr)
    addr = re.sub(r'\bAVENUE\b', 'AVE', addr)
    addr = re.sub(r'\bPLACE\b', 'PL', addr)
    addr = re.sub(r'\s+', ' ', addr).strip()
    return addr

# === Read the 311 service-request export ===
complaints_df = pd.read_csv(
    "311_Service_Requests_from_2010_to_Present_20250412.csv",
    low_memory=False,
)

# === Keep only rows with an incident address, then normalize it ===
has_address = complaints_df['Incident Address'].notnull()
complaints_df = complaints_df.loc[has_address].copy()
complaints_df['normalized_address'] = (
    complaints_df['Incident Address'].map(normalize_address)
)

# === Count complaints for every (normalized address, complaint type) pair ===
grouped_complaints = complaints_df.groupby(['normalized_address', 'Complaint Type'])
complaint_summary = grouped_complaints.size().reset_index(name='count')

# === Load Combined Listings JSON ===
with open("combined_listings.json", "r", encoding="utf-8") as f:
    listings = json.load(f)

# === Index Complaint Counts by Normalized Address ===
# Built once up front so that each listing costs a dictionary lookup instead
# of a boolean-mask scan over the whole summary frame. Row order of
# complaint_summary is preserved, so the per-address dicts keep the same
# complaint-type ordering as a filtered iteration would give.
complaints_by_address = {}
summary_rows = complaint_summary[['normalized_address', 'Complaint Type', 'count']]
for address, complaint_type, count in summary_rows.itertuples(index=False, name=None):
    # int() turns the numpy integer into a plain int that json can serialize.
    complaints_by_address.setdefault(address, {})[complaint_type] = int(count)

# === Normalize Each Listing Address and Match Complaints ===
for listing in listings:
    normalized_listing_addr = normalize_address(listing.get("addr_street", ""))

    # An empty key means the listing has no usable street address; it must
    # not be matched against complaints whose own address normalized to "".
    if normalized_listing_addr:
        complaints_dict = dict(complaints_by_address.get(normalized_listing_addr, {}))
    else:
        complaints_dict = {}

    listing['building_complaints'] = complaints_dict

# === Write the enriched listings back out as pretty-printed JSON ===
output_path = "combined_listings_with_complaints.json"
with open(output_path, "w", encoding="utf-8") as out_file:
    json.dump(listings, out_file, indent=2)

# Confirmation message for the notebook output cell.
print("✅ Enriched listings saved to combined_listings_with_complaints.json")


✅ Enriched listings saved to combined_listings_with_complaints.json
