In [None]:
import http.client
import json
import time
from urllib.parse import quote


class ZillowScraper:
    """Scalable Zillow scraper with proper error handling and data extraction.

    Sends GET requests to the ``api.hasdata.com`` host and reshapes the
    returned listing / property payloads into a nested dictionary. Missing
    text values are reported as the string ``"N/A"``; missing counts as ``0``.
    """

    def __init__(self, api_key, timeout=30):
        """Store credentials and connection settings.

        Args:
            api_key: API key sent in the ``x-api-key`` request header.
            timeout: Socket timeout in seconds applied to every HTTPS
                connection, so a stalled server cannot hang the scraper.
        """
        self.api_key = api_key
        self.base_url = "api.hasdata.com"
        self.timeout = timeout
        self.headers = {
            'x-api-key': api_key,
            'Content-Type': "application/json"
        }

    def _make_request(self, endpoint, max_retries=3):
        """Make HTTP request with retry logic.

        HTTP 429 responses and transport/decoding errors are retried with
        exponential backoff (1s, 2s, 4s, ...). Any other non-200 status is
        returned immediately without retrying.

        Args:
            endpoint: Path plus query string to request from ``base_url``.
            max_retries: Maximum number of attempts.

        Returns:
            A ``(data, status)`` tuple: the parsed JSON body and ``200`` on
            success; ``(None, status)`` for a non-retryable HTTP error;
            ``(None, 429)`` if the final attempt was rate limited; and
            ``(None, 0)`` if the final attempt failed before a usable
            response was obtained.
        """
        last_status = 0
        for attempt in range(max_retries):
            retries_left = attempt + 1 < max_retries
            delay = None
            conn = None
            try:
                conn = http.client.HTTPSConnection(self.base_url, timeout=self.timeout)
                conn.request("GET", endpoint, headers=self.headers)
                res = conn.getresponse()
                data = res.read()

                if res.status == 200:
                    return json.loads(data.decode("utf-8")), res.status

                if res.status != 429:
                    # errors="replace" so a non-UTF-8 error body cannot mask
                    # the real HTTP status by raising inside this branch.
                    body_text = data.decode("utf-8", errors="replace")
                    print(f"Error: Status {res.status} - {body_text}")
                    return None, res.status

                last_status = 429
                if retries_left:
                    delay = 2 ** attempt
                    print(f"Rate limit hit. Waiting {delay}s...")
                else:
                    print("Rate limit hit. No retries left.")
            except (OSError, http.client.HTTPException, ValueError) as e:
                # OSError covers socket/SSL/timeout failures; ValueError covers
                # JSON and UTF-8 decoding problems on a 200 response.
                last_status = 0
                print(f"Request failed (attempt {attempt + 1}/{max_retries}): {e}")
                if retries_left:
                    delay = 2 ** attempt
            finally:
                if conn:
                    conn.close()

            # Back off only after the connection has been released.
            if delay is not None:
                time.sleep(delay)

        return None, last_status

    def fetch_listings(self, keyword="Pittsburgh, PA", listing_type="forSale", page=1):
        """Fetch Zillow listings.

        Args:
            keyword: Free-text search location; URL-quoted before sending.
            listing_type: Value for the ``type`` query parameter.
            page: Value for the ``page`` query parameter.

        Returns:
            A ``(properties, status)`` tuple. ``properties`` is the value of
            the response's ``"properties"`` key, or ``[]`` when the request
            failed or the key is absent.
        """
        encoded_keyword = quote(keyword)
        endpoint = f"/scrape/zillow/listing?keyword={encoded_keyword}&type={listing_type}&page={page}"

        data, status = self._make_request(endpoint)
        # Guard against a JSON body that is not an object (e.g. a bare string).
        if isinstance(data, dict) and "properties" in data:
            return data["properties"], status
        return [], status

    def fetch_property_details(self, property_url, extract_emails=True):
        """Fetch detailed property information.

        Args:
            property_url: Property page URL; falsy values and ``"N/A"`` are
                treated as "no URL".
            extract_emails: When true, appends ``extractAgentEmails=true``
                to the request.

        Returns:
            The parsed JSON response on HTTP 200, otherwise ``None``.
        """
        if not property_url or property_url == "N/A":
            return None

        # safe='' so that ':' and '/' inside the URL are percent-encoded too.
        encoded_url = quote(property_url, safe='')
        endpoint = f"/scrape/zillow/property?url={encoded_url}"
        if extract_emails:
            endpoint += "&extractAgentEmails=true"

        data, status = self._make_request(endpoint)
        return data if status == 200 else None

    @staticmethod
    def safe_get(data, path, default="N/A"):
        """Safely extract nested values from dictionary.

        Args:
            data: Object to read from; anything that is not a dict at some
                step of the walk yields ``default``.
            path: A single key, or a list of keys to follow in order.
            default: Value returned when the path is missing or "empty".

        Returns:
            The value found at ``path``, or ``default`` when the path does
            not exist, the value is ``None`` / ``""`` / ``[]`` / ``{}``, or
            the value is a numeric zero while ``default`` is not itself 0.
        """
        keys = [path] if isinstance(path, str) else path

        current = data
        for key in keys:
            if not (isinstance(current, dict) and key in current):
                return default
            current = current[key]

        # Return default for empty/null values
        if current in (None, "", [], {}):
            return default
        # A zero is treated as "missing" unless the caller asked for a
        # zero-like default (0 or False), in which case it is passed through.
        if isinstance(current, (int, float)) and current == 0 and default != 0:
            return default

        return current

    @staticmethod
    def format_price(value):
        """Format price values.

        Args:
            value: A number, or a string that may contain ``$`` and commas.

        Returns:
            A string such as ``"$375,000"`` (rounded to whole units) for a
            positive amount, otherwise ``"N/A"``.
        """
        if value in (None, "", "N/A", 0):
            return "N/A"

        if isinstance(value, str):
            cleaned = value.replace('$', '').replace(',', '').strip()
            try:
                value = float(cleaned)
            except (ValueError, TypeError):
                return "N/A"

        if isinstance(value, (int, float)) and value > 0:
            return f"${value:,.0f}"

        return "N/A"

    @classmethod
    def _price_with_suffix(cls, value, suffix):
        """Format ``value`` as a price and append ``suffix`` only if it formatted.

        Prevents outputs such as ``"N/A/mo"`` when the value is present but
        not a positive amount.
        """
        formatted = cls.format_price(value)
        return formatted if formatted == "N/A" else formatted + suffix

    @classmethod
    def _number_or_zero(cls, data, key):
        """Read ``key`` from ``data`` with a default of 0, mapping ``"N/A"`` to 0."""
        value = cls.safe_get(data, key, 0)
        return 0 if value == "N/A" else value

    def format_listing(self, listing_data, detailed_data=None):
        """Format listing into structured output.

        Args:
            listing_data: One entry from the listings response.
            detailed_data: Optional property-details response; when truthy
                its fields are merged in via ``_add_detailed_info``.

        Returns:
            A dict with a single ``"property"`` key holding the formatted
            listing.
        """
        get = self.safe_get

        # Extract from listing data - using actual field names from API
        address_obj = get(listing_data, "address", {})
        lot_area = get(listing_data, "lotAreaValue", 0)
        lot_area_unit = get(listing_data, "lotAreaUnits", "sqft")
        has_lot_area = bool(lot_area) and lot_area != "N/A"

        formatted = {
            "property": {
                "id": get(listing_data, "id"),
                "url": get(listing_data, "url"),
                "image": get(listing_data, "image"),
                "status": get(listing_data, "status"),
                "currency": get(listing_data, "currency", "$"),
                "basic_info": {
                    "price": {
                        "current": self.format_price(get(listing_data, "price", 0)),
                        "zestimate": self.format_price(get(listing_data, "zestimate", 0)),
                        "rent_zestimate": self._price_with_suffix(
                            get(listing_data, "rentZestimate", 0), "/mo"
                        )
                    },
                    "address": {
                        "full": get(listing_data, "addressRaw"),
                        "street": get(address_obj, "street"),
                        "city": get(address_obj, "city"),
                        "state": get(address_obj, "state"),
                        "zipcode": get(address_obj, "zipcode")
                    },
                    "coordinates": {
                        "latitude": get(listing_data, "lat"),
                        "longitude": get(listing_data, "lng")
                    },
                    "specifications": {
                        "bedrooms": self._number_or_zero(listing_data, "beds"),
                        "bathrooms": self._number_or_zero(listing_data, "baths"),
                        "living_area_sqft": self._number_or_zero(listing_data, "area"),
                        "lot_size": f"{lot_area} {lot_area_unit}" if has_lot_area else "N/A",
                        "property_type": get(listing_data, "homeType")
                    }
                },
                "listing_details": {
                    "days_on_zillow": self._number_or_zero(listing_data, "daysOnZillow")
                }
            }
        }

        # Add detailed information if available
        if detailed_data:
            self._add_detailed_info(formatted, detailed_data)

        return formatted

    def _add_detailed_info(self, formatted, detailed_data):
        """Add detailed property information to formatted listing.

        Mutates ``formatted["property"]`` in place, adding the
        ``financial``, ``listing_agent``, ``features`` and ``location``
        sections, plus ``price_history`` / ``tax_history`` / ``schools``
        when those lists are non-empty.

        Args:
            formatted: Dict produced by ``format_listing``.
            detailed_data: Property-details response; its fields are read
                from under the ``"property"`` key.
        """
        get = self.safe_get
        prop = formatted["property"]

        # The detailed data comes under 'property' key in response
        detail_prop = get(detailed_data, "property", {})
        zestimate_range = get(detail_prop, "zestimateRange", {})
        agent_info = get(detail_prop, "listingAgent", {})

        # Update basic info with additional details
        year_built = get(detail_prop, "yearBuilt", 0)
        if year_built and year_built != "N/A":
            prop["basic_info"]["specifications"]["year_built"] = year_built

        # Only record a per-sqft price when it actually formats to an amount.
        price_per_sqft = self.format_price(get(detail_prop, "pricePerSquareFoot", 0))
        if price_per_sqft != "N/A":
            prop["basic_info"]["price"]["price_per_sqft"] = price_per_sqft + "/sqft"

        # Add financial details
        prop["financial"] = {
            "monthly_estimate": self._price_with_suffix(
                get(detail_prop, "monthlyEstimate", 0), "/mo"
            ),
            "zestimate_range": {
                "low": self.format_price(get(zestimate_range, "low", 0)),
                "high": self.format_price(get(zestimate_range, "high", 0))
            },
            "tax_assessment": self.format_price(get(detail_prop, "taxAssessment", 0)),
            "annual_tax": self.format_price(get(detail_prop, "annualTax", 0)),
            "hoa_fee": self._price_with_suffix(get(detail_prop, "hoaFee", 0), "/mo")
        }

        # Add agent information
        prop["listing_agent"] = {
            "name": get(agent_info, "name"),
            "phone": get(agent_info, "phone"),
            "email": get(agent_info, "email"),
            "brokerage": get(agent_info, "brokerage"),
            "mls_number": get(detail_prop, "mlsNumber"),
            "mls_source": get(detail_prop, "mlsSource")
        }

        # Add features
        prop["features"] = {
            "description": get(detail_prop, "description"),
            "parking": {
                "spaces": self._number_or_zero(detail_prop, "parkingSpaces"),
                "type": get(detail_prop, "parkingType")
            },
            "appliances": get(detail_prop, "appliances", []) or [],
            "climate_control": {
                "cooling": get(detail_prop, "cooling"),
                "heating": get(detail_prop, "heating")
            },
            "interior": {
                "flooring": get(detail_prop, "flooring", []) or [],
                "fireplace_count": self._number_or_zero(detail_prop, "fireplaceCount"),
                "basement": get(detail_prop, "basement")
            },
            "exterior": {
                "has_pool": get(detail_prop, "hasPool", False)
            }
        }

        # Add location details
        prop["location"] = {
            "township": get(detail_prop, "township"),
            "school_district": get(detail_prop, "schoolDistrict"),
            "scores": {
                "walk_score": self._number_or_zero(detail_prop, "walkScore"),
                "transit_score": self._number_or_zero(detail_prop, "transitScore")
            }
        }

        # Optional list sections are copied through only when non-empty.
        for source_key, target_key in (
            ("priceHistory", "price_history"),
            ("taxHistory", "tax_history"),
            ("schools", "schools"),
        ):
            entries = get(detail_prop, source_key, [])
            if entries:
                prop[target_key] = entries


def main():
    """Main execution function.

    Scrapes up to ``MAX_PAGES`` pages of listings for ``KEYWORD``,
    optionally fetches per-property details, prints every formatted
    property as JSON, and writes the full list to
    ``zillow_properties.json``.
    """
    import os

    # Initialize scraper
    # NOTE(security): a literal API key in source is a leaked credential.
    # Prefer setting HASDATA_API_KEY in the environment; the literal is kept
    # only as a fallback so existing runs behave as before.
    API_KEY = os.environ.get("HASDATA_API_KEY", "7ff96a21-7af8-4e6d-b06d-9c3a6bd8d4d4")
    scraper = ZillowScraper(API_KEY)

    # Configuration
    KEYWORD = "Pittsburgh, PA"
    MAX_PAGES = 2
    FETCH_DETAILS = True  # Set to False to skip detailed fetching (faster)
    DELAY_BETWEEN_REQUESTS = 2  # Seconds between requests

    all_properties = []
    # Counted explicitly: the loop variable is undefined when MAX_PAGES is 0
    # and would over-count a page that failed or came back empty.
    pages_processed = 0

    print(f"Starting scrape for: {KEYWORD}")
    print(f"Max pages: {MAX_PAGES}")
    print(f"Fetch details: {FETCH_DETAILS}\n")

    for page in range(1, MAX_PAGES + 1):
        print(f"\n{'='*60}")
        print(f"Fetching page {page}...")
        print(f"{'='*60}")

        listings, status = scraper.fetch_listings(KEYWORD, page=page)

        if status != 200:
            print(f"Failed to fetch page {page}. Status: {status}")
            if status == 403:
                print("API key may be invalid or expired.")
            break

        if not listings:
            print("No more listings found.")
            break

        print(f"Found {len(listings)} properties on page {page}")

        # DIAGNOSTIC: Print first listing structure
        if page == 1:
            print("\n*** DIAGNOSTIC: First listing keys ***")
            print(f"Available fields: {list(listings[0].keys())}")
            print("\n*** Sample listing data ***")
            print(json.dumps(listings[0], indent=2)[:1000] + "...")

        for idx, listing in enumerate(listings, 1):
            property_url = scraper.safe_get(listing, "url")
            property_id = scraper.safe_get(listing, "id")
            address = scraper.safe_get(listing, "addressRaw")
            has_url = bool(property_url) and property_url != "N/A"

            print(f"\n[{idx}/{len(listings)}] ID: {property_id} | {address}")

            detailed_data = None
            if FETCH_DETAILS and has_url:
                print("  └─ Fetching details...")
                detailed_data = scraper.fetch_property_details(property_url)

                # DIAGNOSTIC: Print first detailed response
                if page == 1 and idx == 1 and detailed_data:
                    print("\n*** DIAGNOSTIC: Detailed data structure ***")
                    print(json.dumps(detailed_data, indent=2)[:1500] + "...")

                # Throttle detail requests to stay under the API rate limit.
                time.sleep(DELAY_BETWEEN_REQUESTS)
            elif not has_url:
                print("  └─ No URL available, skipping details")

            all_properties.append(scraper.format_listing(listing, detailed_data))

        pages_processed += 1
        print(f"\n✓ Completed page {page}")
        time.sleep(DELAY_BETWEEN_REQUESTS)

    # Print results
    print(f"\n\n{'#'*80}")
    print("SCRAPING COMPLETE - RESULTS")
    print(f"{'#'*80}\n")

    for idx, prop in enumerate(all_properties, 1):
        print(f"\n{'='*80}")
        print(f"PROPERTY {idx}/{len(all_properties)}")
        print(f"{'='*80}")
        print(json.dumps(prop, indent=2))

    # Summary
    print(f"\n\n{'*'*80}")
    print("SUMMARY")
    print(f"{'*'*80}")
    print(f"Total properties scraped: {len(all_properties)}")
    print(f"Search location: {KEYWORD}")
    print(f"Pages processed: {pages_processed}")

    # Save to file
    output_file = "zillow_properties.json"
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_properties, f, indent=2, ensure_ascii=False)
        print(f"\n✓ Data saved to: {output_file}")
    except (OSError, TypeError, ValueError) as e:
        # OSError: cannot open/write the file; TypeError/ValueError: the
        # collected data could not be serialized as JSON.
        print(f"\n✗ Failed to save file: {e}")


if __name__ == "__main__":
    main()

Streaming output truncated to the last 5000 lines.
    "id": "11428599",
    "url": "https://www.zillow.com/homedetails/4859-S-Emblem-Dr-Pittsburgh-PA-15236/11428599_zpid/",
    "image": "https://photos.zillowstatic.com/fp/cb80be088371017eee4b19ac60227998-p_e.jpg",
    "status": "FOR_SALE",
    "currency": "$",
    "basic_info": {
      "price": {
        "current": "$375,000",
        "zestimate": "N/A",
        "rent_zestimate": "N/A"
      },
      "address": {
        "full": "4859 S Emblem Dr, Pittsburgh, PA 15236",
        "street": "4859 S Emblem Dr",
        "city": "Pittsburgh",
        "state": "PA",
        "zipcode": "15236"
      },
      "coordinates": {
        "latitude": "N/A",
        "longitude": "N/A"
      },
      "specifications": {
        "bedrooms": 3,
        "bathrooms": 3,
        "living_area_sqft": 1721,
        "lot_size": "8999.496 sqft",
        "property_type": "SINGLE_FAMILY",
        "year_built": 1960
      }
    },
    "listing_detai

In [None]:
import http.client
import json
import time
from urllib.parse import quote


class ZillowCompleteScraper:
    """Complete Zillow scraper - extracts ALL available data.

    Sends GET requests to the ``api.hasdata.com`` host and maps the listing
    and property-detail payloads into one large sectioned dictionary. Fields
    that are absent from the payloads are reported as ``None`` (or an empty
    list/dict where the section is a collection).
    """

    def __init__(self, api_key):
        """Store the API key and build the request headers.

        Args:
            api_key: API key sent in the ``x-api-key`` request header.
        """
        self.api_key = api_key
        self.base_url = "api.hasdata.com"
        self.headers = {
            'x-api-key': api_key,
            'Content-Type': "application/json"
        }

    def _make_request(self, endpoint, max_retries=3):
        """Make HTTP request with retry logic.

        HTTP 429 responses and transport/decoding errors are retried with
        exponential backoff (1s, 2s, 4s, ...). Any other non-200 status is
        returned immediately without retrying.

        Args:
            endpoint: Path plus query string to request from ``base_url``.
            max_retries: Maximum number of attempts.

        Returns:
            A ``(data, status)`` tuple: the parsed JSON body and ``200`` on
            success; ``(None, status)`` for a non-retryable HTTP error;
            ``(None, 429)`` if the final attempt was rate limited; and
            ``(None, 0)`` if the final attempt failed before a usable
            response was obtained.
        """
        last_status = 0
        for attempt in range(max_retries):
            retries_left = attempt + 1 < max_retries
            delay = None
            conn = None
            try:
                conn = http.client.HTTPSConnection(self.base_url, timeout=30)
                conn.request("GET", endpoint, headers=self.headers)
                res = conn.getresponse()
                data = res.read()

                if res.status == 200:
                    return json.loads(data.decode("utf-8")), res.status

                if res.status != 429:
                    # errors="replace" so a non-UTF-8 error body cannot mask
                    # the real HTTP status by raising inside this branch.
                    body_text = data.decode("utf-8", errors="replace")[:200]
                    print(f"  ✗ Status {res.status}: {body_text}")
                    return None, res.status

                last_status = 429
                if retries_left:
                    delay = 2 ** attempt
                    print(f"  ⚠ Rate limit. Waiting {delay}s...")
                else:
                    print("  ⚠ Rate limit. No retries left.")
            except (OSError, http.client.HTTPException, ValueError) as e:
                # OSError covers socket/SSL/timeout failures; ValueError covers
                # JSON and UTF-8 decoding problems on a 200 response.
                last_status = 0
                print(f"  ✗ Error: {e}")
                if retries_left:
                    delay = 2 ** attempt
            finally:
                if conn:
                    conn.close()

            # Back off only after the connection has been released.
            if delay is not None:
                time.sleep(delay)
        return None, last_status

    def fetch_listings(self, keyword="Pittsburgh, PA", listing_type="forSale", page=1):
        """Fetch Zillow listings.

        Args:
            keyword: Free-text search location; URL-quoted before sending.
            listing_type: Value for the ``type`` query parameter.
            page: Value for the ``page`` query parameter.

        Returns:
            A ``(properties, status)`` tuple. ``properties`` is the value of
            the response's ``"properties"`` key, or ``[]`` when the request
            failed or the key is absent.
        """
        encoded_keyword = quote(keyword)
        endpoint = f"/scrape/zillow/listing?keyword={encoded_keyword}&type={listing_type}&page={page}"

        data, status = self._make_request(endpoint)
        # Guard against a JSON body that is not an object (e.g. a bare string).
        if isinstance(data, dict) and "properties" in data:
            return data["properties"], status
        return [], status

    def fetch_property_details(self, property_url):
        """Fetch detailed property information.

        Args:
            property_url: Property page URL; a falsy value means "no URL".

        Returns:
            The parsed JSON response on HTTP 200, otherwise ``None``.
        """
        if not property_url:
            return None

        # safe='' so that ':' and '/' inside the URL are percent-encoded too.
        encoded_url = quote(property_url, safe='')
        endpoint = f"/scrape/zillow/property?url={encoded_url}&extractAgentEmails=true"

        data, status = self._make_request(endpoint)
        return data if status == 200 else None

    @staticmethod
    def get_all_keys(data, parent_key='', sep='__'):
        """Recursively get all keys from nested dict.

        Nested keys are joined to their parents with ``sep``. For a list
        value, only its first element is inspected, and only if that element
        is a dict.

        Args:
            data: Object to inspect; non-dicts yield an empty list.
            parent_key: Prefix for keys found at this level.
            sep: Separator placed between parent and child key names.

        Returns:
            A flat list of key paths in traversal order.
        """
        keys = []
        if not isinstance(data, dict):
            return keys

        for name, value in data.items():
            full_key = f"{parent_key}{sep}{name}" if parent_key else name
            keys.append(full_key)
            if isinstance(value, dict):
                keys.extend(ZillowCompleteScraper.get_all_keys(value, full_key, sep))
            elif isinstance(value, list) and value and isinstance(value[0], dict):
                keys.extend(ZillowCompleteScraper.get_all_keys(value[0], full_key, sep))
        return keys

    @staticmethod
    def _as_dict(value):
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    def extract_complete_data(self, listing, details):
        """Extract ALL data from listing and details - no field left behind.

        Args:
            listing: One entry from the listings response, or ``None``.
            details: Property-details response (fields are read from under
                its ``"property"`` key), or ``None``.

        Returns:
            A dict of named sections (``HEADER``, ``COORDINATES``,
            ``ADDRESS``, ... ``METADATA``). Any field missing from the
            inputs is ``None`` or an empty collection; nested values of an
            unexpected type (for example ``address`` given as a string, or a
            ``null`` ``zestimateRange``) are treated as empty rather than
            raising.
        """
        # Normalize inputs once so every lookup below is safe on None/non-dict.
        listing = self._as_dict(listing)
        detail_prop = self._as_dict(self._as_dict(details).get("property"))

        # Nested objects that are dereferenced further; coerced to dicts
        # because the payload may carry null or another type in their place.
        address = self._as_dict(listing.get("address"))
        kitchen = self._as_dict(self._as_dict(detail_prop.get("rooms")).get("kitchen"))
        zestimate_range = self._as_dict(detail_prop.get("zestimateRange"))

        # Build comprehensive structured data
        result = {
            # ==================== HEADER INFORMATION ====================
            "HEADER": {
                "price": listing.get("price"),
                "address_full": listing.get("addressRaw") or listing.get("address"),
                "bedrooms": listing.get("beds"),
                "bathrooms": listing.get("baths"),
                "square_footage": listing.get("area"),
                "estimated_monthly_payment": detail_prop.get("monthlyEstimate"),
                "property_type": listing.get("homeType") or detail_prop.get("homeType"),
                "year_built": detail_prop.get("yearBuilt"),
                "lot_size_value": listing.get("lotAreaValue"),
                "lot_size_unit": listing.get("lotAreaUnits"),
                "zestimate": listing.get("zestimate") or detail_prop.get("zestimate"),
                "price_per_sqft": detail_prop.get("pricePerSquareFoot"),
                "hoa_fee": detail_prop.get("hoaFee"),
                "rent_zestimate": listing.get("rentZestimate") or detail_prop.get("rentZestimate")
            },

            # ==================== COORDINATES ====================
            "COORDINATES": {
                "latitude": listing.get("lat"),
                "longitude": listing.get("lng")
            },

            # ==================== ADDRESS DETAILS ====================
            # Passed through as received (may be a dict or a plain string).
            "ADDRESS": listing.get("address", {}),

            # ==================== PROPERTY IDENTIFIERS ====================
            "IDENTIFIERS": {
                "id": listing.get("id"),
                "zpid": listing.get("zpid") or detail_prop.get("zpid"),
                "url": listing.get("url"),
                "detailUrl": listing.get("detailUrl"),
                "hdpUrl": detail_prop.get("hdpUrl"),
                "status": listing.get("status") or detail_prop.get("homeStatus"),
                "currency": listing.get("currency")
            },

            # ==================== IMAGES ====================
            "IMAGES": {
                "main_image": listing.get("image"),
                "imgSrc": listing.get("imgSrc"),
                "all_photos": detail_prop.get("photos", []),
                "photo_count": detail_prop.get("photoCount")
            },

            # ==================== WHAT'S SPECIAL ====================
            "WHATS_SPECIAL": detail_prop.get("specialFeatures", []),

            # ==================== DESCRIPTION ====================
            "DESCRIPTION": detail_prop.get("description"),

            # ==================== FACTS & FEATURES ====================
            "FACTS_AND_FEATURES": {
                # INTERIOR
                "INTERIOR": {
                    "bedrooms_and_bathrooms": {
                        "bedrooms": detail_prop.get("bedrooms") or listing.get("beds"),
                        "bathrooms": detail_prop.get("bathrooms") or listing.get("baths"),
                        "full_bathrooms": detail_prop.get("fullBathrooms"),
                        "half_bathrooms": detail_prop.get("halfBathrooms"),
                        "three_quarter_bathrooms": detail_prop.get("threeQuarterBathrooms")
                    },
                    "rooms": detail_prop.get("rooms", {}),
                    "room_dimensions": detail_prop.get("roomDimensions", {}),
                    "features": {
                        "flooring": detail_prop.get("flooring", []),
                        "has_basement": detail_prop.get("hasBasement"),
                        "basement": detail_prop.get("basement"),
                        "basement_sqft": detail_prop.get("basementSqft")
                    },
                    "interior_area": {
                        "total_structure_area": detail_prop.get("totalStructureArea"),
                        "living_area": detail_prop.get("livingArea") or listing.get("area"),
                        "finished_sqft": detail_prop.get("finishedSqFt"),
                        "above_grade_finished_area": detail_prop.get("aboveGradeFinishedArea")
                    },
                    "kitchen": {
                        "features": detail_prop.get("kitchenFeatures", []),
                        "dimensions": kitchen.get("dimensions")
                    },
                    "bathroom_features": detail_prop.get("bathroomFeatures", []),
                    "appliances": detail_prop.get("appliances", []),
                    "laundry": detail_prop.get("laundryFeatures", []),
                    "fireplace": {
                        "count": detail_prop.get("fireplaceCount"),
                        "type": detail_prop.get("fireplaceType"),
                        "features": detail_prop.get("fireplaceFeatures", [])
                    },
                    "window_features": detail_prop.get("windowFeatures", []),
                    "door_features": detail_prop.get("doorFeatures", []),
                    "additional_features": detail_prop.get("additionalFeatures", [])
                },

                # PROPERTY
                "PROPERTY": {
                    "parking": {
                        "total_spaces": detail_prop.get("parkingSpaces"),
                        "parking_features": detail_prop.get("parkingFeatures", []),
                        "parking_type": detail_prop.get("parkingType"),
                        "has_attached_garage": detail_prop.get("hasAttachedGarage"),
                        "has_detached_garage": detail_prop.get("hasDetachedGarage"),
                        "garage_spaces": detail_prop.get("garageSpaces"),
                        "carport_spaces": detail_prop.get("carportSpaces")
                    },
                    "features": {
                        "levels": detail_prop.get("levels") or listing.get("levels"),
                        "stories": detail_prop.get("stories"),
                        "pool_features": detail_prop.get("poolFeatures", []),
                        "has_pool": detail_prop.get("hasPool"),
                        "pool_type": detail_prop.get("poolType"),
                        "spa_features": detail_prop.get("spaFeatures", []),
                        "has_spa": detail_prop.get("hasSpa")
                    },
                    "lot": {
                        "size": detail_prop.get("lotSize"),
                        "size_sqft": listing.get("lotAreaValue"),
                        "size_acres": detail_prop.get("lotSizeAcres"),
                        "dimensions": detail_prop.get("lotDimensions"),
                        "lot_features": detail_prop.get("lotFeatures", [])
                    },
                    "details": {
                        "parcel_number": detail_prop.get("parcelNumber"),
                        "apn": detail_prop.get("apn"),
                        "special_conditions": detail_prop.get("specialConditions"),
                        "listing_terms": detail_prop.get("listingTerms")
                    },
                    "outdoor": {
                        "patio_and_porch_features": detail_prop.get("patioAndPorchFeatures", []),
                        "fencing": detail_prop.get("fencing", []),
                        "landscaping": detail_prop.get("landscaping", [])
                    }
                },

                # CONSTRUCTION
                "CONSTRUCTION": {
                    "type_and_style": {
                        "home_type": detail_prop.get("homeType") or listing.get("homeType"),
                        "architectural_style": detail_prop.get("architecturalStyle"),
                        "property_subtype": detail_prop.get("propertySubType")
                    },
                    "materials": {
                        "construction_materials": detail_prop.get("constructionMaterials", []),
                        "exterior": detail_prop.get("exterior", []),
                        "foundation": detail_prop.get("foundation", []),
                        "roof": detail_prop.get("roofType") or detail_prop.get("roof")
                    },
                    "condition": detail_prop.get("condition"),
                    "year_built": detail_prop.get("yearBuilt"),
                    "year_renovated": detail_prop.get("yearRenovated")
                },

                # UTILITIES & GREEN ENERGY
                "UTILITIES_AND_GREEN_ENERGY": {
                    "sewer": detail_prop.get("sewer"),
                    "water": detail_prop.get("waterSource") or detail_prop.get("water"),
                    "heating": detail_prop.get("heating"),
                    "heating_type": detail_prop.get("heatingType"),
                    "cooling": detail_prop.get("cooling"),
                    "cooling_type": detail_prop.get("coolingType"),
                    "electric": detail_prop.get("electric"),
                    "gas": detail_prop.get("gas")
                },

                # COMMUNITY & HOA
                "COMMUNITY_AND_HOA": {
                    "location": {
                        # "region" mirrors the city; no separate region field is read.
                        "region": address.get("city"),
                        "city": address.get("city"),
                        "state": address.get("state"),
                        "zipcode": address.get("zipcode"),
                        "county": detail_prop.get("county"),
                        "township": detail_prop.get("township"),
                        "subdivision": detail_prop.get("subdivision"),
                        "neighborhood": detail_prop.get("neighborhood")
                    },
                    "hoa": {
                        "monthly_fee": detail_prop.get("hoaFee"),
                        "fee_frequency": detail_prop.get("hoaFeeFrequency"),
                        "hoa_name": detail_prop.get("hoaName"),
                        "association_amenities": detail_prop.get("associationAmenities", [])
                    },
                    "school_district": detail_prop.get("schoolDistrict")
                }
            },

            # ==================== FINANCIAL & LISTING DETAILS ====================
            "FINANCIAL_AND_LISTING_DETAILS": {
                "pricing": {
                    "listing_price": listing.get("price"),
                    "price_per_sqft": detail_prop.get("pricePerSquareFoot"),
                    "original_price": detail_prop.get("originalPrice"),
                    "price_cut": detail_prop.get("priceCut")
                },
                "estimates": {
                    "zestimate": detail_prop.get("zestimate") or listing.get("zestimate"),
                    "zestimate_low": zestimate_range.get("low"),
                    "zestimate_high": zestimate_range.get("high"),
                    "rent_zestimate": detail_prop.get("rentZestimate") or listing.get("rentZestimate"),
                    "monthly_estimate": detail_prop.get("monthlyEstimate")
                },
                "taxes": {
                    "tax_assessed_value": detail_prop.get("taxAssessedValue"),
                    "annual_tax": detail_prop.get("annualTax"),
                    "tax_year": detail_prop.get("taxYear"),
                    "property_tax_rate": detail_prop.get("propertyTaxRate")
                },
                "listing_info": {
                    "date_on_market": detail_prop.get("dateOnMarket"),
                    "date_sold": detail_prop.get("dateSold"),
                    "listing_updated": detail_prop.get("listingUpdated"),
                    "last_checked": detail_prop.get("lastChecked"),
                    "days_on_market": detail_prop.get("daysOnMarket"),
                    "cumulative_days_on_market": detail_prop.get("cumulativeDaysOnMarket")
                }
            },

            # ==================== LISTING ACTIVITY ====================
            "LISTING_ACTIVITY": {
                "time_on_zillow": listing.get("daysOnZillow"),
                "views": detail_prop.get("views"),
                "saves": detail_prop.get("saves"),
                "is_featured": listing.get("isFeatured"),
                "listing_sub_type": listing.get("listingSubType")
            },

            # ==================== LISTING AGENT ====================
            "LISTING_AGENT": detail_prop.get("listingAgent", {}),

            # ==================== MLS INFORMATION ====================
            "MLS_INFORMATION": {
                "mls_number": detail_prop.get("mlsNumber"),
                "mls_source": detail_prop.get("mlsSource"),
                "mls_id": detail_prop.get("mlsId"),
                "listing_provider": detail_prop.get("listingProvider")
            },

            # ==================== LOCATION SCORES ====================
            "LOCATION_SCORES": {
                "walk_score": detail_prop.get("walkScore"),
                "transit_score": detail_prop.get("transitScore"),
                "bike_score": detail_prop.get("bikeScore")
            },

            # ==================== NEARBY ====================
            "NEARBY": {
                "nearby_homes": detail_prop.get("nearbyHomes", []),
                "nearby_schools": detail_prop.get("nearbySchools", []),
                "points_of_interest": detail_prop.get("pointsOfInterest", [])
            },

            # ==================== SCHOOLS ====================
            "SCHOOLS": detail_prop.get("schools", []),

            # ==================== PRICE HISTORY ====================
            "PRICE_HISTORY": detail_prop.get("priceHistory", []),

            # ==================== TAX HISTORY ====================
            "TAX_HISTORY": detail_prop.get("taxHistory", []),

            # ==================== OPEN HOUSES ====================
            "OPEN_HOUSES": detail_prop.get("openHouses", []),

            # ==================== TOURS ====================
            "TOURS": {
                "has_3d_tour": detail_prop.get("has3DTour"),
                "has_video": detail_prop.get("hasVideo"),
                "virtual_tour_url": detail_prop.get("virtualTourUrl")
            },

            # ==================== ADDITIONAL METADATA ====================
            "METADATA": {
                # Local wall-clock time at extraction.
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "all_listing_keys": list(listing.keys()),
                "all_detail_keys": list(detail_prop.keys())
            }
        }

        return result


def main():
    """Scrape listings for KEYWORD, save them to JSON and print a summary.

    For every results page (1..MAX_PAGES) the listings are fetched, each
    listing's detail record is requested when the listing has a URL, and the
    two are merged through ``scraper.extract_complete_data``.  All merged
    records are written to ``zillow_complete_data.json`` and then summarised
    on stdout.  Paging stops early on a non-200 status or an empty page.
    """
    import os  # local import: only this entry point needs the environment

    # Configuration
    # NOTE(security): the literal below is a credential kept only as a
    # backward-compatible fallback; prefer exporting HASDATA_API_KEY and
    # rotating/removing the hard-coded value.
    API_KEY = os.environ.get("HASDATA_API_KEY", "e2477c14-214a-4165-ad95-13138b02df13")
    KEYWORD = "Pittsburgh, PA"
    MAX_PAGES = 1  # Change this to scrape more pages
    DELAY = 2  # seconds slept between API calls and between pages
    SAVE_RAW_DATA = True  # only echoed in the banner below; not otherwise used here

    def money(value):
        """Format an amount for display; thousands separators only for numbers."""
        return f"{value:,}" if isinstance(value, (int, float)) else str(value)

    print(f"\n{'='*80}")
    print(f"ZILLOW COMPLETE DATA SCRAPER - EXTRACT EVERYTHING")
    print(f"{'='*80}")
    print(f"Location: {KEYWORD}")
    print(f"Max Pages: {MAX_PAGES}")
    print(f"Save Raw Data: {SAVE_RAW_DATA}")
    print(f"{'='*80}\n")

    scraper = ZillowCompleteScraper(API_KEY)
    all_properties = []
    total_count = 0
    # Counted explicitly: the loop variable is undefined when MAX_PAGES < 1
    # and would over-report when the loop breaks on a failed/empty page.
    pages_scraped = 0

    for page in range(1, MAX_PAGES + 1):
        print(f"\n📄 PAGE {page}/{MAX_PAGES}")
        print(f"{'-'*80}")

        listings, status = scraper.fetch_listings(KEYWORD, page=page)

        if status != 200:
            print(f"✗ Failed. Status: {status}")
            if status == 403:
                print("⚠ API key invalid or expired")
            break

        if not listings:
            print("ℹ No more listings")
            break

        print(f"✓ Found {len(listings)} properties\n")

        # Print available fields from first listing (diagnostic)
        if page == 1 and len(listings) > 0:
            print(f"\n*** AVAILABLE FIELDS IN LISTING ***")
            print(f"Keys: {list(listings[0].keys())}\n")

        for idx, listing in enumerate(listings, 1):
            url = listing.get("url")
            address = listing.get("addressRaw") or listing.get("address")
            price = listing.get("price")

            total_count += 1
            print(f"[{total_count}] {address}")
            print(f"    💰 ${money(price)}" if price else "    💰 N/A")

            details = None
            if url:
                print(f"    ⏳ Fetching complete details...")
                details = scraper.fetch_property_details(url)

                # Print available fields from first detail response (diagnostic)
                if page == 1 and idx == 1 and details and "property" in details:
                    print(f"\n*** AVAILABLE FIELDS IN DETAILS ***")
                    print(f"Keys: {list(details['property'].keys())}\n")

                if details:
                    print(f"    ✓ Complete data retrieved")
                else:
                    print(f"    ⚠ Failed to get details")
                # Throttle between detail requests.
                time.sleep(DELAY)
            else:
                print(f"    ⚠ No URL")

            # details may be None here; the listing is still recorded.
            complete_data = scraper.extract_complete_data(listing, details)
            all_properties.append(complete_data)
            print()

        pages_scraped += 1
        print(f"✓ Page {page} completed")
        time.sleep(DELAY)

    # Save complete JSON
    output_file = "zillow_complete_data.json"
    print(f"\n💾 Saving data to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_properties, f, indent=2, ensure_ascii=False)
    print(f"✓ Saved!")

    # Display summary for each property
    print(f"\n{'#'*80}")
    print(f"COMPLETE PROPERTY DATA")
    print(f"{'#'*80}\n")

    for idx, prop in enumerate(all_properties, 1):
        header = prop.get("HEADER", {})
        coords = prop.get("COORDINATES", {})

        print(f"\n{'='*80}")
        print(f"PROPERTY {idx}/{len(all_properties)}")
        print(f"{'='*80}")
        print(f"\n💰 ${money(header.get('price'))}" if header.get('price') else "N/A")
        print(f"📍 {header.get('address_full')}")
        print(f"🛏  {header.get('bedrooms')} beds | 🛁 {header.get('bathrooms')} baths | 📐 {header.get('square_footage')} sqft")
        print(f"📅 Built: {header.get('year_built')}")
        print(f"💵 Zestimate: ${money(header.get('zestimate'))}" if header.get('zestimate') else "N/A")
        print(f"📊 ${header.get('price_per_sqft')}/sqft" if header.get('price_per_sqft') else "N/A")
        print(f"🌐 Coordinates: {coords.get('latitude')}, {coords.get('longitude')}")

        # Show how many fields were extracted
        listing_keys = len(prop.get("METADATA", {}).get("all_listing_keys", []))
        detail_keys = len(prop.get("METADATA", {}).get("all_detail_keys", []))
        print(f"\n📊 Data Extracted:")
        print(f"   - Listing fields: {listing_keys}")
        print(f"   - Detail fields: {detail_keys}")
        print(f"   - Total sections: {len(prop.keys())}")

        print(f"\n{'-'*80}")
        print("Full JSON structure:")
        # Only the first 2000 characters are echoed; the file holds everything.
        print(json.dumps(prop, indent=2)[:2000] + "\n... (truncated, see JSON file for complete data)")

    # Summary
    print(f"\n{'*'*80}")
    print(f"FINAL SUMMARY")
    print(f"{'*'*80}")
    print(f"✓ Total Properties: {len(all_properties)}")
    print(f"✓ Location: {KEYWORD}")
    print(f"✓ Pages Scraped: {pages_scraped}")
    print(f"✓ Output File: {output_file}")
    print(f"✓ All available fields extracted!")
    print(f"{'*'*80}\n")


# Entry point: run the scraper only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

In [None]:
import http.client
import json
import time
from urllib.parse import quote


class ZillowCompleteScraper:
    """Complete Zillow scraper matching exact Zillow page structure.

    Sends GET requests to the ``api.hasdata.com`` host (authenticated with an
    ``x-api-key`` header) and reshapes the returned listing/detail JSON into a
    nested dict whose section names mirror a Zillow property page.
    """

    def __init__(self, api_key):
        """Store the API key and prepare the host name and request headers."""
        self.api_key = api_key
        self.base_url = "api.hasdata.com"
        self.headers = {
            'x-api-key': api_key,
            'Content-Type': "application/json"
        }

    # ------------------------------------------------------------------
    # Small formatting helpers: JSON fields may be null or have an
    # unexpected type, so none of these may raise on odd input.
    # ------------------------------------------------------------------
    @staticmethod
    def _as_dict(value):
        """Return *value* if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _join_values(values):
        """Render a list-like field as a comma-separated string (None if empty)."""
        if not values:
            return None
        if isinstance(values, (list, tuple, set)):
            return ", ".join(str(item) for item in values)
        # A bare string (or other scalar) is shown as-is rather than being
        # split into characters by str.join.
        return str(values)

    @staticmethod
    def _yes_no(flag):
        """Map a tri-state flag to "Yes" / "No" / None (None = unknown)."""
        if flag is None:
            return None
        return "Yes" if flag else "No"

    @staticmethod
    def _money(amount):
        """Format *amount* as "$1,234"; non-numeric values are shown verbatim."""
        if not amount:
            return None
        if isinstance(amount, (int, float)):
            return f"${amount:,}"
        return f"${amount}"

    @staticmethod
    def _lot_size(value, unit):
        """Format a lot size with its unit, or None when no size is given."""
        if not value:
            return None
        if isinstance(value, (int, float)):
            return f"{value:,.0f} {unit}"
        return f"{value} {unit}"

    def _make_request(self, endpoint, max_retries=3):
        """Make HTTP request with retry logic.

        Args:
            endpoint: Path plus query string requested from ``self.base_url``.
            max_retries: Maximum number of attempts.

        Returns:
            ``(parsed_json, 200)`` on success; ``(None, status)`` for any other
            non-429 HTTP status (not retried); ``(None, 429)`` when the final
            attempt was still rate-limited; ``(None, 0)`` when the final
            attempt failed with an exception (network error, invalid JSON...).
        """
        last_status = 0
        for attempt in range(max_retries):
            conn = None
            try:
                conn = http.client.HTTPSConnection(self.base_url, timeout=30)
                conn.request("GET", endpoint, headers=self.headers)
                res = conn.getresponse()
                data = res.read()

                if res.status == 200:
                    return json.loads(data.decode("utf-8")), res.status
                if res.status == 429:
                    last_status = res.status
                    # Exponential backoff, but no pointless sleep once there
                    # is no attempt left to benefit from it.
                    if attempt + 1 < max_retries:
                        wait_time = 2 ** attempt
                        print(f"  ⚠ Rate limit. Waiting {wait_time}s...")
                        time.sleep(wait_time)
                    else:
                        print(f"  ✗ Rate limit persisted after {max_retries} attempts")
                    continue
                # errors="replace": an undecodable error body must not turn a
                # definitive HTTP failure into a retry.
                print(f"  ✗ Status {res.status}: {data.decode('utf-8', errors='replace')[:200]}")
                return None, res.status
            except Exception as e:
                # Best-effort boundary: any transport/parsing failure is
                # reported and retried with exponential backoff.
                last_status = 0
                print(f"  ✗ Error: {e}")
                if attempt + 1 < max_retries:
                    time.sleep(2 ** attempt)
            finally:
                if conn:
                    conn.close()
        return None, last_status

    def fetch_listings(self, keyword="Pittsburgh, PA", listing_type="forSale", page=1):
        """Fetch Zillow listings.

        Returns:
            ``(properties, status)`` where *properties* is the response's
            ``"properties"`` value, or ``[]`` when it is missing or empty.
        """
        encoded_keyword = quote(keyword)
        endpoint = f"/scrape/zillow/listing?keyword={encoded_keyword}&type={listing_type}&page={page}"

        data, status = self._make_request(endpoint)
        if data and "properties" in data:
            return data["properties"] or [], status
        return [], status

    def fetch_property_details(self, property_url):
        """Fetch detailed property information.

        Returns the parsed response for *property_url*, or None when the URL
        is empty or the request did not end with status 200.
        """
        if not property_url:
            return None

        # safe='' so that "/" and ":" in the URL are percent-encoded too.
        encoded_url = quote(property_url, safe='')
        endpoint = f"/scrape/zillow/property?url={encoded_url}&extractAgentEmails=true"

        data, status = self._make_request(endpoint)
        return data if status == 200 else None

    def extract_zillow_format(self, listing, details):
        """Build a nested dict laid out like the sections of a Zillow page.

        Args:
            listing: One listing record (dict) from ``fetch_listings``.
            details: Detail response from ``fetch_property_details`` (its
                ``"property"`` entry is used), or None when unavailable.

        Returns:
            A dict of sections ("Facts & features", "Property Details", ...).
            Missing, null or wrongly-typed source fields yield None / empty
            values instead of raising.
        """
        listing = self._as_dict(listing)
        detail_prop = self._as_dict(self._as_dict(details).get("property"))

        # Basic data (listing value wins, detail value is the fallback)
        beds = listing.get("beds") or detail_prop.get("bedrooms")
        baths = listing.get("baths") or detail_prop.get("bathrooms")
        sqft = listing.get("area") or detail_prop.get("livingArea")
        lot_size = listing.get("lotAreaValue")
        # "or" rather than a .get default: a null unit must not render as "None".
        lot_unit = listing.get("lotAreaUnits") or "Square Feet"
        lot_text = self._lot_size(lot_size, lot_unit)

        # Nested records that may be absent or null in the response
        rooms = self._as_dict(detail_prop.get("rooms"))
        agent = self._as_dict(detail_prop.get("listingAgent"))
        # The address may be a plain string; only a dict carries a city.
        address = self._as_dict(listing.get("address"))

        def room_dimensions(room_key):
            """Return the "dimensions" entry of one room, or None."""
            return self._as_dict(rooms.get(room_key)).get("dimensions")

        price_per_sqft = detail_prop.get("pricePerSquareFoot")

        # Format the data exactly as shown in Zillow
        property_data = {
            "Facts & features": {
                "Interior": {
                    "Bedrooms & bathrooms": {
                        "Bedrooms": beds,
                        "Bathrooms": baths,
                        "Full bathrooms": detail_prop.get("fullBathrooms"),
                        "1/2 bathrooms": detail_prop.get("halfBathrooms")
                    },
                    "Primary bedroom": {
                        "Dimensions": room_dimensions("primaryBedroom")
                    },
                    "Bedroom 2": {
                        "Dimensions": room_dimensions("bedroom2")
                    },
                    "Bedroom 3": {
                        "Dimensions": room_dimensions("bedroom3")
                    },
                    "Den": {
                        "Dimensions": room_dimensions("den")
                    },
                    "Kitchen": {
                        "Dimensions": room_dimensions("kitchen")
                    },
                    "Living room": {
                        "Dimensions": room_dimensions("livingRoom")
                    },
                    "Features": {
                        "Flooring": self._join_values(detail_prop.get("flooring")),
                        "Has basement": self._yes_no(detail_prop.get("hasBasement"))
                    },
                    "Interior area": {
                        "Total structure area": detail_prop.get("totalStructureArea") or sqft,
                        "Total interior livable area": f"{sqft} sqft" if sqft else None
                    }
                },
                "Property": {
                    "Parking": {
                        "Total spaces": detail_prop.get("parkingSpaces"),
                        "Parking features": self._join_values(detail_prop.get("parkingFeatures")),
                        "Has attached garage": self._yes_no(detail_prop.get("hasAttachedGarage"))
                    },
                    "Features": {
                        "Levels": detail_prop.get("levels"),
                        "Stories": detail_prop.get("stories"),
                        "Pool features": detail_prop.get("poolFeatures") or "None"
                    },
                    "Lot": {
                        "Size": lot_text if lot_text is not None else detail_prop.get("lotSize"),
                        "Dimensions": detail_prop.get("lotDimensions")
                    },
                    "Details": {
                        "Parcel number": detail_prop.get("parcelNumber"),
                        "Special conditions": detail_prop.get("specialConditions")
                    }
                },
                "Construction": {
                    "Type & style": {
                        "Home type": detail_prop.get("homeType") or listing.get("homeType"),
                        "Architectural style": detail_prop.get("architecturalStyle"),
                        "Property subtype": detail_prop.get("propertySubType")
                    },
                    "Materials": self._join_values(detail_prop.get("constructionMaterials")),
                    "Condition": detail_prop.get("condition"),
                    "Year built": detail_prop.get("yearBuilt")
                },
                "Utilities & green energy": {
                    "Sewer": detail_prop.get("sewer"),
                    "Water": detail_prop.get("waterSource") or detail_prop.get("water")
                },
                "Community & HOA": {
                    "Location": {
                        "Region": address.get("city")
                    }
                }
            },
            "Financial & listing details": {
                "Price per square foot": f"${price_per_sqft}/sqft" if price_per_sqft else None,
                "Tax assessed value": self._money(detail_prop.get("taxAssessedValue")),
                "Annual tax amount": self._money(detail_prop.get("annualTax")),
                "Date on market": detail_prop.get("dateOnMarket")
            },

            # Additional comprehensive data
            "Property Details": {
                "price": listing.get("price"),
                "address": listing.get("addressRaw"),
                "bedrooms": beds,
                "bathrooms": baths,
                "square_footage": sqft,
                "property_type": listing.get("homeType") or detail_prop.get("homeType"),
                "year_built": detail_prop.get("yearBuilt"),
                "lot_size": lot_text,
                "zestimate": listing.get("zestimate") or detail_prop.get("zestimate"),
                "rent_zestimate": listing.get("rentZestimate"),
                "estimated_monthly_payment": detail_prop.get("monthlyEstimate"),
                "hoa_fee": detail_prop.get("hoaFee"),
                "coordinates": {
                    "latitude": listing.get("lat"),
                    "longitude": listing.get("lng")
                }
            },

            "Description": detail_prop.get("description"),
            "What's special": detail_prop.get("specialFeatures", []),

            "Listing Information": {
                "property_id": listing.get("id"),
                "property_url": listing.get("url"),
                "status": listing.get("status"),
                "days_on_zillow": listing.get("daysOnZillow"),
                "views": detail_prop.get("views"),
                "saves": detail_prop.get("saves"),
                "listing_updated": detail_prop.get("listingUpdated"),
                "last_checked": detail_prop.get("lastChecked")
            },

            "Agent Information": {
                "name": agent.get("name"),
                "phone": agent.get("phone"),
                "email": agent.get("email"),
                "brokerage": agent.get("brokerage")
            },

            "MLS Information": {
                "mls_number": detail_prop.get("mlsNumber"),
                "mls_source": detail_prop.get("mlsSource")
            },

            "Schools": detail_prop.get("schools", []),
            "Price History": detail_prop.get("priceHistory", []),
            "Tax History": detail_prop.get("taxHistory", []),

            "Additional Features": {
                "appliances": detail_prop.get("appliances", []),
                "kitchen_features": detail_prop.get("kitchenFeatures", []),
                "bathroom_features": detail_prop.get("bathroomFeatures", []),
                "heating": detail_prop.get("heating"),
                "cooling": detail_prop.get("cooling"),
                "fireplace_count": detail_prop.get("fireplaceCount"),
                "basement": detail_prop.get("basement"),
                "pool": detail_prop.get("hasPool"),
                "walk_score": detail_prop.get("walkScore"),
                "transit_score": detail_prop.get("transitScore"),
                "bike_score": detail_prop.get("bikeScore")
            },

            "Images": {
                "main_image": listing.get("image"),
                "all_photos": detail_prop.get("photos", [])
            }
        }

        return property_data


def display_property(prop, index, total):
    """Print one extracted property to stdout, section by section.

    Args:
        prop: Property dict with the "Property Details", "Facts & features",
            "What's special", "Description" and "Financial & listing details"
            sections.
        index: 1-based position of this property, shown in the banner.
        total: Total number of properties, shown in the banner.
    """
    rule = "=" * 80

    def is_set(value):
        """Keep every value except None (so 0 and False are still shown)."""
        return value is not None

    def show_group(heading, mapping, keep=bool):
        """Print *heading* and one bullet per kept entry, if any value is truthy."""
        if not any(mapping.values()):
            return
        print(heading)
        for label, value in mapping.items():
            if keep(value):
                print(f"  • {label}: {value}")

    summary = prop.get("Property Details", {})
    facts = prop.get("Facts & features", {})

    print("\n" + rule)
    print(f"PROPERTY {index}/{total}")
    print(rule + "\n")

    # Header section
    asking = summary.get("price")
    if asking:
        print(f"${asking:,}")
    else:
        print("Price: N/A")
    print(f"{summary.get('address')}")
    print(f"{summary.get('bedrooms')}beds")
    print(f"{summary.get('bathrooms')}baths")
    living_area = summary.get('square_footage')
    print(f"{living_area:,}sqft" if living_area else "N/A sqft")

    payment = summary.get("estimated_monthly_payment")
    if payment:
        print(f"Est.: ${payment:,}/mo")

    print(f"{summary.get('property_type')}")
    built = summary.get('year_built')
    if built:
        print(f"Built in {built}")

    lot_text = summary.get('lot_size')
    if lot_text:
        print(f"{lot_text} Lot")

    estimate = summary.get('zestimate')
    if estimate:
        print(f"${estimate:,} Zestimate®")

    position = summary.get('coordinates', {})
    if position.get('latitude') and position.get('longitude'):
        print(f"Coordinates: {position['latitude']}, {position['longitude']}")

    # What's special
    highlights = prop.get("What's special", [])
    if highlights:
        print("\nWhat's special")
        for item in highlights:
            print(f"  • {item}")

    # Description (capped at 500 characters)
    text = prop.get("Description")
    if text:
        print("\nDescription:")
        if len(text) > 500:
            print(text[:500] + "...")
        else:
            print(text)

    # Facts & features
    print("\n" + rule)
    print("Facts & features")
    print(rule)

    # Interior
    interior = facts.get("Interior", {})
    if interior:
        print("\nInterior\n")
        show_group("Bedrooms & bathrooms", interior.get("Bedrooms & bathrooms", {}), keep=is_set)

        # Room dimensions
        room_names = ("Primary bedroom", "Bedroom 2", "Bedroom 3", "Den", "Kitchen", "Living room")
        for room_name in room_names:
            size = interior.get(room_name, {}).get("Dimensions")
            if size:
                print(f"\n{room_name}")
                print(f"  • Dimensions: {size}")

        show_group("\nFeatures", interior.get("Features", {}))
        show_group("\nInterior area", interior.get("Interior area", {}))

    # Property
    property_section = facts.get("Property", {})
    if property_section:
        print("\n" + rule)
        print("Property\n")
        show_group("Parking", property_section.get("Parking", {}), keep=is_set)
        show_group("\nFeatures", property_section.get("Features", {}))
        show_group("\nLot", property_section.get("Lot", {}))
        show_group("\nDetails", property_section.get("Details", {}))

    # Construction
    construction = facts.get("Construction", {})
    if construction:
        print("\n" + rule)
        print("Construction\n")
        show_group("Type & style", construction.get("Type & style", {}))

        for heading in ("Materials", "Condition"):
            entry = construction.get(heading)
            if entry:
                print(f"\n{heading}")
                print(f"  • {entry}")

        year_built = construction.get("Year built")
        if year_built:
            print(f"  • Year built: {year_built}")

    # Utilities
    show_group("\n" + rule + "\nUtilities & green energy\n", facts.get("Utilities & green energy", {}))

    # Community & HOA
    community = facts.get("Community & HOA", {})
    if community:
        print("\n" + rule)
        print("Community & HOA\n")
        show_group("Location", community.get("Location", {}))

    # Financial & listing details
    show_group("\n" + rule + "\nFinancial & listing details\n", prop.get("Financial & listing details", {}))

    print("\n" + rule + "\n")


def main():
    """Scrape listings for KEYWORD, save them as JSON and display each one.

    For every results page (1..MAX_PAGES) the listings are fetched, each
    listing's detail record is requested when the listing has a URL, and the
    two are combined with ``ZillowCompleteScraper.extract_zillow_format``.
    The results are written to ``zillow_properties_exact_format.json`` and
    printed with ``display_property``.  Paging stops early on a non-200
    status or an empty page.
    """
    import os  # local import: only this entry point needs the environment

    # Configuration
    # NOTE(security): the literal below is a credential kept only as a
    # backward-compatible fallback; prefer exporting HASDATA_API_KEY and
    # rotating/removing the hard-coded value.
    API_KEY = os.environ.get("HASDATA_API_KEY", "e2477c14-214a-4165-ad95-13138b02df13")
    KEYWORD = "Pittsburgh, PA"
    MAX_PAGES = 1
    DELAY = 2  # seconds slept between API calls and between pages

    print(f"\n{'='*80}")
    print(f"ZILLOW SCRAPER - EXACT FORMAT EXTRACTION")
    print(f"{'='*80}")
    print(f"Location: {KEYWORD}")
    print(f"Max Pages: {MAX_PAGES}")
    print(f"{'='*80}\n")

    scraper = ZillowCompleteScraper(API_KEY)
    all_properties = []
    total_count = 0

    for page in range(1, MAX_PAGES + 1):
        print(f"\n📄 PAGE {page}/{MAX_PAGES}")
        print(f"{'-'*80}")

        listings, status = scraper.fetch_listings(KEYWORD, page=page)

        if status != 200:
            print(f"✗ Failed. Status: {status}")
            break

        if not listings:
            print("ℹ No more listings")
            break

        print(f"✓ Found {len(listings)} properties\n")

        for idx, listing in enumerate(listings, 1):
            url = listing.get("url")
            address = listing.get("addressRaw") or listing.get("address")
            price = listing.get("price")

            total_count += 1
            print(f"[{total_count}] {address}")
            if not price:
                print("    💰 N/A")
            elif isinstance(price, (int, float)):
                print(f"    💰 ${price:,}")
            else:
                # A non-numeric price (e.g. a string) cannot take the ","
                # format spec; show it verbatim instead of crashing.
                print(f"    💰 ${price}")

            details = None
            if url:
                print(f"    ⏳ Fetching details...")
                details = scraper.fetch_property_details(url)
                if details:
                    print(f"    ✓ Complete")
                else:
                    print(f"    ⚠ Failed")
                # Throttle between detail requests.
                time.sleep(DELAY)
            else:
                print(f"    ⚠ No URL")

            # details may be None here; the listing is still recorded.
            property_data = scraper.extract_zillow_format(listing, details)
            all_properties.append(property_data)
            print()

        print(f"✓ Page {page} completed")
        time.sleep(DELAY)

    # Save JSON
    output_file = "zillow_properties_exact_format.json"
    print(f"\n💾 Saving to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_properties, f, indent=2, ensure_ascii=False)
    print(f"✓ Saved!")

    # Display all properties
    print(f"\n{'#'*80}")
    print(f"ALL PROPERTIES - ZILLOW FORMAT")
    print(f"{'#'*80}")

    for idx, prop in enumerate(all_properties, 1):
        display_property(prop, idx, len(all_properties))

    # Summary
    print(f"\n{'*'*80}")
    print(f"SCRAPING COMPLETE")
    print(f"{'*'*80}")
    print(f"✓ Total Properties: {len(all_properties)}")
    print(f"✓ Location: {KEYWORD}")
    print(f"✓ Output File: {output_file}")
    print(f"{'*'*80}\n")


# Entry point: run the scraper only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()