In [3]:
import http.client
import json
import time
from urllib.parse import quote


class ZillowCompleteScraper:
    """Fetch Zillow listings and property details through the HasData API.

    The class issues HTTPS GET requests against ``api.hasdata.com`` and
    reshapes the JSON responses into a nested dictionary with fixed,
    human-readable key names.
    """

    def __init__(self, api_key):
        """Remember the API key and prepare the host name and request headers.

        Args:
            api_key: Value sent in the ``x-api-key`` header of every request.
        """
        self.api_key = api_key
        self.base_url = "api.hasdata.com"
        self.headers = {
            'x-api-key': api_key,
            'Content-Type': "application/json"
        }

    def _make_request(self, endpoint, max_retries=3):
        """Send a GET request, retrying on rate limits and on errors.

        Args:
            endpoint: Path plus query string, requested from ``self.base_url``.
            max_retries: Maximum number of attempts.

        Returns:
            A ``(payload, status)`` tuple. ``payload`` is the decoded JSON body
            on HTTP 200 and ``None`` otherwise. ``status`` is the HTTP status
            of the last response received, or ``0`` when the last attempt
            raised (or no attempt was made).
        """
        last_status = 0
        for attempt in range(max_retries):
            retries_left = attempt + 1 < max_retries
            conn = None
            try:
                conn = http.client.HTTPSConnection(self.base_url, timeout=30)
                conn.request("GET", endpoint, headers=self.headers)
                res = conn.getresponse()
                data = res.read()
                last_status = res.status

                if res.status == 200:
                    return json.loads(data.decode("utf-8")), res.status

                if res.status != 429:
                    # Any other status is treated as final: report it and stop.
                    body = data.decode('utf-8', errors='replace')
                    print(f"  ✗ Status {res.status}: {body[:200]}")
                    return None, res.status

                # Rate limited: back off exponentially, but do not sleep when
                # there is no further attempt to wait for.
                if retries_left:
                    wait_time = 2 ** attempt
                    print(f"  ⚠ Rate limit. Waiting {wait_time}s...")
                    time.sleep(wait_time)
                else:
                    print("  ✗ Rate limit. Retries exhausted.")
            except Exception as e:
                # Best-effort boundary: connection failures and undecodable
                # bodies are reported and retried rather than propagated.
                last_status = 0
                print(f"  ✗ Error: {e}")
                if retries_left:
                    time.sleep(2 ** attempt)
            finally:
                if conn:
                    conn.close()
        return None, last_status

    def fetch_listings(self, keyword="Pittsburgh, PA", listing_type="forSale", page=1):
        """Fetch one page of listings for a search keyword.

        Args:
            keyword: Search text; percent-encoded before being sent.
            listing_type: Value of the ``type`` query parameter.
            page: Value of the ``page`` query parameter.

        Returns:
            A ``(listings, status)`` tuple. ``listings`` is the response's
            ``properties`` value, or an empty list when it is missing or empty.
        """
        encoded_keyword = quote(keyword)
        encoded_type = quote(str(listing_type), safe='')
        endpoint = f"/scrape/zillow/listing?keyword={encoded_keyword}&type={encoded_type}&page={page}"

        data, status = self._make_request(endpoint)
        if isinstance(data, dict):
            # "properties" may be absent or null; always hand back a list.
            return data.get("properties") or [], status
        return [], status

    def fetch_property_details(self, property_url):
        """Fetch the detail payload for one property URL.

        Args:
            property_url: Listing URL; fully percent-encoded into the query.

        Returns:
            The decoded JSON payload on HTTP 200, otherwise ``None`` (also
            ``None`` when ``property_url`` is empty).
        """
        if not property_url:
            return None

        encoded_url = quote(property_url, safe='')
        endpoint = f"/scrape/zillow/property?url={encoded_url}&extractAgentEmails=true"

        data, status = self._make_request(endpoint)
        return data if status == 200 else None

    @staticmethod
    def _as_dict(value):
        """Return ``value`` when it is a dict, else an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _num(value, spec=","):
        """Format a number with ``spec``; fall back to ``str`` for other types."""
        if isinstance(value, (int, float)):
            return format(value, spec)
        return str(value)

    @staticmethod
    def _joined(items):
        """Join a sequence with ``", "``; empty input gives ``None``."""
        if not items:
            return None
        if isinstance(items, str):
            return items
        return ", ".join(str(item) for item in items)

    @staticmethod
    def _yes_no(flag):
        """Map a tri-state flag to ``"Yes"``, ``"No"`` or ``None``."""
        if flag is None:
            return None
        return "Yes" if flag else "No"

    def extract_complete_data(self, listing, details):
        """Combine a listing and its detail payload into one nested record.

        Args:
            listing: One entry from the listings response.
            details: Detail payload for the same property, or ``None``. Its
                ``property`` value is the source of the detail fields.

        Returns:
            A dictionary of formatted header fields, grouped sub-dictionaries
            and the two raw inputs under ``RAW_LISTING_DATA`` and
            ``RAW_DETAILS_DATA``. Fields without a value are ``None``, or an
            empty list where the original default was a list.
        """
        as_dict = self._as_dict
        num = self._num

        # ``.get(key, {})`` does not help when the key is present with a null
        # value, so every nested object is normalised to a dict up front.
        detail_prop = as_dict(as_dict(details).get("property"))
        address = as_dict(listing.get("address"))
        rooms = as_dict(detail_prop.get("rooms"))
        agent = as_dict(detail_prop.get("listingAgent"))
        zestimate_range = as_dict(detail_prop.get("zestimateRange"))

        def room_dimensions(room_key):
            return as_dict(rooms.get(room_key)).get("dimensions")

        def money(value):
            return f"${num(value)}" if value else None

        # Basic data: the listing value wins, the detail value is the fallback.
        beds = listing.get("beds") or detail_prop.get("bedrooms")
        baths = listing.get("baths") or detail_prop.get("bathrooms")
        sqft = listing.get("area") or detail_prop.get("livingArea")
        lot_size = listing.get("lotAreaValue")
        lot_unit = listing.get("lotAreaUnits", "Square Feet")
        price = listing.get("price")
        zestimate = listing.get("zestimate") or detail_prop.get("zestimate")

        monthly_estimate = detail_prop.get("monthlyEstimate")
        year_built = detail_prop.get("yearBuilt")
        hoa_fee = detail_prop.get("hoaFee")
        rent_zestimate = listing.get("rentZestimate")

        # Derive price per sqft only when it is missing and both inputs are
        # usable numbers.
        price_sqft = detail_prop.get("pricePerSquareFoot")
        if (
            not price_sqft
            and isinstance(price, (int, float))
            and isinstance(sqft, (int, float))
            and price
            and sqft > 0
        ):
            price_sqft = round(price / sqft)
        price_sqft_text = f"${price_sqft}/sqft" if price_sqft else None

        property_data = {
            # ==================== BASIC PROPERTY INFO ====================
            "Property_ID": listing.get("id"),
            "Property_URL": listing.get("url"),
            "Property_Status": listing.get("status"),
            "Currency": listing.get("currency", "$"),

            # ==================== HEADER INFORMATION ====================
            "Price": money(price),
            "Address": listing.get("addressRaw"),
            "Street": address.get("street"),
            "City": address.get("city"),
            "State": address.get("state"),
            "Zipcode": address.get("zipcode"),
            "Latitude": listing.get("lat"),
            "Longitude": listing.get("lng"),

            "Bedrooms": f"{beds}beds" if beds else None,
            "Bathrooms": f"{baths}baths" if baths else None,
            "Square_Footage": f"{num(sqft)}sqft" if sqft else None,
            "Estimated_Monthly_Payment": f"Est.: ${num(monthly_estimate)}/mo" if monthly_estimate else None,
            "Property_Type": listing.get("homeType") or detail_prop.get("homeType"),
            "Year_Built": f"Built in {year_built}" if year_built else None,
            "Lot_Size": f"{num(lot_size, ',.0f')} {lot_unit} Lot" if lot_size else detail_prop.get("lotSize"),
            "Zestimate": f"${num(zestimate)} Zestimate®" if zestimate else None,
            "Price_Per_Sqft": price_sqft_text,
            "HOA_Fee": f"${num(hoa_fee)} HOA" if hoa_fee else "$-- HOA",
            "Rent_Zestimate": f"${num(rent_zestimate)}/mo" if rent_zestimate else None,

            # ==================== IMAGES & MEDIA ====================
            "Main_Image": listing.get("image"),
            "Image_Source": listing.get("imgSrc"),
            "All_Photos": detail_prop.get("photos", []),
            "Photo_Count": detail_prop.get("photoCount"),
            "Has_3D_Tour": detail_prop.get("has3DTour"),
            "Has_Video": detail_prop.get("hasVideo"),
            "Virtual_Tour_URL": detail_prop.get("virtualTourUrl"),

            # ==================== WHAT'S SPECIAL ====================
            "Whats_Special": detail_prop.get("specialFeatures", []),

            # ==================== DESCRIPTION ====================
            "Description": detail_prop.get("description"),

            # ==================== INTERIOR (EXACT KEY NAMES) ====================
            "Interior": {
                "Bedrooms": beds,
                "Bathrooms": baths,
                "Full_bathrooms": detail_prop.get("fullBathrooms"),
                "Half_bathrooms": detail_prop.get("halfBathrooms"),
                "Primary_bedroom_dimensions": room_dimensions("primaryBedroom"),
                "Bedroom_2_dimensions": room_dimensions("bedroom2"),
                "Bedroom_3_dimensions": room_dimensions("bedroom3"),
                "Den_dimensions": room_dimensions("den"),
                "Kitchen_dimensions": room_dimensions("kitchen"),
                "Living_room_dimensions": room_dimensions("livingRoom"),
                "Flooring": self._joined(detail_prop.get("flooring")),
                "Has_basement": self._yes_no(detail_prop.get("hasBasement")),
                "Total_structure_area": detail_prop.get("totalStructureArea") or sqft,
                "Total_interior_livable_area": f"{sqft} sqft" if sqft else None
            },

            # ==================== PROPERTY (EXACT KEY NAMES) ====================
            "Property": {
                "Total_spaces": detail_prop.get("parkingSpaces"),
                "Parking_features": self._joined(detail_prop.get("parkingFeatures")),
                "Has_attached_garage": self._yes_no(detail_prop.get("hasAttachedGarage")),
                "Levels": detail_prop.get("levels"),
                "Stories": detail_prop.get("stories"),
                "Pool_features": detail_prop.get("poolFeatures") or "None",
                "Lot_size": f"{num(lot_size, ',.0f')} {lot_unit}" if lot_size else detail_prop.get("lotSize"),
                "Lot_dimensions": detail_prop.get("lotDimensions"),
                "Parcel_number": detail_prop.get("parcelNumber"),
                "Special_conditions": detail_prop.get("specialConditions")
            },

            # ==================== CONSTRUCTION (EXACT KEY NAMES) ====================
            "Construction": {
                "Home_type": detail_prop.get("homeType") or listing.get("homeType"),
                "Architectural_style": detail_prop.get("architecturalStyle"),
                "Property_subtype": detail_prop.get("propertySubType"),
                "Materials": self._joined(detail_prop.get("constructionMaterials")),
                "Condition": detail_prop.get("condition"),
                "Year_built": year_built
            },

            # ==================== UTILITIES & GREEN ENERGY (EXACT KEY NAMES) ====================
            "Utilities_and_Green_Energy": {
                "Sewer": detail_prop.get("sewer"),
                "Water": detail_prop.get("waterSource") or detail_prop.get("water"),
                "Heating": detail_prop.get("heating"),
                "Cooling": detail_prop.get("cooling"),
                "Electric": detail_prop.get("electric"),
                "Gas": detail_prop.get("gas")
            },

            # ==================== COMMUNITY & HOA (EXACT KEY NAMES) ====================
            "Community_and_HOA": {
                "Region": address.get("city"),
                "County": detail_prop.get("county"),
                "Township": detail_prop.get("township"),
                "Subdivision": detail_prop.get("subdivision"),
                "Neighborhood": detail_prop.get("neighborhood"),
                "School_District": detail_prop.get("schoolDistrict"),
                "HOA_Fee": money(hoa_fee),
                "HOA_Fee_Frequency": detail_prop.get("hoaFeeFrequency"),
                "Association_Amenities": detail_prop.get("associationAmenities", [])
            },

            # ==================== FINANCIAL & LISTING DETAILS (EXACT KEY NAMES) ====================
            "Financial_and_Listing_Details": {
                "Price_per_square_foot": price_sqft_text,
                "Tax_assessed_value": money(detail_prop.get("taxAssessedValue")),
                "Annual_tax_amount": money(detail_prop.get("annualTax")),
                "Date_on_market": detail_prop.get("dateOnMarket"),
                "Original_Price": money(detail_prop.get("originalPrice")),
                "Price_Cut": detail_prop.get("priceCut"),
                "Zestimate_Low": money(zestimate_range.get("low")),
                "Zestimate_High": money(zestimate_range.get("high")),
                "Tax_Year": detail_prop.get("taxYear")
            },

            # ==================== LISTING ACTIVITY ====================
            "Listing_Activity": {
                "Days_on_Zillow": listing.get("daysOnZillow"),
                "Views": detail_prop.get("views"),
                "Saves": detail_prop.get("saves"),
                "Is_Featured": listing.get("isFeatured"),
                "Listing_Updated": detail_prop.get("listingUpdated"),
                "Last_Checked": detail_prop.get("lastChecked"),
                "Date_Sold": detail_prop.get("dateSold")
            },

            # ==================== AGENT INFORMATION ====================
            "Listing_Agent": {
                "Name": agent.get("name"),
                "Phone": agent.get("phone"),
                "Email": agent.get("email"),
                "Brokerage": agent.get("brokerage"),
                "Broker_Phone": agent.get("brokerPhone")
            },

            # ==================== MLS INFORMATION ====================
            "MLS_Information": {
                "MLS_Number": detail_prop.get("mlsNumber"),
                "MLS_Source": detail_prop.get("mlsSource"),
                "MLS_ID": detail_prop.get("mlsId"),
                "Listing_Provider": detail_prop.get("listingProvider")
            },

            # ==================== LOCATION SCORES ====================
            "Location_Scores": {
                "Walk_Score": detail_prop.get("walkScore"),
                "Transit_Score": detail_prop.get("transitScore"),
                "Bike_Score": detail_prop.get("bikeScore")
            },

            # ==================== ADDITIONAL FEATURES ====================
            "Additional_Features": {
                "Appliances": detail_prop.get("appliances", []),
                "Kitchen_Features": detail_prop.get("kitchenFeatures", []),
                "Bathroom_Features": detail_prop.get("bathroomFeatures", []),
                "Laundry_Features": detail_prop.get("laundryFeatures", []),
                "Fireplace_Count": detail_prop.get("fireplaceCount"),
                "Fireplace_Type": detail_prop.get("fireplaceType"),
                "Fireplace_Features": detail_prop.get("fireplaceFeatures", []),
                "Basement": detail_prop.get("basement"),
                "Basement_Sqft": detail_prop.get("basementSqft"),
                "Has_Pool": detail_prop.get("hasPool"),
                "Pool_Type": detail_prop.get("poolType"),
                "Spa_Features": detail_prop.get("spaFeatures", []),
                "Patio_and_Porch_Features": detail_prop.get("patioAndPorchFeatures", []),
                "Fencing": detail_prop.get("fencing", []),
                "Landscaping": detail_prop.get("landscaping", []),
                "Window_Features": detail_prop.get("windowFeatures", []),
                "Door_Features": detail_prop.get("doorFeatures", []),
                "Roof_Type": detail_prop.get("roofType"),
                "Foundation": detail_prop.get("foundation", []),
                "Exterior": detail_prop.get("exterior", [])
            },

            # ==================== SCHOOLS ====================
            "Schools": detail_prop.get("schools", []),

            # ==================== PRICE HISTORY ====================
            "Price_History": detail_prop.get("priceHistory", []),

            # ==================== TAX HISTORY ====================
            "Tax_History": detail_prop.get("taxHistory", []),

            # ==================== NEARBY ====================
            "Nearby": {
                "Nearby_Homes": detail_prop.get("nearbyHomes", []),
                "Nearby_Schools": detail_prop.get("nearbySchools", []),
                "Points_of_Interest": detail_prop.get("pointsOfInterest", [])
            },

            # ==================== OPEN HOUSES ====================
            "Open_Houses": detail_prop.get("openHouses", []),

            # ==================== RAW DATA ====================
            "RAW_LISTING_DATA": listing,
            "RAW_DETAILS_DATA": details
        }

        return property_data


def main():
    """Scrape listings page by page, save them as JSON and print a report.

    For every listing on each page the detail payload is fetched, merged with
    the listing via ``ZillowCompleteScraper.extract_complete_data`` and
    collected. All collected records are written to
    ``zillow_complete_properties.json`` and then printed section by section.
    """
    import os  # local import so this block does not depend on the file header

    # Configuration
    # NOTE(security): a credential is embedded in source. Prefer setting the
    # HASDATA_API_KEY environment variable; the literal is kept only as a
    # backward-compatible fallback and should be rotated and removed.
    API_KEY = os.environ.get("HASDATA_API_KEY") or "e2477c14-214a-4165-ad95-13138b02df13"
    KEYWORD = "Pittsburgh, PA"
    MAX_PAGES = 1  # Change this to scrape more pages
    DELAY = 2

    rule = '=' * 80
    stars = '*' * 80

    print(f"\n{rule}")
    print("ZILLOW COMPLETE SCRAPER - ALL DATA + EXACT KEY NAMES")
    print(rule)
    print(f"Location: {KEYWORD}")
    print(f"Max Pages: {MAX_PAGES}")
    print(f"{rule}\n")

    scraper = ZillowCompleteScraper(API_KEY)
    all_properties = []
    total_count = 0

    for page in range(1, MAX_PAGES + 1):
        print(f"\n📄 PAGE {page}/{MAX_PAGES}")
        print('-' * 80)

        listings, status = scraper.fetch_listings(KEYWORD, page=page)

        if status != 200:
            print(f"✗ Failed. Status: {status}")
            break

        if not listings:
            print("ℹ No more listings")
            break

        print(f"✓ Found {len(listings)} properties\n")

        for listing in listings:
            url = listing.get("url")
            address = listing.get("addressRaw") or listing.get("address")
            price = listing.get("price")

            total_count += 1
            print(f"[{total_count}] {address}")
            if not price:
                print("    💰 N/A")
            elif isinstance(price, (int, float)):
                print(f"    💰 ${price:,}")
            else:
                # A non-numeric price cannot take the thousands separator.
                print(f"    💰 ${price}")

            details = None
            if url:
                print("    ⏳ Fetching complete details...")
                details = scraper.fetch_property_details(url)
                print("    ✓ Complete" if details else "    ⚠ Failed")
                # Pause between detail requests.
                time.sleep(DELAY)
            else:
                print("    ⚠ No URL")

            all_properties.append(scraper.extract_complete_data(listing, details))
            print()

        print(f"✓ Page {page} completed")
        time.sleep(DELAY)

    # Save JSON
    output_file = "zillow_complete_properties.json"
    print(f"\n💾 Saving to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_properties, f, indent=2, ensure_ascii=False)
    print("✓ Saved!")

    # Display properties
    print(f"\n{'#'*80}")
    print("COMPLETE PROPERTY DATA")
    print('#' * 80)

    # Header fields printed one per line, with 'N/A' for missing values.
    header_keys = (
        'Bedrooms',
        'Bathrooms',
        'Square_Footage',
        'Estimated_Monthly_Payment',
        'Property_Type',
        'Year_Built',
        'Lot_Size',
        'Zestimate',
        'Price_Per_Sqft',
        'HOA_Fee',
    )

    # (title, record key, skip list values) for each printed section. Only the
    # community section hides list-valued entries.
    sections = (
        ("🏠 INTERIOR", 'Interior', False),
        ("🏡 PROPERTY", 'Property', False),
        ("🔨 CONSTRUCTION", 'Construction', False),
        ("⚡ UTILITIES & GREEN ENERGY", 'Utilities_and_Green_Energy', False),
        ("🏘️  COMMUNITY & HOA", 'Community_and_HOA', True),
        ("💰 FINANCIAL & LISTING DETAILS", 'Financial_and_Listing_Details', False),
    )

    for idx, prop in enumerate(all_properties, 1):
        print(f"\n{rule}")
        print(f"PROPERTY {idx}/{len(all_properties)}")
        print(f"{rule}\n")

        # Header
        print(prop.get('Price') or 'N/A')
        print(prop.get('Address'))
        for key in header_keys:
            print(prop.get(key) or 'N/A')

        print(f"\n📍 Coordinates: {prop.get('Latitude')}, {prop.get('Longitude')}")
        print(f"🔗 URL: {prop.get('Property_URL')}")
        print(f"🖼️  Main Image: {prop.get('Main_Image')}")

        for title, section_key, skip_lists in sections:
            print(f"\n{rule}")
            print(title)
            print(rule)
            for key, val in prop.get(section_key, {}).items():
                if not val:
                    continue
                if skip_lists and isinstance(val, list):
                    continue
                print(f"  • {key.replace('_', ' ')}: {val}")

        print(f"\n{rule}\n")

    # Summary
    print(f"\n{stars}")
    print("SCRAPING COMPLETE")
    print(stars)
    print(f"✓ Total Properties: {len(all_properties)}")
    print(f"✓ Location: {KEYWORD}")
    print(f"✓ Output File: {output_file}")
    print("✓ All data extracted with exact key names!")
    print(f"{stars}\n")

# Run the scraper only when this code is executed as the main program,
# not when it is imported.
if __name__ == "__main__":
    main()


ZILLOW COMPLETE SCRAPER - ALL DATA + EXACT KEY NAMES
Location: Pittsburgh, PA
Max Pages: 1


📄 PAGE 1/1
--------------------------------------------------------------------------------
✓ Found 41 properties

[1] 613 Surfside Dr, Pittsburgh, PA 15239
    💰 $299,900
    ⏳ Fetching complete details...
  ✗ Status 403: {"status":"error","message":"Insufficient credits. Your current plan has run out of credits. Please renew your plan to continue using the API."}
    ⚠ Failed

[2] 552 N Neville St APT 21, Pittsburgh, PA 15213
    💰 $199,000
    ⏳ Fetching complete details...
  ✗ Status 403: {"status":"error","message":"Insufficient credits. Your current plan has run out of credits. Please renew your plan to continue using the API."}
    ⚠ Failed

[3] 103 Mayer Dr, Pittsburgh, PA 15237
    💰 $310,000
    ⏳ Fetching complete details...
  ✗ Status 403: {"status":"error","message":"Insufficient credits. Your current plan has run out of credits. Please renew your plan to continue using the API."}

In [None]:
import http.client
import json
import time
from urllib.parse import quote


class ZillowCompleteScraper:
    """Complete Zillow scraper with comprehensive extraction."""
    
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "api.hasdata.com"
        self.headers = {
            'x-api-key': api_key,
            'Content-Type': "application/json"
        }
    
    def _make_request(self, endpoint, max_retries=3):
        """Make HTTP request with retry logic."""
        for attempt in range(max_retries):
            conn = None
            try:
                conn = http.client.HTTPSConnection(self.base_url, timeout=30)
                conn.request("GET", endpoint, headers=self.headers)
                res = conn.getresponse()
                data = res.read()
                
                if res.status == 200:
                    return json.loads(data.decode("utf-8")), res.status
                elif res.status == 429:
                    wait_time = 2 ** attempt
                    print(f"  ⚠ Rate limit. Waiting {wait_time}s...")
                    time.sleep(wait_time)
                    continue
                else:
                    print(f"  ✗ Status {res.status}")
                    return None, res.status
            except Exception as e:
                print(f"  ✗ Error: {e}")
                if attempt + 1 < max_retries:
                    time.sleep(2 ** attempt)
            finally:
                if conn:
                    conn.close()
        return None, 0
    
    def fetch_listings(self, keyword="Pittsburgh, PA", listing_type="forSale", page=1):
        """Fetch Zillow listings."""
        encoded_keyword = quote(keyword)
        endpoint = f"/scrape/zillow/listing?keyword={encoded_keyword}&type={listing_type}&page={page}"
        
        data, status = self._make_request(endpoint)
        if data and "properties" in data:
            return data["properties"], status
        return [], status
    
    def fetch_property_details(self, property_url):
        """Fetch detailed property information."""
        if not property_url:
            return None
        
        encoded_url = quote(property_url, safe='')
        endpoint = f"/scrape/zillow/property?url={encoded_url}&extractAgentEmails=true"
        
        data, status = self._make_request(endpoint)
        return data if status == 200 else None
    
    def safe_get(self, data, keys, default=None):
        """Safely get nested dictionary values."""
        if isinstance(keys, str):
            keys = [keys]
        
        current = data
        for key in keys:
            if isinstance(current, dict) and key in current:
                current = current[key]
            else:
                return default
        
        return current if current not in (None, "", [], {}) else default
    
    def extract_all_data(self, listing, details):
        """Extract ALL available data comprehensively."""
        
        detail_prop = self.safe_get(details, "property", {})
        
        # Extract everything
        beds = self.safe_get(listing, "beds") or self.safe_get(detail_prop, "bedrooms")
        baths = self.safe_get(listing, "baths") or self.safe_get(detail_prop, "bathrooms")
        sqft = self.safe_get(listing, "area") or self.safe_get(detail_prop, "livingArea")
        lot_value = self.safe_get(listing, "lotAreaValue")
        lot_unit = self.safe_get(listing, "lotAreaUnits", "sqft")
        price = self.safe_get(listing, "price")
        
        # Room info
        rooms = self.safe_get(detail_prop, "rooms", {})
        
        # Build complete property object
        return {
            # ==================== BASIC INFO ====================
            "property_id": self.safe_get(listing, "id"),
            "property_url": self.safe_get(listing, "url"),
            "status": self.safe_get(listing, "status"),
            
            # ==================== MAIN DETAILS ====================
            "price": price,
            "address": self.safe_get(listing, "addressRaw"),
            "street": self.safe_get(listing, ["address", "street"]),
            "city": self.safe_get(listing, ["address", "city"]),
            "state": self.safe_get(listing, ["address", "state"]),
            "zipcode": self.safe_get(listing, ["address", "zipcode"]),
            "latitude": self.safe_get(listing, "lat"),
            "longitude": self.safe_get(listing, "lng"),
            
            "bedrooms": beds,
            "bathrooms": baths,
            "square_footage": sqft,
            "property_type": self.safe_get(listing, "homeType") or self.safe_get(detail_prop, "homeType"),
            "year_built": self.safe_get(detail_prop, "yearBuilt"),
            "lot_size_value": lot_value,
            "lot_size_unit": lot_unit,
            
            # ==================== FINANCIAL ====================
            "zestimate": self.safe_get(listing, "zestimate") or self.safe_get(detail_prop, "zestimate"),
            "rent_zestimate": self.safe_get(listing, "rentZestimate") or self.safe_get(detail_prop, "rentZestimate"),
            "price_per_sqft": self.safe_get(detail_prop, "pricePerSquareFoot"),
            "hoa_fee": self.safe_get(detail_prop, "hoaFee"),
            "monthly_estimate": self.safe_get(detail_prop, "monthlyEstimate"),
            "tax_assessed_value": self.safe_get(detail_prop, "taxAssessedValue"),
            "annual_tax": self.safe_get(detail_prop, "annualTax"),
            "original_price": self.safe_get(detail_prop, "originalPrice"),
            "price_cut": self.safe_get(detail_prop, "priceCut"),
            "zestimate_low": self.safe_get(detail_prop, ["zestimateRange", "low"]),
            "zestimate_high": self.safe_get(detail_prop, ["zestimateRange", "high"]),
            
            # ==================== IMAGES ====================
            "main_image": self.safe_get(listing, "image"),
            "img_src": self.safe_get(listing, "imgSrc"),
            "all_photos": self.safe_get(detail_prop, "photos", []),
            "photo_count": self.safe_get(detail_prop, "photoCount"),
            
            # ==================== SPECIAL FEATURES ====================
            "whats_special": self.safe_get(detail_prop, "specialFeatures", []),
            "description": self.safe_get(detail_prop, "description"),
            
            # ==================== INTERIOR ====================
            "interior": {
                "bedrooms": beds,
                "bathrooms": baths,
                "full_bathrooms": self.safe_get(detail_prop, "fullBathrooms"),
                "half_bathrooms": self.safe_get(detail_prop, "halfBathrooms"),
                "three_quarter_bathrooms": self.safe_get(detail_prop, "threeQuarterBathrooms"),
                
                "primary_bedroom_dimensions": self.safe_get(rooms, ["primaryBedroom", "dimensions"]),
                "bedroom_2_dimensions": self.safe_get(rooms, ["bedroom2", "dimensions"]),
                "bedroom_3_dimensions": self.safe_get(rooms, ["bedroom3", "dimensions"]),
                "bedroom_4_dimensions": self.safe_get(rooms, ["bedroom4", "dimensions"]),
                "den_dimensions": self.safe_get(rooms, ["den", "dimensions"]),
                "kitchen_dimensions": self.safe_get(rooms, ["kitchen", "dimensions"]),
                "living_room_dimensions": self.safe_get(rooms, ["livingRoom", "dimensions"]),
                "dining_room_dimensions": self.safe_get(rooms, ["diningRoom", "dimensions"]),
                
                "flooring": self.safe_get(detail_prop, "flooring", []),
                "has_basement": self.safe_get(detail_prop, "hasBasement"),
                "basement": self.safe_get(detail_prop, "basement"),
                "basement_sqft": self.safe_get(detail_prop, "basementSqft"),
                
                "total_structure_area": self.safe_get(detail_prop, "totalStructureArea") or sqft,
                "living_area": sqft,
                "finished_sqft": self.safe_get(detail_prop, "finishedSqFt"),
                
                "appliances": self.safe_get(detail_prop, "appliances", []),
                "kitchen_features": self.safe_get(detail_prop, "kitchenFeatures", []),
                "bathroom_features": self.safe_get(detail_prop, "bathroomFeatures", []),
                "laundry_features": self.safe_get(detail_prop, "laundryFeatures", []),
                "window_features": self.safe_get(detail_prop, "windowFeatures", []),
                "door_features": self.safe_get(detail_prop, "doorFeatures", []),
                "additional_features": self.safe_get(detail_prop, "additionalFeatures", []),
                
                "fireplace_count": self.safe_get(detail_prop, "fireplaceCount"),
                "fireplace_type": self.safe_get(detail_prop, "fireplaceType"),
                "fireplace_features": self.safe_get(detail_prop, "fireplaceFeatures", [])
            },
            
            # ==================== PROPERTY ====================
            "property_features": {
                "parking_spaces": self.safe_get(detail_prop, "parkingSpaces"),
                "parking_features": self.safe_get(detail_prop, "parkingFeatures", []),
                "parking_type": self.safe_get(detail_prop, "parkingType"),
                "has_attached_garage": self.safe_get(detail_prop, "hasAttachedGarage"),
                "has_detached_garage": self.safe_get(detail_prop, "hasDetachedGarage"),
                "garage_spaces": self.safe_get(detail_prop, "garageSpaces"),
                
                "levels": self.safe_get(detail_prop, "levels") or self.safe_get(listing, "levels"),
                "stories": self.safe_get(detail_prop, "stories"),
                
                "pool_features": self.safe_get(detail_prop, "poolFeatures", []),
                "has_pool": self.safe_get(detail_prop, "hasPool"),
                "pool_type": self.safe_get(detail_prop, "poolType"),
                "spa_features": self.safe_get(detail_prop, "spaFeatures", []),
                
                "lot_size": self.safe_get(detail_prop, "lotSize"),
                "lot_size_acres": self.safe_get(detail_prop, "lotSizeAcres"),
                "lot_dimensions": self.safe_get(detail_prop, "lotDimensions"),
                "lot_features": self.safe_get(detail_prop, "lotFeatures", []),
                
                "parcel_number": self.safe_get(detail_prop, "parcelNumber"),
                "apn": self.safe_get(detail_prop, "apn"),
                "special_conditions": self.safe_get(detail_prop, "specialConditions"),
                
                "patio_porch_features": self.safe_get(detail_prop, "patioAndPorchFeatures", []),
                "fencing": self.safe_get(detail_prop, "fencing", []),
                "landscaping": self.safe_get(detail_prop, "landscaping", [])
            },
            
            # ==================== CONSTRUCTION ====================
            "construction": {
                "home_type": self.safe_get(detail_prop, "homeType") or self.safe_get(listing, "homeType"),
                "architectural_style": self.safe_get(detail_prop, "architecturalStyle"),
                "property_subtype": self.safe_get(detail_prop, "propertySubType"),
                "construction_materials": self.safe_get(detail_prop, "constructionMaterials", []),
                "exterior": self.safe_get(detail_prop, "exterior", []),
                "roof_type": self.safe_get(detail_prop, "roofType"),
                "foundation": self.safe_get(detail_prop, "foundation", []),
                "condition": self.safe_get(detail_prop, "condition"),
                "year_built": self.safe_get(detail_prop, "yearBuilt"),
                "year_renovated": self.safe_get(detail_prop, "yearRenovated")
            },
            
            # ==================== UTILITIES ====================
            "utilities": {
                "sewer": self.safe_get(detail_prop, "sewer"),
                "water": self.safe_get(detail_prop, "waterSource") or self.safe_get(detail_prop, "water"),
                "heating": self.safe_get(detail_prop, "heating"),
                "heating_type": self.safe_get(detail_prop, "heatingType"),
                "cooling": self.safe_get(detail_prop, "cooling"),
                "cooling_type": self.safe_get(detail_prop, "coolingType"),
                "electric": self.safe_get(detail_prop, "electric"),
                "gas": self.safe_get(detail_prop, "gas")
            },
            
            # ==================== COMMUNITY ====================
            "community": {
                "region": self.safe_get(listing, ["address", "city"]),
                "county": self.safe_get(detail_prop, "county"),
                "township": self.safe_get(detail_prop, "township"),
                "subdivision": self.safe_get(detail_prop, "subdivision"),
                "neighborhood": self.safe_get(detail_prop, "neighborhood"),
                "school_district": self.safe_get(detail_prop, "schoolDistrict"),
                "hoa_fee": self.safe_get(detail_prop, "hoaFee"),
                "hoa_fee_frequency": self.safe_get(detail_prop, "hoaFeeFrequency"),
                "association_amenities": self.safe_get(detail_prop, "associationAmenities", [])
            },
            
            # ==================== LISTING INFO ====================
            "listing_info": {
                "days_on_zillow": self.safe_get(listing, "daysOnZillow"),
                "views": self.safe_get(detail_prop, "views"),
                "saves": self.safe_get(detail_prop, "saves"),
                "is_featured": self.safe_get(listing, "isFeatured"),
                "date_on_market": self.safe_get(detail_prop, "dateOnMarket"),
                "date_sold": self.safe_get(detail_prop, "dateSold"),
                "listing_updated": self.safe_get(detail_prop, "listingUpdated"),
                "last_checked": self.safe_get(detail_prop, "lastChecked"),
                "days_on_market": self.safe_get(detail_prop, "daysOnMarket")
            },
            
            # ==================== AGENT ====================
            "listing_agent": self.safe_get(detail_prop, "listingAgent", {}),
            
            # ==================== MLS ====================
            "mls": {
                "mls_number": self.safe_get(detail_prop, "mlsNumber"),
                "mls_source": self.safe_get(detail_prop, "mlsSource"),
                "mls_id": self.safe_get(detail_prop, "mlsId")
            },
            
            # ==================== SCORES ====================
            "scores": {
                "walk_score": self.safe_get(detail_prop, "walkScore"),
                "transit_score": self.safe_get(detail_prop, "transitScore"),
                "bike_score": self.safe_get(detail_prop, "bikeScore")
            },
            
            # ==================== SCHOOLS ====================
            "schools": self.safe_get(detail_prop, "schools", []),
            
            # ==================== HISTORY ====================
            "price_history": self.safe_get(detail_prop, "priceHistory", []),
            "tax_history": self.safe_get(detail_prop, "taxHistory", []),
            
            # ==================== TOURS ====================
            "tours": {
                "has_3d_tour": self.safe_get(detail_prop, "has3DTour"),
                "has_video": self.safe_get(detail_prop, "hasVideo"),
                "virtual_tour_url": self.safe_get(detail_prop, "virtualTourUrl")
            },
            
            # ==================== RAW DATA ====================
            "raw_listing": listing,
            "raw_details": details
        }


def display_property_zillow_format(prop, index, total):
    """Print one extracted property in a layout modelled on a Zillow page.

    All values are read with ``.get`` so missing keys are tolerated. Besides
    top-level keys such as ``price`` and ``address``, the nested sections
    ``interior``, ``property_features``, ``construction``, ``utilities``,
    ``community`` and ``listing_info`` are read; a section that is missing,
    ``None`` or not a dict is treated as empty.

    Args:
        prop: Property dict to display.
        index: Position of this property, shown in the banner.
        total: Total number of properties, shown in the banner.

    Returns:
        None. Everything is written to stdout.
    """
    rule = "=" * 100

    def number(value):
        # Thousands-separated rendering. Values that do not support the ","
        # format (e.g. a price delivered as a string) fall back to str()
        # instead of aborting the whole report.
        try:
            return f"{value:,}"
        except (TypeError, ValueError):
            return str(value)

    def or_na(value, missing="N/A"):
        # Only None and empty strings/containers count as missing, so
        # legitimate zeros (0 half baths, 0 parking spaces) are still shown.
        if value is None:
            return missing
        if isinstance(value, (str, list, tuple, dict)) and not value:
            return missing
        return value

    def joined(values, missing="N/A"):
        # Comma-join a feature list; tolerate a bare string (printed as-is
        # rather than split into characters) and non-string items.
        if not values:
            return missing
        if isinstance(values, str):
            return values
        if isinstance(values, (list, tuple)):
            return ", ".join(str(item) for item in values)
        return str(values)

    def yes_no(flag):
        # Truthy -> Yes, explicit False -> No, anything else is unknown.
        if flag:
            return "Yes"
        return "No" if flag is False else "N/A"

    def section(name):
        # A key that is present but None (or not a dict) must not break .get().
        value = prop.get(name)
        return value if isinstance(value, dict) else {}

    print(f"\n{rule}")
    print(f"PROPERTY {index}/{total}")
    print(f"{rule}\n")

    # Header Section
    price = prop.get("price")
    if price:
        print(f"${number(price)}")
    print(f"{prop.get('address')}")
    print(f"{prop.get('bedrooms')}beds")
    print(f"{prop.get('bathrooms')}baths")
    sqft = prop.get('square_footage')
    if sqft:
        print(f"{number(sqft)}sqft")

    monthly = prop.get('monthly_estimate')
    if monthly:
        print(f"Est.: ${number(monthly)}/mo")

    print(f"{prop.get('property_type')}")

    year = prop.get('year_built')
    if year:
        print(f"Built in {year}")

    lot_val = prop.get('lot_size_value')
    # Same 'sqft' fallback in the header and in the Lot section below.
    lot_unit = prop.get('lot_size_unit') or 'sqft'
    if lot_val:
        print(f"{number(lot_val)} {lot_unit} Lot")

    zest = prop.get('zestimate')
    if zest:
        print(f"${number(zest)} Zestimate®")

    price_sqft = prop.get('price_per_sqft')
    if price_sqft:
        print(f"${price_sqft}/sqft")

    hoa = prop.get('hoa_fee')
    print(f"${number(hoa)} HOA" if hoa else "$-- HOA")

    # Coordinates
    lat, lng = prop.get('latitude'), prop.get('longitude')
    if lat and lng:
        print(f"\nCoordinates: {lat}, {lng}")

    # What's Special
    special = prop.get('whats_special', [])
    if special:
        print(f"\n{rule}")
        print("What's special")
        print(rule)
        print(joined(special))

    # Description
    desc = prop.get('description')
    if desc:
        print(f"\n{rule}")
        print("Description")
        print(rule)
        print(desc)

    # Facts & Features - Interior
    print(f"\n{rule}")
    print("Facts & features")
    print(rule)
    print("\nInterior\n")

    interior = section('interior')
    print("Bedrooms & bathrooms")
    print(f"  * Bedrooms: {or_na(interior.get('bedrooms'))}")
    print(f"  * Bathrooms: {or_na(interior.get('bathrooms'))}")
    print(f"  * Full bathrooms: {or_na(interior.get('full_bathrooms'))}")
    print(f"  * 1/2 bathrooms: {or_na(interior.get('half_bathrooms'))}")

    # Room dimensions - ALWAYS show
    rooms = (
        ("Primary bedroom", 'primary_bedroom_dimensions'),
        ("Bedroom 2", 'bedroom_2_dimensions'),
        ("Bedroom 3", 'bedroom_3_dimensions'),
        ("Den", 'den_dimensions'),
        ("Kitchen", 'kitchen_dimensions'),
        ("Living room", 'living_room_dimensions'),
    )
    for room_title, room_key in rooms:
        print(f"\n{room_title}")
        print(f"  * Dimensions: {or_na(interior.get(room_key))}")

    # Features - ALWAYS show
    print("\nFeatures")
    print(f"  * Flooring: {joined(interior.get('flooring'))}")
    print(f"  * Has basement: {yes_no(interior.get('has_basement'))}")

    # Interior area - ALWAYS show
    print("\nInterior area")
    total_area = interior.get('total_structure_area')
    if total_area:
        print(f"  * Total structure area: {number(total_area)} sqft")
    else:
        print("  * Total structure area: N/A")
    living_area = interior.get('living_area')
    if living_area:
        print(f"  * Total interior livable area: {number(living_area)} sqft")
    else:
        print("  * Total interior livable area: N/A")

    # Property - ALWAYS show all keys
    print(f"\n{rule}")
    print("Property\n")

    prop_features = section('property_features')
    print("Parking")
    print(f"  * Total spaces: {or_na(prop_features.get('parking_spaces'))}")
    print(f"  * Parking features: {joined(prop_features.get('parking_features'))}")
    print(f"  * Has attached garage: {yes_no(prop_features.get('has_attached_garage'))}")

    print("\nFeatures")
    print(f"  * Levels: {or_na(prop_features.get('levels'))}")
    print(f"  * Stories: {or_na(prop_features.get('stories'))}")
    print(f"  * Pool features: {joined(prop_features.get('pool_features'), missing='None')}")

    print("\nLot")
    lot_size = prop_features.get('lot_size')
    if not lot_size and lot_val:
        # Fall back to the top-level value/unit pair when details lack a lot size.
        lot_size = f"{number(lot_val)} {lot_unit}"
    print(f"  * Lot size: {lot_size or 'N/A'}")
    print(f"  * Lot dimensions: {or_na(prop_features.get('lot_dimensions'))}")

    print("\nDetails")
    print(f"  * Parcel number: {or_na(prop_features.get('parcel_number'))}")
    print(f"  * Special conditions: {or_na(prop_features.get('special_conditions'))}")

    # Construction - ALWAYS show all keys
    print(f"\n{rule}")
    print("Construction\n")

    construction = section('construction')
    print("Type & style")
    print(f"  * Home type: {or_na(construction.get('home_type'))}")
    print(f"  * Architectural style: {or_na(construction.get('architectural_style'))}")
    print(f"  * Property subtype: {or_na(construction.get('property_subtype'))}")

    print("\nMaterials")
    print(f"  * {joined(construction.get('construction_materials'))}")

    print("\nCondition")
    print(f"  * {or_na(construction.get('condition'))}")
    print(f"  * Year built: {or_na(construction.get('year_built'))}")

    # Utilities - ALWAYS show all keys
    print(f"\n{rule}")
    print("Utilities & green energy\n")

    utilities = section('utilities')
    print(f"  * Sewer: {or_na(utilities.get('sewer'))}")
    print(f"  * Water: {or_na(utilities.get('water'))}")

    # Community & HOA - ALWAYS show all keys
    print(f"\n{rule}")
    print("Community & HOA\n")

    community = section('community')
    print("Location")
    print(f"  * Region: {or_na(community.get('region'))}")

    # Financial & listing details - ALWAYS show all keys
    print(f"\n{rule}")
    print("Financial & listing details\n")

    if price_sqft:
        print(f"  * Price per square foot: ${price_sqft}/sqft")
    else:
        print("  * Price per square foot: N/A")

    tax_val = prop.get('tax_assessed_value')
    if tax_val:
        print(f"  * Tax assessed value: ${number(tax_val)}")
    else:
        print("  * Tax assessed value: N/A")

    annual_tax = prop.get('annual_tax')
    if annual_tax:
        print(f"  * Annual tax amount: ${number(annual_tax)}")
    else:
        print("  * Annual tax amount: N/A")

    listing = section('listing_info')
    print(f"  * Date on market: {or_na(listing.get('date_on_market'))}")

    # URLs & Images
    print(f"\n{rule}")
    print("Links & Media\n")
    print(f"  * Property URL: {prop.get('property_url')}")
    print(f"  * Main Image: {prop.get('main_image')}")
    photos = prop.get('all_photos')
    if photos:
        print(f"  * Total Photos: {len(photos)}")

    print(f"\n{rule}\n")


def main():
    """Scrape listings page by page, save them as JSON, then print each one.

    For every listing returned by ``fetch_listings`` the detail page is
    fetched when the listing has a URL, the merged record is built with
    ``extract_all_data``, and the records are written to
    ``zillow_all_data.json`` before being displayed with
    ``display_property_zillow_format``. Paging stops early on a non-200
    status or an empty page; whatever was collected so far is still saved.
    """
    import os  # local import: only this entry point needs the environment

    # Configuration
    # NOTE(review): a credential is committed in source. It is kept only as a
    # fallback so existing runs behave the same; prefer setting
    # HASDATA_API_KEY and rotate/remove the literal.
    API_KEY = os.environ.get("HASDATA_API_KEY") or "e2477c14-214a-4165-ad95-13138b02df13"
    KEYWORD = "Pittsburgh, PA"
    MAX_PAGES = 1
    DELAY = 2

    print(f"\n{'='*100}")
    print("ZILLOW COMPLETE SCRAPER - ZILLOW PAGE FORMAT")
    print(f"{'='*100}")
    print(f"Location: {KEYWORD}")
    print(f"Max Pages: {MAX_PAGES}")
    print(f"{'='*100}\n")

    scraper = ZillowCompleteScraper(API_KEY)
    all_properties = []
    total_count = 0

    for page in range(1, MAX_PAGES + 1):
        print(f"\n📄 PAGE {page}/{MAX_PAGES}")
        print(f"{'-'*100}")

        listings, status = scraper.fetch_listings(KEYWORD, page=page)

        if status != 200:
            print(f"✗ Failed. Status: {status}")
            break

        if not listings:
            print("ℹ No listings")
            break

        print(f"✓ Found {len(listings)} properties\n")

        for listing in listings:
            url = listing.get("url")
            address = listing.get("addressRaw")
            price = listing.get("price")

            total_count += 1
            print(f"[{total_count}] {address}")
            if price:
                # A non-numeric price (e.g. a string) cannot take the ","
                # format; show it verbatim rather than aborting the run.
                try:
                    price_text = f"${price:,}"
                except (TypeError, ValueError):
                    price_text = f"${price}"
            else:
                price_text = "N/A"
            print(f"    💰 {price_text}")

            details = None
            if url:
                print("    ⏳ Fetching...")
                details = scraper.fetch_property_details(url)
                if details:
                    print("    ✓ Done")
                # Pause between detail requests.
                time.sleep(DELAY)

            data = scraper.extract_all_data(listing, details)
            all_properties.append(data)
            print()

        print(f"✓ Page {page} done")
        # No further request follows the last page, so skip the final pause.
        if page < MAX_PAGES:
            time.sleep(DELAY)

    # Save JSON
    output_file = "zillow_all_data.json"
    print(f"\n💾 Saving to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_properties, f, indent=2, ensure_ascii=False)
    print("✓ Saved!")

    # Display
    print(f"\n{'#'*100}")
    print("ALL PROPERTIES - ZILLOW FORMAT")
    print(f"{'#'*100}")

    for idx, prop in enumerate(all_properties, 1):
        display_property_zillow_format(prop, idx, len(all_properties))

    # Summary
    print(f"\n{'*'*100}")
    print("COMPLETE")
    print(f"{'*'*100}")
    print(f"✓ Properties: {len(all_properties)}")
    print(f"✓ Location: {KEYWORD}")
    print(f"✓ File: {output_file}")
    print(f"{'*'*100}\n")


# Run the scraper only when this file is executed as a script; importing the
# module must not trigger network requests or file writes.
if __name__ == "__main__":
    main()