In [12]:
import os
import json
import pandas as pd
from dotenv import load_dotenv
from supabase import create_client, Client

In [13]:
load_dotenv()
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
SUPABASE_URL = os.getenv("SUPABASE_URL")

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
print("Supabase client initialized successfully!")

Supabase client initialized successfully!


In [46]:
json_file_path = '../scraper/json-dump/real_estate_listings.json'
with open(json_file_path, 'r') as f:
    data = json.load(f)

listing_data_batch = []
listing_details_batch = []
listing_details_map = {}

for listing in data:
    address_parts = listing['Address'].split(',')
    address = address_parts[0]
    city = address_parts[1]
    state = address_parts[2].split(' ')[1]
    zipcode = address_parts[2].split(' ')[2]
    price = int(listing['Price'].replace('$', '').replace(',', ''))
    status = listing['Sale Status']
    bed = listing['Bedrooms']
    bath = listing['Bathrooms']
    area = listing['Square Feet']

    listing_data = {
        'address': address,
        'city': city,
        'state': state,
        'zip': zipcode,
        'price': price,
        'description': status,
    }

    listing_key = f"{address}_{city}_{state}_{zipcode}_{price}"
    listing_details_map[listing_key] = {
        'bedrooms': bed,
        'baths': bath,
        'area': area
    }
    
    existing_listing = supabase.table('listings').select().eq('address', address).eq('city', city).eq('state', state).eq('zip', zipcode).eq('price', price).execute()
    if existing_listing.data:
        print(f"Listing already exists: {listing_data}")
    else:
        listing_data_batch.append(listing_data)

# Insert listings data
if listing_data_batch:
    response = supabase.table('listings').insert(listing_data_batch).execute()
    print(f"Successfully inserted {len(listing_data_batch)} listings")
    
    # Use the returned insert IDs to create the listing details
    for idx, listing in enumerate(response.data):
        # Get the details for this listing using the same key format
        listing_key = f"{listing_data_batch[idx]['address']}_{listing_data_batch[idx]['city']}_{listing_data_batch[idx]['state']}_{listing_data_batch[idx]['zip']}_{listing_data_batch[idx]['price']}"
        details = listing_details_map[listing_key]
        
        listing_details_data = {
            'listing_id': listing['id'],
            'bedrooms': details['bedrooms'],
            'baths': details['baths'],
            'area': details['area']
        }
        listing_details_batch.append(listing_details_data)
    
    # Insert listing details data
    if listing_details_batch:
        response_details = supabase.table('listing_details').insert(listing_details_batch).execute()
        print(f"Successfully inserted {len(listing_details_batch)} listing details")

Successfully inserted 13 listings
Successfully inserted 13 listing details


In [45]:
print(listing_details_batch)

[{'listing_id': 1, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 2, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 3, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 4, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 5, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 6, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 7, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 8, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 9, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 10, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listing_id': 11, 'bedrooms': '2 bed', 'baths': '1.5 bath', 'area': '1,000 square feet'}, {'listi

In [None]:
def insert_data(data):
    for entry in data:
        address_data = {
            'country': entry['address']['country'] or '',
            'administrative_area': entry['address']['administrative_area'] or '',
            'sub_administrative_area': entry['address']['sub_administrative_area'] or '',
            'locality': entry['address']['locality'] or '',
            'postal_code': entry['address']['postal_code'] or '',
            'street': entry['address']['street'] or '',
            'premise': entry['address']['premise'] or '',
            'sub_premise': entry['address']['sub_premise'] or '',
        }
        
        # Use upsert with on_conflict parameter to specify which fields are constrained
        address_response = supabase.table('address').upsert(
            [address_data],
            on_conflict='country,administrative_area,sub_administrative_area,locality,postal_code,street,premise,sub_premise'
        ).execute()
        
        if address_response.data:
            address_id = address_response.data[0]['id']
        else:
            print(f"Error upserting address data: {address_response}")
            continue  # Skip to the next entry if address upsert fails
        
        # Convert None to 0 for numeric entries
        price = entry['price'] if entry['price'] is not None else 0
        bedrooms = entry['bedrooms'] if entry['bedrooms'] is not None else 0
        bathrooms = entry['bathrooms'] if entry['bathrooms'] is not None else 0
        square_feet = entry['square_feet'] if entry['square_feet'] is not None else 0
        acre_lot = entry['acre_lot'] if entry['acre_lot'] is not None else 0
        
        property_listing_data = {
            'price': price,
            'bedrooms': bedrooms,
            'bathrooms': bathrooms,
            'square_feet': square_feet,
            'sale_status': entry['sale_status'],
            'acre_lot': acre_lot,
            'tour_available': entry['tour_available'],
            'image_source': entry['image_source'],
            'address_id': address_id
        }
        
        # Use upsert with on_conflict for property listings
        # Assuming there's a unique constraint that includes these fields
        property_response = supabase.table('property_listings').upsert(
            [property_listing_data],
            on_conflict='address_id,price,sale_status'  # Assuming address_id is part of a unique constraint
        ).execute()
        
        if property_response.data:
            print(f"Successfully upserted property listing: {property_listing_data}")
        else:
            print(f"Error upserting property listing data: {property_response}")