In [14]:
#import dependencies
import os
import json
from dotenv import load_dotenv
from supabase import create_client, Client
import requests

In [15]:
#Initialize supabase connection

load_dotenv()

# Read each setting once from the environment (load_dotenv() is called first
# so values from a local .env file are visible to os.getenv).
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
GOOGLE_GEOCODE_KEY = os.getenv('GOOGLE_GEOCODE_API')

# Fail with a readable message instead of handing None to create_client.
_missing_settings = [
    name
    for name, value in (("SUPABASE_URL", SUPABASE_URL), ("SUPABASE_KEY", SUPABASE_KEY))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_settings)}"
    )

# The geocoding key is only needed by get_geocode, so warn rather than abort.
if not GOOGLE_GEOCODE_KEY:
    print("Warning: GOOGLE_GEOCODE_API is not set; geocoding requests will be sent without an API key.")

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
print("Supabase client initialized successfully!")

Supabase client initialized successfully!


In [16]:
#Helper function to read dataset
def getData(filePath):
    """Load a JSON dataset from disk.

    Args:
        filePath: Path of the JSON file to read.

    Returns:
        The parsed JSON content (whatever ``json.load`` produces for the file).
    """
    # Decode explicitly as UTF-8 so non-ASCII text is read the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(filePath, 'r', encoding='utf-8') as f:
        return json.load(f)

In [17]:
#Helper function to get geocode with google API

def get_geocode(address_data, timeout=10):
    """Look up the coordinates of an address with the Google Geocoding API.

    Args:
        address_data: Mapping with 'premise', 'street', 'locality' and
            'administrative_area' keys; they are joined into a single
            free-form address string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(lat, lng)`` tuple taken from the first geocoding result, or
        ``(None, None)`` when the request fails, the HTTP status is not 200,
        or the response contains no results.
    """
    address = f"{address_data['premise']} {address_data['street']}, {address_data['locality']}, {address_data['administrative_area']}"
    try:
        # Pass the query through `params` so requests URL-encodes it; a raw
        # '#' or '&' inside an address would otherwise cut off or corrupt
        # the query string.
        response = requests.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params={"key": GOOGLE_GEOCODE_KEY, "address": address},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Network failures are reported the same way as "no result".
        print(f"Geocoding request failed for '{address}': {exc}")
        return None, None

    if response.status_code != 200:
        return None, None

    results = response.json().get('results')
    if not results:
        return None, None

    location = results[0]['geometry']['location']
    return location['lat'], location['lng']

In [None]:
#Insert function into supabase
# Address columns copied from each scraped entry; the same list, comma-joined,
# is the on_conflict target for the address upsert.
_ADDRESS_FIELDS = (
    'country',
    'administrative_area',
    'sub_administrative_area',
    'locality',
    'postal_code',
    'street',
    'premise',
    'sub_premise',
)


def _zero_if_none(value):
    """Return 0 for a missing numeric value, otherwise the value unchanged."""
    return 0 if value is None else value


def _build_address_row(raw_address):
    """Copy the address columns from a scraped address, replacing falsy values with ''."""
    return {field: raw_address[field] or '' for field in _ADDRESS_FIELDS}


def _build_listing_row(entry, address_id):
    """Build the property_listings row for one scraped entry linked to ``address_id``."""
    return {
        'price': _zero_if_none(entry['price']),
        'bedrooms': _zero_if_none(entry['bedrooms']),
        'bathrooms': _zero_if_none(entry['bathrooms']),
        'square_feet': _zero_if_none(entry['square_feet']),
        'sale_status': entry['sale_status'],
        'acre_lot': _zero_if_none(entry['acre_lot']),
        'tour_available': entry['tour_available'],
        'image_source': entry['image_source'],
        'address_id': address_id,
    }


def insert_data(data):
    """Upsert scraped listings into the 'address' and 'property_listings' tables.

    For every entry the address is geocoded and upserted first; the id of the
    returned address row is then used as ``address_id`` of the listing row.
    Progress and failures are reported with ``print``.

    Args:
        data: Iterable of entry dicts. Each entry needs an 'address' dict with
            the keys in ``_ADDRESS_FIELDS`` plus 'price', 'bedrooms',
            'bathrooms', 'square_feet', 'acre_lot', 'sale_status',
            'tour_available' and 'image_source'.

    Returns:
        None.

    Note:
        Both upserts pass an ``on_conflict`` column list. A saved run of this
        notebook failed with Postgres error 42P10 ("no unique or exclusion
        constraint matching the ON CONFLICT specification"), so each table
        needs a unique constraint on exactly those columns.
    """
    for entry in data:
        address_row = _build_address_row(entry['address'])

        # Only send coordinates when geocoding succeeded: upserting None would
        # overwrite coordinates already stored for an existing address.
        lat, lng = get_geocode(address_row)
        if lat is not None and lng is not None:
            address_row['latitude'] = lat
            address_row['longitude'] = lng

        address_response = supabase.table('address').upsert(
            [address_row],
            on_conflict=",".join(_ADDRESS_FIELDS)
        ).execute()

        if not address_response.data:
            print(f"Error upserting address data: {address_response}")
            continue

        address_id = address_response.data[0]['id']
        property_listing_data = _build_listing_row(entry, address_id)

        property_response = supabase.table('property_listings').upsert(
            [property_listing_data],
            on_conflict='address_id,price,sale_status'
        ).execute()

        if property_response.data:
            print(f"Successfully upserted property listing: {property_listing_data}")
        else:
            print(f"Error upserting property listing data: {property_response}")

In [20]:
# Read the formatted scraper dumps, one file per borough, in a single unpacking.
(
    brooklyn_data,
    manhattan_data,
    queens_data,
    staten_island_data,
    bronx_data,
) = (
    getData(dump_path)
    for dump_path in (
        '../scraper/json-dump/brooklyn-2025-03-15-formatted.json',
        '../scraper/json-dump/manhattan-2025-03-15-formatted.json',
        '../scraper/json-dump/queens-2025-03-15-formatted.json',
        '../scraper/json-dump/staten-island-2025-03-15-formatted.json',
        '../scraper/json-dump/bronx-2025-03-15-formatted.json',
    )
)

In [23]:
# Upsert the Bronx listings into Supabase.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt after
# three listings, so that run did not process the whole dataset.
insert_data(bronx_data)

Successfully upserted property listing: {'price': 700000, 'bedrooms': 4, 'bathrooms': 2.5, 'square_feet': 1863.0, 'sale_status': 'House for sale', 'acre_lot': 1800.0, 'tour_available': True, 'image_source': 'https://ap.rdcpix.com/ffa67aa2e123357cf879c2b693022beal-m2482881542rd-w960_h720.jpg', 'address_id': 1}
Successfully upserted property listing: {'price': 699999, 'bedrooms': 4, 'bathrooms': 2.5, 'square_feet': 1260.0, 'sale_status': 'House for sale', 'acre_lot': 1800.0, 'tour_available': True, 'image_source': 'https://ap.rdcpix.com/2dca2e942e667f7368c974057877d8bel-m2008829264rd-w960_h720.jpg', 'address_id': 2}
Successfully upserted property listing: {'price': 849000, 'bedrooms': 8, 'bathrooms': 3.0, 'square_feet': 2150.0, 'sale_status': 'Townhouse for sale', 'acre_lot': 3257.0, 'tour_available': True, 'image_source': 'https://ap.rdcpix.com/8b41918b1ee6b36f10c4e90e2edccd0dl-m1123716120rd-w960_h720.jpg', 'address_id': 3}


KeyboardInterrupt: 

In [22]:
# Upsert the Manhattan listings into Supabase.
# NOTE(review): the saved output of this cell is Postgres error 42P10 (no
# unique constraint matching the ON CONFLICT columns) — confirm the table
# constraints match the on_conflict lists in insert_data before re-running.
insert_data(manhattan_data)

APIError: {'code': '42P10', 'details': None, 'hint': None, 'message': 'there is no unique or exclusion constraint matching the ON CONFLICT specification'}

In [None]:
# Upsert the Brooklyn listings into Supabase (no saved output for this cell).
insert_data(brooklyn_data)

In [None]:
# Upsert the Queens listings into Supabase (no saved output for this cell).
insert_data(queens_data)

In [None]:
# Upsert the Staten Island listings into Supabase (no saved output for this cell).
insert_data(staten_island_data)