In [1]:
import json
import yaml
import pandas as pd
import logging
from utils.geo_utils import reverse_geocode_geojson_to_dataframe, get_redfin_estimate

In [None]:
# Input GeoJSON dataset that the rest of the notebook analyses
geojson_file = "DINS_2025_Eaton_Public_View.geojson"

In [None]:
# Parse the YAML settings file (safe loader only)
with open("config.yml", "r") as config_stream:
    config = yaml.safe_load(config_stream)

# Pull the Google Maps API key out of the parsed settings for later cells
google_maps_settings = config["google_maps"]
api_key = google_maps_settings["api_key"]

In [None]:
# Load the GeoJSON document and parse it into Python objects.
# The encoding is pinned to UTF-8 (the encoding JSON/GeoJSON text is specified
# to use) so parsing does not depend on the platform's default locale encoding.
with open(geojson_file, "r", encoding="utf-8") as f:
    geojson_data = json.load(f)

In [None]:
# Turn each feature's lat/long coordinates into an address (reverse geocoding).
# sample_size caps the run at 500; pass sample_size=None to process the full dataset.
df = reverse_geocode_geojson_to_dataframe(
    geojson_data,
    api_key,
    sample_size=500,
)

In [None]:
# Preview the first five rows of the geocoded result
df.head(n=5)

In [None]:
# Root-logger setup: write timestamped records (INFO and above) to a log file
logging_options = {
    "filename": "redfin_scrape.log",                        # destination file for log records
    "level": logging.INFO,                                  # INFO and more severe
    "format": "%(asctime)s - %(levelname)s - %(message)s",
    "datefmt": "%Y-%m-%d %H:%M:%S",
}
logging.basicConfig(**logging_options)

In [None]:
# Smoke-test the Redfin price scraper on a single known address
address = "1780 North Altadena Drive, Pasadena, CA 91107"

# NOTE(review): machine-specific absolute path to the chromedriver binary;
# it must be adjusted on any other machine or chromedriver version.
driver_path = (
    "/opt/homebrew/Caskroom/chromedriver/132.0.6834.83/"
    "chromedriver-mac-arm64/chromedriver"
)

redfin_price = get_redfin_estimate(address, driver_path, headless=True)

# Show the scraped value as the cell output
redfin_price

In [None]:
# Run `get_redfin_estimate` once per address and store the results in a new
# "redfin_estimate" column. `driver_path` comes from the single-address test cell.
# NOTE(review): the test cell passes headless=True, but this call relies on the
# function's default for `headless` — confirm that is intended.
df["redfin_estimate"] = df["address"].apply(get_redfin_estimate, args=(driver_path,))

In [None]:
# Inspect the first 100 rows, now including the scraped estimates
df.head(n=100)

In [None]:
# Persist the final table to CSV (pandas also writes the index column by default)
df.to_csv("results.csv")