In [1]:
import os
import re
import pandas as pd
import geopandas as gpd
import requests
import gspread
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials# Define the scope of the application
from googleapiclient.discovery import build
from shapely.geometry import Point
from dotenv import load_dotenv

In [6]:
# OAuth scopes requested for the service account: the Sheets feeds endpoint and Drive.
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']

# Build service-account credentials from the JSON key file.
# NOTE(review): the key-file name is hard-coded; `os` and `load_dotenv` are
# imported at the top of the notebook but are not used in the cells shown —
# consider reading this path from the environment instead.
creds = ServiceAccountCredentials.from_json_keyfile_name('autoscraper-380600-0d0c84856d6b.json', scope)

# Authorize a gspread client with those credentials.
client = gspread.authorize(creds)

# Open the source spreadsheet by its key.
sheet = client.open_by_key('1w681QWTVg51Y8AwIU8rhpOBHolK78Ue2PTYb-G8wmkg')

# Drive v3 API client built from the same credentials.
# NOTE(review): `drivesvc` is not referenced by any cell visible here.
drivesvc = build("drive", "v3", credentials=creds)

# A1-notation range handed to fetch_data; presumably rows past 200 or columns
# past AO would not be read — confirm this covers the whole sheet.
range_name = 'A1:AO200'

In [7]:
def fetch_data(sheet, worksheet_name, range_name, df_name=None):
    """Read a worksheet range into a de-duplicated DataFrame.

    The first row of the range is used as the column header; the remaining
    rows become the data. Exact duplicate rows are removed and the index is
    renumbered afterwards so it is always a contiguous 0..n-1 range.

    Parameters
    ----------
    sheet : object exposing ``worksheet(name)``
        The opened spreadsheet (e.g. the result of ``client.open_by_key``).
    worksheet_name : str
        Name of the tab to read.
    range_name : str
        Range passed straight to ``worksheet.get`` (e.g. ``'A1:AO200'``).
    df_name : optional
        Unused; kept only so existing callers that pass it keep working.

    Returns
    -------
    pandas.DataFrame
        One row per unique data row. An empty DataFrame is returned when the
        range contains no values at all.
    """
    print('Fetching data from Google Sheets...')
    worksheet = sheet.worksheet(worksheet_name)
    data = worksheet.get(range_name)

    # An empty range has no header row to promote; return an empty frame
    # instead of failing on the positional lookup of row 0.
    if not data:
        print(f'Number of rows in {worksheet_name} worksheet: 0')
        return pd.DataFrame()

    raw = pd.DataFrame(data)

    # Take the header as a plain list so the row label (0) does not end up
    # as the name of the column axis.
    header = raw.iloc[0].tolist()
    df = raw.iloc[1:].copy()
    df.columns = header

    # De-duplicate first, then renumber, so the returned index has no gaps.
    df = df.drop_duplicates().reset_index(drop=True)

    print(f'Number of rows in {worksheet_name} worksheet: {len(df)}')
    return df

In [9]:
# Load the 'ConfirmedSites' tab; the df_name argument is accepted by
# fetch_data but not used inside it.
df = fetch_data(
    sheet,
    worksheet_name='ConfirmedSites',
    range_name=range_name,
    df_name='df',
)

Fetching data from Google Sheets...
Number of rows in ConfirmedSites worksheet: 11


In [11]:
# Display the fetched DataFrame for a visual check of its columns and rows.
df

Unnamed: 0,Developer,Architect,Status,Live Local Units,Total Units,Percent Live Local,Description,Recent Coverage,Address,Coordinates,None
0,Shoma Group,PPKS,Proposed,81.0,201.0,40.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/08/12/shoma...,3808-3850 Southwest Eighth Street,"25.76446210180117, -80.25777492474045",
1,"Pablo Castro, Laura Tauber",Arquitectonica,Planned,4032.0,4032.0,100.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/08/01/insid...,8395 Northwest 27th Avenue in unincorporated M...,"25.850711898160483, -80.24212178425633",
2,Tulip Developments Group,Kobi Karp,Planned,203.0,499.0,40.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/07/28/edgew...,3350 Biscayne Boulevard and 234-264 NE 34th St...,"25.808050648825517, -80.18967253453498",
3,Integral,,Proposed,170.0,170.0,100.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/07/22/integ...,"6007, 6013 and 6015 Northwest Seventh Avenue, ...","25.830553402961872, -80.2082414443304",
4,Integral,,Proposed,160.0,160.0,100.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/07/22/integ...,"Southwest Fifth and Fourth streets, both east ...","25.769987464784403, -80.20096192369311",
5,Kolter Group,,Proposed,154.0,386.0,40.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/07/10/alton...,2101 South Congress Avenue,"26.434955234242263, -80.0918967292921",
6,Midtown Capital,,Proposed,140.0,348.0,40.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/07/10/midto...,7501 and 7553 Northeast Second Avenue in Miami,"25.844469980462605, -80.19245424128017",
7,Bazbaz Development,,Proposed,,544.0,,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2024/05/29/bazba...,"2110, 2118 and 2134 North Miami Avenue, as wel...","25.79696648108967, -80.19507914987746",
8,RCC Developers,Modis Architects,Proposed,120.0,300.0,40.0,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/06/13/argen...,Southwest 214th Street and U.S. 1/South Dixie ...,"25.568006317235255, -80.37942124988717",
9,SF QOZ Fund I,,Planned,,,,"Lorem ipsum dolor sit amet, consectetur adipis...",https://therealdeal.com/miami/2025/06/04/genti...,"1515 and 1525 Northeast Miami Place, 75 Northe...","25.789755734428443, -80.19254654802926",


In [12]:
def parse_coords(val):
    """Return (lat, lon) as floats, or None if the value is not parseable.

    Accepted inputs:
      * a 2-element list or tuple whose items convert with ``float()``;
      * a string of the form ``"lat, lon"``, optionally wrapped in
        parentheses, e.g. ``"(25.76, -80.25)"``.

    Anything else (None, NaN, other scalars, wrong number of parts,
    non-numeric parts) yields None rather than raising.
    """
    # Sequences are checked by type before anything else: calling pd.isna()
    # on a list returns an array, and testing that array in an `if` raises
    # "truth value of an array is ambiguous" for two elements.
    if isinstance(val, (list, tuple)):
        if len(val) != 2:
            return None
        parts = val
    elif isinstance(val, str):
        cleaned = val.strip().replace("(", "").replace(")", "")
        parts = [piece.strip() for piece in cleaned.split(",")]
        if len(parts) != 2:
            return None
    else:
        # None, NaN and every other scalar type carry no coordinate pair.
        return None

    try:
        return float(parts[0]), float(parts[1])
    except (TypeError, ValueError):
        # Non-numeric text or None inside the pair: treat as unparseable.
        return None

def _value_or_none(row, column):
    """Return the cell in `column`, or None when it is missing, null, or blank."""
    value = row.get(column)
    if pd.notna(value) and str(value).strip():
        return value
    return None


# Build one feature record per sheet row that has usable coordinates.
records = []

for _, row in df.iterrows():
    coords = parse_coords(row.get("Coordinates"))
    if not coords:
        # Report the row by its actual "Address" column so the skipped site
        # can be identified in the log.
        print(f"Skipping (no coords): {row.get('Address')}")
        continue

    lat, lon = coords

    # Each optional attribute is checked against its OWN column; blank or
    # missing cells are written as None. Values are passed through as the
    # sheet returned them (no numeric conversion is applied here).
    records.append({
        "Address":            row.get("Address"),
        "Developer":          row.get("Developer"),
        "Description":        _value_or_none(row, "Description"),
        "Recent Coverage":    _value_or_none(row, "Recent Coverage"),
        "Status":             _value_or_none(row, "Status"),
        "Live Local Units":   _value_or_none(row, "Live Local Units"),
        "Total Units":        _value_or_none(row, "Total Units"),
        "Percent Live Local": _value_or_none(row, "Percent Live Local"),
        # shapely points are (x, y), so longitude goes first.
        "geometry":           Point(lon, lat),
    })

# Fail loudly rather than writing an empty GeoJSON file.
if not records:
    raise RuntimeError("No records parsed into features!")

gdf = gpd.GeoDataFrame(records, crs="EPSG:4326")
gdf.to_file("live_local.geojson", driver="GeoJSON")
print(f"Wrote {len(gdf)} features")


Skipping (no coords): None
Wrote 10 features
