In [8]:
import os
import re
import pandas as pd
import geopandas as gpd
import requests
import googlemaps
import gspread
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials# Define the scope of the application
from googleapiclient.discovery import build
from shapely.geometry import Point
from dotenv import load_dotenv

In [10]:
# # Load environment variables from .env file if present
# load_dotenv()

# # Try to get the API key from environment variables
# GOOGLE_MAPS_API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY")
# if not GOOGLE_MAPS_API_KEY:
#   raise RuntimeError("GOOGLE_MAPS_API_KEY not found in environment or .env file.")

In [12]:
# OAuth scopes requested for the service account: the Sheets feed and Drive.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]

# Cell window (A1 notation) that later cells read from a worksheet.
range_name = 'A1:AO200'

# Build service-account credentials from the JSON key file in the working directory.
creds = ServiceAccountCredentials.from_json_keyfile_name(
    filename='autoscraper-380600-0d0c84856d6b.json',
    scopes=scope,
)

# gspread client authorised with those credentials, then the target spreadsheet by key.
client = gspread.authorize(creds)
sheet = client.open_by_key('1vxMnPSQyDxTy16mE2Oq2a1rH3vrBSruTU74f5xEWCB0')

# Drive v3 API client sharing the same credentials.
drivesvc = build(serviceName="drive", version="v3", credentials=creds)

In [13]:
def fetch_data(sheet, worksheet_name, range_name, df_name=None):
    """Load a worksheet range into a DataFrame with duplicate rows removed.

    The first row of the fetched range becomes the column headers and the
    remaining rows become the data. Exact duplicate data rows are dropped;
    the surviving rows keep the index labels they had before de-duplication.

    Args:
        sheet: Object exposing ``worksheet(name)``; the returned worksheet
            must expose ``get(range)`` returning a list of row lists.
        worksheet_name: Name of the worksheet to read.
        range_name: Range string passed straight to ``worksheet.get``.
        df_name: Unused. Accepted only so existing callers keep working.

    Returns:
        pandas.DataFrame holding the data rows. When the range yields no
        values at all, an empty DataFrame (no rows, no columns) is returned
        instead of raising.
    """
    print('Fetching data from Google Sheets...')
    worksheet = sheet.worksheet(worksheet_name)
    data = worksheet.get(range_name)
    df = pd.DataFrame(data)

    if df.empty:
        # Nothing came back, so there is no header row to promote;
        # df.iloc[0] would raise IndexError on a zero-row frame.
        df = pd.DataFrame()
    else:
        df.columns = df.iloc[0]  # Set first row as column headers
        df = df.drop(0).reset_index(drop=True)  # Remove the header row from the data
        # Non-mutating de-duplication; index labels are intentionally not renumbered.
        df = df.drop_duplicates()

    print(f'Number of rows in {worksheet_name} worksheet: {len(df)}')
    return df

In [14]:
# Read the 'Sheet1' worksheet over the configured range into `df`.
df = fetch_data(
    sheet,
    worksheet_name='Sheet1',
    range_name=range_name,
    df_name='df',
)

Fetching data from Google Sheets...
Number of rows in Sheet1 worksheet: 7


In [19]:
def parse_coords(val):
    """Return (lat, lon) as floats or None if not parseable.

    Accepted inputs:
      * a list or tuple of exactly two values convertible by ``float``;
      * a string of two comma-separated numbers, optionally wrapped in
        parentheses, e.g. ``"(40.7, -74.0)"`` or ``"40.7,-74.0"``.

    Anything else (None, NaN, numbers, wrong element count, non-numeric
    parts) yields None rather than raising.
    """
    # Dispatch on type first. Calling pd.isna() on a list returns an array,
    # whose truth value is ambiguous and raises ValueError inside an `if`.
    if isinstance(val, (list, tuple)):
        parts = list(val)
    elif isinstance(val, str):
        cleaned = val.strip().replace("(", "").replace(")", "")
        parts = [p.strip() for p in cleaned.split(",")]
    else:
        # Missing values (None / NaN / pd.NA) and any other type are not coordinates.
        return None

    if len(parts) != 2:
        return None

    try:
        return float(parts[0]), float(parts[1])
    except (TypeError, ValueError):
        # TypeError covers elements such as None; ValueError covers non-numeric text.
        return None

def _text_or_none(value):
    """Return value unchanged, or None when it is missing or whitespace-only."""
    if pd.isna(value) or not str(value).strip():
        return None
    return value


# One record per sheet row that has parseable coordinates; other rows are
# reported and skipped.
records = []

for _, row in df.iterrows():
    coords = parse_coords(row.get("Coordinates"))
    if coords is None:
        print(f"Skipping (no coords): {row.get('full_address')}")
        continue

    lat, lon = coords
    feature = {
        "Address": row.get("full_address"),
        "Developers": row.get("developers"),
        "Description": _text_or_none(row.get("description")),
        "Landing Page": _text_or_none(row.get("story_link")),
        # Point takes (x, y), so longitude goes first.
        "geometry": Point(lon, lat),
    }
    records.append(feature)

# Fail loudly rather than writing an empty GeoJSON file.
if len(records) == 0:
    raise RuntimeError("No records parsed into features!")

gdf = gpd.GeoDataFrame(records, crs="EPSG:4326")
gdf.to_file("live_local.geojson", driver="GeoJSON")
print(f"Wrote {len(gdf)} features")


Wrote 7 features
