In [14]:
# 📦 Imports
import pandas as pd
import os

# 📍 File paths
RAW_PATH = '../data/raw/urban_agriculture_sites.csv'
CLEAN_PATH = '../data/cleaned/urban_agriculture_sites_cleaned.csv'

# ✂️ Columns retained in the cleaned output
columns_to_keep = [
    'ADDRESS', 'LATITUDE', 'LONGITUDE', 'LOCATION'
]

# 🔠 Free-text columns that get title-cased and stripped
text_columns = ['ADDRESS', 'LOCATION']


def clean_sites(raw):
    """Return a cleaned copy of the raw urban agriculture sites table.

    Steps, in order:
      1. Keep only the columns listed in ``columns_to_keep``.
      2. Drop rows whose LATITUDE or LONGITUDE is missing.
      3. Drop rows whose LATITUDE or LONGITUDE equals 0.
      4. Title-case and strip each column in ``text_columns``.

    Args:
        raw: DataFrame containing at least the columns in
            ``columns_to_keep``. It is not modified.

    Returns:
        A new DataFrame holding only the retained columns and valid rows.

    Raises:
        KeyError: if any column in ``columns_to_keep`` is missing from ``raw``.
    """
    sites = raw[columns_to_keep]

    # 🧹 Drop rows with missing or zero coordinates
    sites = sites.dropna(subset=['LATITUDE', 'LONGITUDE'])
    has_nonzero_coords = (sites['LATITUDE'] != 0) & (sites['LONGITUDE'] != 0)
    # Explicit copy so the column assignments below write to an independent
    # frame rather than to a filtered slice of the input.
    sites = sites[has_nonzero_coords].copy()

    # 🔠 Normalize text. Each column is normalized from its OWN values; the
    # previous version derived LOCATION from ADDRESS, overwriting it.
    for column in text_columns:
        sites[column] = sites[column].str.title().str.strip()

    return sites


# Guarded so the file I/O only happens when run as a notebook cell or script
# (where __name__ is "__main__"), not when clean_sites is imported.
if __name__ == "__main__":
    # 📥 Load data
    df = pd.read_csv(RAW_PATH)
    print(f"✅ Loaded raw urban ag dataset: {df.shape[0]} rows")
    print("📊 Columns:", df.columns.tolist())

    df = clean_sites(df)

    # 💾 Save cleaned dataset
    os.makedirs(os.path.dirname(CLEAN_PATH), exist_ok=True)
    df.to_csv(CLEAN_PATH, index=False)
    print(f"✅ Cleaned urban ag dataset saved to: {CLEAN_PATH}")



✅ Loaded raw urban ag dataset: 11 rows
📊 Columns: ['ADDRESS', 'LATITUDE', 'LONGITUDE', 'LOCATION']
✅ Cleaned urban ag dataset saved to: ../data/cleaned/urban_agriculture_sites_cleaned.csv
