In [1]:
# 📦 Imports
import html
import os

import folium
import pandas as pd
from folium.plugins import MarkerCluster

# 📍 File Paths
# All three paths start with '../', i.e. they are relative and resolve against
# the working directory of the process that runs this code.
# Input: the cleaned store table loaded with pd.read_csv below.
CLEANED_DATA = '../data/cleaned/grocery_stores_cleaned_v1.csv'
# Output: the HTML file the folium map is saved to.
MAP_OUTPUT = '../maps/grocery_stores_chicago_map_v1.html'
# Output: the CSV file the per-ZIP summary table is written to.
STATS_OUTPUT = '../data/cleaned/grocery_stats_by_zip.csv'

# 📥 Load Cleaned Dataset
# The rest of the code reads these columns from the loaded frame:
# 'DBA Name', 'Address', 'City', 'Zip', 'Latitude' and 'Longitude'.
df = pd.read_csv(CLEANED_DATA)
print(f"✅ Loaded {len(df)} rows from clean dataset")

# 🧠 Classify Stores by Type (based on name patterns)
# Matching is plain substring containment on the upper-cased store name, so a
# keyword also hits when it appears inside a longer word (e.g. 'GAS' is found
# in 'VEGAS'). A name may match both lists; both flags are then set.
junk_keywords = [
    'DOLLAR',
    '7 ELEVEN',
    'MOBIL',
    'SHELL',
    'CIRCLE K',
    'GAS',
    'CONVENIENCE',
    'MINI MART',
    'CVS',
    'WALGREENS',
    'CITGO',
]

grocery_keywords = [
    'ALDI',
    'JEWEL',
    'MARIANO',
    'WHOLE FOODS',
    'COSTCO',
    'FOOD MARKET',
    'SUPERMARKET',
    'TRADER JOE',
    'FRESH MARKET',
]


def _name_has_any(keywords):
    """Return a predicate telling whether a name contains any of *keywords*."""
    def predicate(name):
        for keyword in keywords:
            if keyword in name:
                return True
        return False
    return predicate


# Upper-case first, then turn missing names into '' so the substring tests
# below always receive a string.
df['DBA_NAME_CLEAN'] = (
    df['DBA Name']
    .str.upper()
    .fillna('')
)

df['IS_JUNK_STORE'] = df['DBA_NAME_CLEAN'].apply(_name_has_any(junk_keywords))
df['IS_REAL_GROCERY'] = df['DBA_NAME_CLEAN'].apply(_name_has_any(grocery_keywords))

# 🧠 Set icon color based on type
def classify_store(row):
    """Return the ``(icon_color, label)`` pair for one store row.

    Args:
        row: Mapping-like object indexed by column name. 'IS_REAL_GROCERY'
            is always read; 'IS_JUNK_STORE' is read only when the grocery
            flag is falsy.

    Returns:
        ``('green', 'Real Grocery Store')`` when the grocery flag is truthy,
        otherwise ``('red', 'Junk Store')`` when the junk flag is truthy,
        otherwise ``('gray', 'Unclassified')``.
    """
    # Ordered by priority: a row flagged as both grocery and junk is reported
    # as a grocery store because that rule is checked first.
    rules = (
        ('IS_REAL_GROCERY', ('green', 'Real Grocery Store')),
        ('IS_JUNK_STORE', ('red', 'Junk Store')),
    )
    for flag_column, styling in rules:
        if row[flag_column]:
            return styling
    return 'gray', 'Unclassified'

# 🗺️ Create Map
# Base map at a fixed center/zoom; every marker goes into one MarkerCluster
# layer attached to the map.
m = folium.Map(location=[41.8781, -87.6298], zoom_start=11)
cluster = MarkerCluster().add_to(m)

# 📌 Add markers
# folium rejects NaN coordinates with a ValueError, which would abort the
# whole map for a single bad row. Rows without a usable location are left off
# the map instead, and their number is reported.
mappable = df.dropna(subset=['Latitude', 'Longitude'])
skipped = len(df) - len(mappable)
if skipped:
    print(f"⚠️ Skipped {skipped} rows with missing coordinates")

for _, row in mappable.iterrows():
    color, store_type = classify_store(row)

    # Popup and tooltip strings are rendered as HTML in the browser, so text
    # coming from the dataset is escaped; only the tags written here are markup.
    safe_name = html.escape(str(row['DBA Name']))
    safe_address = html.escape(f"{row['Address']}, {row['City']} {row['Zip']}")
    popup_text = f"<b>{safe_name}</b><br>{safe_address}<br><i>{store_type}</i>"

    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=popup_text,
        tooltip=safe_name,
        icon=folium.Icon(color=color, icon='shopping-cart', prefix='fa')
    ).add_to(cluster)

# 💾 Save map
# Create the output directory first so saving works on a fresh checkout.
os.makedirs(os.path.dirname(MAP_OUTPUT), exist_ok=True)
m.save(MAP_OUTPUT)
print(f"✅ Map saved to: {MAP_OUTPUT}")

# 📊 ZIP-Level Summary Stats
# The three categories are made mutually exclusive with the same priority as
# classify_store: a store matching both keyword lists counts as a real
# grocery only. Without this, such a store would be subtracted twice from the
# total and unclassified_count could come out too low or even negative.
is_real = df['IS_REAL_GROCERY'].astype(bool)
is_junk = df['IS_JUNK_STORE'].astype(bool) & ~is_real
labelled = df.assign(
    _real=is_real,
    _junk=is_junk,
    _unclassified=~(is_real | is_junk),
)

# 'size' counts every row of the group, including stores whose 'DBA Name' is
# missing, so total_stores == real + junk + unclassified for every ZIP.
zip_stats = labelled.groupby('Zip').agg(
    total_stores=('_real', 'size'),
    real_grocery_count=('_real', 'sum'),
    junk_store_count=('_junk', 'sum'),
    unclassified_count=('_unclassified', 'sum'),
)

zip_stats = zip_stats.sort_values(by='total_stores', ascending=False)

# 🖨️ Display summary
# NOTE(review): `display` is not imported here; presumably it is the builtin
# injected by IPython/Jupyter — confirm before running this as a plain script.
print("📍 Grocery Store Summary by ZIP Code:")
display(zip_stats.head(10))

# 💾 Save ZIP stats
# Create the output directory first; 'Zip' is the index and is written as the
# first CSV column.
os.makedirs(os.path.dirname(STATS_OUTPUT), exist_ok=True)
zip_stats.to_csv(STATS_OUTPUT)
print(f"✅ ZIP-level stats saved to: {STATS_OUTPUT}")




✅ Loaded 1612 rows from clean dataset
✅ Map saved to: ../maps/grocery_stores_chicago_map_v1.html
📍 Grocery Store Summary by ZIP Code:


Zip,total_stores,real_grocery_count,junk_store_count,unclassified_count
60651,89,14,4,71
60619,87,7,10,70
60644,82,9,14,59
60624,74,6,4,64
60628,74,2,11,61
60623,68,2,3,63
60659,56,3,0,53
60636,54,8,8,38
60629,51,4,2,45
60608,48,7,1,40


✅ ZIP-level stats saved to: ../data/cleaned/grocery_stats_by_zip.csv
