In [2]:
import html
import os

import folium
import pandas as pd
from folium.plugins import MarkerCluster

# Input dataset and output map paths (relative to the current working directory).
CLEANED_DATA = '../data/cleaned/grocery_stores_cleaned_v3.csv'
MAP_OUTPUT = '../docs/grocery_stores_chicago_map_v1.html'

# Load Cleaned Dataset
df = pd.read_csv(CLEANED_DATA)
print("📊 Columns:", df.columns.tolist())
print(f"✅ Loaded {len(df)} rows from clean dataset")
print("📊 Columns in DataFrame:", df.columns.tolist())

# Ensure correct data types.
# A bare astype(bool) converts missing values (NaN) to True, which would
# classify stores that have no flag at all as real/junk. Mask missing
# entries so they come out as False instead.
for flag_col in ('IS_REAL_GROCERY', 'IS_JUNK_STORE'):
    df[flag_col] = df[flag_col].astype(bool) & df[flag_col].notna()

# Base map centred at (41.8781, -87.6298), opening at zoom level 11.
map_center = [41.8781, -87.6298]
m = folium.Map(location=map_center, zoom_start=11)

# One named layer per store category. Each layer gets its own MarkerCluster,
# so markers added to a cluster are grouped within that category's layer.
real_layer = folium.FeatureGroup(name='🟢 Real Grocery Stores')
real_cluster = MarkerCluster()
real_cluster.add_to(real_layer)

junk_layer = folium.FeatureGroup(name='🔴 Junk Stores')
junk_cluster = MarkerCluster()
junk_cluster.add_to(junk_layer)

unknown_layer = folium.FeatureGroup(name='⚪ Unclassified Stores')
unknown_cluster = MarkerCluster()
unknown_cluster.add_to(unknown_layer)

# Add markers with cluster logic.
# Rows without coordinates cannot be placed on the map (folium rejects NaN
# locations), so drop them up front instead of letting one bad row abort
# the whole build. Report the count so the omission is visible.
plottable = df.dropna(subset=['Latitude', 'Longitude'])
skipped = len(df) - len(plottable)
if skipped:
    print(f"⚠️ Skipped {skipped} rows with missing coordinates")

# (marker colour, Font Awesome icon name, destination cluster) per category.
marker_styles = {
    'real': ('green', 'shopping-cart', real_cluster),
    'junk': ('red', 'remove', junk_cluster),
    'unknown': ('gray', 'question', unknown_cluster),
}

for _, row in plottable.iterrows():
    # Popup strings are rendered as HTML, so escape every data field; only
    # the <b>/<br> markup written here should be interpreted as tags.
    name, address, city, zip_code = (
        html.escape(str(row[col]))
        for col in ('DBA Name', 'Address', 'City', 'Zip')
    )
    popup = f"<b>{name}</b><br>{address}, {city} {zip_code}"

    # Decide the category once; the real-grocery flag takes precedence over
    # the junk flag when both are set.
    if row['IS_REAL_GROCERY']:
        category = 'real'
    elif row['IS_JUNK_STORE']:
        category = 'junk'
    else:
        category = 'unknown'
    icon_color, icon_type, cluster = marker_styles[category]

    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=popup,
        icon=folium.Icon(color=icon_color, icon=icon_type, prefix='fa'),
    ).add_to(cluster)

# Attach the three category layers to the map (real, junk, unclassified).
for layer in (real_layer, junk_layer, unknown_layer):
    layer.add_to(m)

# Layer control widget, shown expanded (collapsed=False).
layer_control = folium.LayerControl(collapsed=False)
layer_control.add_to(m)

# Write the map to disk, creating the output directory first if needed.
output_dir = os.path.dirname(MAP_OUTPUT)
os.makedirs(output_dir, exist_ok=True)
m.save(MAP_OUTPUT)
print(f"✅ Map saved to: {MAP_OUTPUT}")


📊 Columns: ['DBA Name', 'Address', 'City', 'State', 'Zip', 'Inspection Date', 'Results', 'Violations', 'Latitude', 'Longitude', 'DBA_NAME_CLEAN', 'ADDRESS_CLEAN', 'IS_JUNK_STORE', 'IS_REAL_GROCERY', 'HAS_PRODUCE_FLAG', 'geometry', 'index_right', 'Community', 'shape_area', 'area_num_1', 'AreaNumber', 'shape_len', 'IS_REVIEWED_REAL', 'REVIEW_NOTES']
✅ Loaded 4505 rows from clean dataset
📊 Columns in DataFrame: ['DBA Name', 'Address', 'City', 'State', 'Zip', 'Inspection Date', 'Results', 'Violations', 'Latitude', 'Longitude', 'DBA_NAME_CLEAN', 'ADDRESS_CLEAN', 'IS_JUNK_STORE', 'IS_REAL_GROCERY', 'HAS_PRODUCE_FLAG', 'geometry', 'index_right', 'Community', 'shape_area', 'area_num_1', 'AreaNumber', 'shape_len', 'IS_REVIEWED_REAL', 'REVIEW_NOTES']
✅ Map saved to: ../docs/grocery_stores_chicago_map_v1.html
