In [1]:
import pandas as pd
import os

# Build per-community review files for stores flagged as junk.
#
# Reads the curated grocery-store CSV, keeps the rows flagged as junk and not
# as real grocery stores, prints the ten communities with the most such rows,
# and writes one CSV per community into the junk-review directory.

# Load the curated dataset
df = pd.read_csv("../data/cleaned/grocery_stores_v1_curated.csv")

# Filter for confirmed junk stores that are not real grocery stores.
# .eq() is the element-wise form of `==`; rows whose flag is missing compare
# unequal to both True and False and are therefore excluded.
junk = df[
    df['IS_REAL_GROCERY'].eq(False) &
    df['IS_JUNK_STORE'].eq(True)
].copy()

# Fill missing Community values so those rows end up in their own export file
junk['Community'] = junk['Community'].fillna('Unknown')

# Summary of top junk-heavy communities (value_counts sorts by count, descending)
junk_by_community = junk['Community'].value_counts().reset_index()
junk_by_community.columns = ['Community', 'JunkCount']
print("📊 Top junk store communities:")
print(junk_by_community.head(10))

# Create output directory
output_dir = "../data/junk_review/"
os.makedirs(output_dir, exist_ok=True)

# Characters that must not reach a file name: the space (as before) plus path
# separators and characters that are invalid on common file systems. Without
# this, a community name containing e.g. "/" would make to_csv target a
# non-existent subdirectory.
unsafe_filename_chars = str.maketrans({ch: '_' for ch in ' /\\:*?"<>|'})

# Export CSVs per community.
# A single groupby pass replaces re-filtering the whole frame once per
# community; sort=False keeps the order of first appearance, matching the
# order that .unique() produced.
for community, community_df in junk.groupby('Community', sort=False):
    community_str = str(community).lower().translate(unsafe_filename_chars)
    file_path = os.path.join(output_dir, f"junk_{community_str}.csv")
    community_df.to_csv(file_path, index=False)
    print(f"✅ Saved {len(community_df)} records for '{community}' to {file_path}")


📊 Top junk store communities:
         Community  JunkCount
0           AUSTIN        100
1             LOOP         85
2     LOGAN SQUARE         52
3  NEAR NORTH SIDE         49
4        LAKE VIEW         48
5   SOUTH LAWNDALE         46
6        ENGLEWOOD         45
7   AUBURN GRESHAM         44
8        WEST TOWN         41
9   BELMONT CRAGIN         39
✅ Saved 35 records for 'NORTH LAWNDALE' to ../data/junk_review/junk_north_lawndale.csv
✅ Saved 29 records for 'NEW CITY' to ../data/junk_review/junk_new_city.csv
✅ Saved 37 records for 'CHATHAM' to ../data/junk_review/junk_chatham.csv
✅ Saved 39 records for 'CHICAGO LAWN' to ../data/junk_review/junk_chicago_lawn.csv
✅ Saved 8 records for 'WASHINGTON PARK' to ../data/junk_review/junk_washington_park.csv
✅ Saved 16 records for 'WOODLAWN' to ../data/junk_review/junk_woodlawn.csv
✅ Saved 32 records for 'ROGERS PARK' to ../data/junk_review/junk_rogers_park.csv
✅ Saved 100 records for 'AUSTIN' to ../data/junk_review/junk_austin.csv
✅ Save