In [2]:
import pandas as pd
import os

# Load the curated dataset
df = pd.read_csv("../data/cleaned/grocery_stores_v1_curated.csv")

# Keep only rows flagged as real grocery stores that are NOT flagged as junk.
# Comparing explicitly with == means rows whose flag is missing (NaN)
# evaluate to False and are therefore dropped from the selection.
real = df[
    (df['IS_REAL_GROCERY'] == True) &
    (df['IS_JUNK_STORE'] == False)
].copy()

# Fill missing Community values so those rows are still counted and exported
real['Community'] = real['Community'].fillna('Unknown')

# Summary of communities with the most real stores (descending by count)
real_by_community = real['Community'].value_counts().reset_index()
# Assign both column names outright rather than renaming by old label
real_by_community.columns = ['Community', 'RealCount']
print("📊 Top real store communities:")
print(real_by_community.head(10))

# Create output directory
output_dir = "../data/real_review/"
os.makedirs(output_dir, exist_ok=True)

# Export one CSV per community.
# A single groupby pass replaces re-filtering the whole frame per community;
# sort=False keeps groups in order of first appearance, matching .unique().
for community, community_df in real.groupby('Community', sort=False):
    community_str = str(community).lower().replace(' ', '_')
    # Path separators in a community name would make the file land in (or
    # require) a sub-directory of output_dir; flatten them into the file name.
    for separator in ('/', '\\'):
        community_str = community_str.replace(separator, '_')
    file_path = os.path.join(output_dir, f"real_{community_str}.csv")
    community_df.to_csv(file_path, index=False)
    print(f"✅ Saved {len(community_df)} records for '{community}' to {file_path}")


📊 Top real store communities:
         Community  RealCount
0       WEST RIDGE         49
1   SOUTH LAWNDALE         44
2   BELMONT CRAGIN         41
3  NEAR NORTH SIDE         38
4     CHICAGO LAWN         38
5    HUMBOLDT PARK         35
6      SOUTH SHORE         35
7          CHATHAM         35
8        WEST TOWN         32
9      ALBANY PARK         32
✅ Saved 29 records for 'NEAR WEST SIDE' to ../data/real_review/real_near_west_side.csv
✅ Saved 18 records for 'AUSTIN' to ../data/real_review/real_austin.csv
✅ Saved 35 records for 'CHATHAM' to ../data/real_review/real_chatham.csv
✅ Saved 13 records for 'MORGAN PARK' to ../data/real_review/real_morgan_park.csv
✅ Saved 5 records for 'CALUMET HEIGHTS' to ../data/real_review/real_calumet_heights.csv
✅ Saved 10 records for 'GARFIELD RIDGE' to ../data/real_review/real_garfield_ridge.csv
✅ Saved 28 records for 'NEW CITY' to ../data/real_review/real_new_city.csv
✅ Saved 21 records for 'AVONDALE' to ../data/real_review/real_avondale.csv
✅ S