In [1]:
import pandas as pd
import os

# Paths
CLEANED_PATH = "../data/cleaned/grocery_stores_cleaned_v1.csv"
JUNK_REVIEW_PATH = "../data/junk_review/"
OUTPUT_PATH = "../data/cleaned/grocery_stores_cleaned_v2.csv"

# Columns that identify a store, and the columns a manual review may override
KEY_COLUMNS = ['Address', 'DBA Name']
REVIEW_COLUMNS = ['IS_JUNK_STORE', 'REVIEW_NOTES']


def list_review_files(folder: str) -> list:
    """Return the paths of all ``.csv`` files in *folder*, sorted by name.

    ``os.listdir`` returns entries in arbitrary order; sorting makes the
    "latest review wins" rule in :func:`latest_reviews` reproducible.
    """
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith(".csv")
    )


def load_reviews(paths) -> pd.DataFrame:
    """Read every review CSV in *paths* and stack them into one frame.

    Returns an empty frame with the key and review columns when *paths* is
    empty, because ``pd.concat`` raises ``ValueError`` on an empty list.
    """
    frames = [pd.read_csv(path) for path in paths]
    if not frames:
        return pd.DataFrame(columns=KEY_COLUMNS + REVIEW_COLUMNS)
    return pd.concat(frames, ignore_index=True)


def latest_reviews(reviewed: pd.DataFrame) -> pd.DataFrame:
    """Return the key and review columns with one row per store.

    When the same (Address, DBA Name) pair was reviewed more than once, only
    the last occurrence (in row order of *reviewed*) is kept. Without this,
    a left merge would emit one output row per matching review and silently
    duplicate stores.

    Raises:
        KeyError: if *reviewed* lacks any of the key or review columns.
    """
    subset = reviewed[KEY_COLUMNS + REVIEW_COLUMNS]
    return subset.drop_duplicates(subset=KEY_COLUMNS, keep='last')


def apply_reviews(df: pd.DataFrame, reviews: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with reviewed values overriding the originals.

    For every store in *df* that has a matching review, non-null reviewed
    values of ``IS_JUNK_STORE`` / ``REVIEW_NOTES`` replace the existing ones;
    stores without a review, and null reviewed values, leave *df* untouched.
    The result always has exactly one row per row of *df*.
    """
    if reviews.empty:
        return df.copy()

    # Rename explicitly instead of relying on merge suffixes: suffixes are
    # only applied on a name collision, so a base frame that lacks one of
    # the review columns would otherwise never get a "*_REVIEW" column.
    temp_names = {col: f"{col}_REVIEW" for col in REVIEW_COLUMNS}
    incoming = latest_reviews(reviews).rename(columns=temp_names)

    merged = df.merge(incoming, on=KEY_COLUMNS, how='left')

    # If reviewed value exists, overwrite
    for col, temp in temp_names.items():
        if col in merged.columns:
            merged[col] = merged[temp].combine_first(merged[col])
        else:
            merged[col] = merged[temp]

    # Drop only the temp columns created above, not every "*_REVIEW" column
    return merged.drop(columns=list(temp_names.values()))


# In a notebook, or when run as a script, __name__ is "__main__", so this runs
# as before; the guard only stops the file I/O from firing if the helpers are
# imported. The statements stay at module level (not inside a function) so
# df, reviewed_files, reviewed and reviewed_subset remain available afterwards.
if __name__ == "__main__":
    # Load base cleaned data
    df = pd.read_csv(CLEANED_PATH)

    # Load and combine all reviewed junk files
    reviewed_files = list_review_files(JUNK_REVIEW_PATH)
    if not reviewed_files:
        print(f"No reviewed CSV files found in {JUNK_REVIEW_PATH}; data is passed through unchanged")
    reviewed = load_reviews(reviewed_files)

    # One review row per store, restricted to the columns we merge on/with
    reviewed_subset = latest_reviews(reviewed)

    # Merge updated flags into original data
    df = apply_reviews(df, reviewed_subset)

    # Save updated version
    df.to_csv(OUTPUT_PATH, index=False)
    print(f"✅ Merged junk review updates saved to {OUTPUT_PATH}")


✅ Merged junk review updates saved to ../data/cleaned/grocery_stores_cleaned_v2.csv
