In [1]:
import pandas as pd

# Merge the manually reviewed grocery flags from the Austin review file back
# into the full store dataset, let the reviewed flag override IS_REAL_GROCERY
# wherever one exists, and write the curated result to disk.

# Columns that together identify a store in both files.
merge_keys = ["DBA Name", "Address", "Zip"]

# Load original dataset
df = pd.read_csv("../data/cleaned/grocery_stores_with_community.csv")

# Load updated Austin review
austin_review = pd.read_csv("../data/review/austin_real_grocery_review.csv")

# Strip any trailing spaces from column headers
austin_review.columns = austin_review.columns.str.strip()

# Debug column names
print("✅ Review file columns:", austin_review.columns.tolist())

# Keep only the key and review columns, with at most one row per store key.
# A repeated key in the review file would otherwise duplicate rows of the full
# dataset during the left merge; when a key repeats, the last entry wins.
# NOTE(review): the merge assumes "Zip" is parsed to the same dtype in both
# files — confirm if the review CSV is ever edited by hand.
review_flags = austin_review[merge_keys + ["IS_REVIEWED_REAL", "REVIEW_NOTES"]].drop_duplicates(
    subset=merge_keys, keep="last"
)

# Merge the review back into the full dataset. `validate` makes pandas raise
# if the right-hand side ever has duplicate keys, so the row count of `df`
# is preserved.
df_updated = df.merge(
    review_flags,
    on=merge_keys,
    how="left",
    validate="many_to_one",
)

# Where reviewed flag exists, override IS_REAL_GROCERY (vectorized: keeps the
# reviewed value where it is non-null, otherwise falls back to the original).
reviewed = df_updated["IS_REVIEWED_REAL"]
df_updated["IS_REAL_GROCERY"] = reviewed.where(
    reviewed.notna(), df_updated["IS_REAL_GROCERY"]
)

# Save updated dataset
output_path = "../data/cleaned/grocery_stores_v1_curated.csv"
df_updated.to_csv(output_path, index=False)

print(f"✅ Merged Austin review and saved to: {output_path}")


✅ Review file columns: ['DBA Name', 'Address', 'Zip', 'IS_REAL_GROCERY', 'IS_REVIEWED_REAL', 'REVIEW_NOTES']
✅ Merged Austin review and saved to: ../data/cleaned/grocery_stores_v1_curated.csv
