In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# ------------------------------
# Step 1: Basic setup
# ------------------------------
# Approximate coordinates for each city (kept for realism).
coords = {
    "Mumbai": (19.0760, 72.8777),
    "Delhi": (28.6139, 77.2090),
    "Bengaluru": (12.9716, 77.5946),
    "Kolkata": (22.5726, 88.3639),
    "Chennai": (13.0827, 80.2707),
}

# City names, in the insertion order of the coordinate table above.
cities = list(coords)

# Simulation window: every calendar day of 2024, both endpoints included.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 12, 31)
dates = pd.date_range(start=start_date, end=end_date)

# ------------------------------
# Step 2: Simulate restaurant info
# ------------------------------
# Five restaurants per city; ids run consecutively from 1 in city order.
city_slots = [name for name in cities for _ in range(5)]
restaurants = [
    {"restaurant_id": number, "city": name}
    for number, name in enumerate(city_slots, start=1)
]
restaurants_df = pd.DataFrame(restaurants)

# ------------------------------
# Step 3: Generate daily sales, weather, and event data
# ------------------------------
# Every draw in this step comes from one seeded generator, so the generated
# records are identical on each run. Drawing from the unseeded global RNGs
# here would make the saved dataset change every time the script executes.
rng = np.random.default_rng(42)

# These date lists never change, so they are built once rather than per row.
# Festivals, cricket matches, etc.: these dates always count as one event.
festival_days = ["2024-03-25", "2024-08-15", "2024-10-31", "2024-11-01", "2024-11-12"]
holidays = ["2024-01-26", "2024-08-15", "2024-10-02", "2024-11-01", "2024-12-25"]

# Chance that an ordinary (non-festival) day still has an event.
EVENT_PROBABILITY = 0.1

records = []

for rest in restaurants_df.itertuples(index=False):
    city = rest.city
    # One baseline per restaurant; both bounds are inclusive.
    base_sales = int(rng.integers(15000, 80000, endpoint=True))  # ₹15K–₹80K per day typical range

    for d in dates:
        day = str(d.date())  # ISO date text, computed once and reused below

        # Simulate weather
        month = d.month
        if city == "Mumbai" and month in (6, 7, 8, 9):  # monsoon
            rainfall = rng.uniform(10, 80)
            temperature = rng.uniform(25, 32)
        elif city == "Delhi" and month in (5, 6):  # hot summer
            rainfall = rng.uniform(0, 10)
            temperature = rng.uniform(35, 45)
        else:
            rainfall = rng.uniform(0, 15)
            temperature = rng.uniform(22, 34)

        # Simulate events: guaranteed on festival days, otherwise a coin flip
        # weighted by EVENT_PROBABILITY. Always a plain Python int (0 or 1).
        if day in festival_days:
            num_events = 1
        else:
            num_events = int(rng.random() < EVENT_PROBABILITY)

        # Holiday indicator (recorded as a feature; it does not alter sales)
        is_holiday = 1 if day in holidays else 0

        # Weekends boost sales (weekday(): 0=Mon, 6=Sun)
        weekend_boost = 1.15 if d.weekday() >= 5 else 1.0

        # Event days boost sales
        event_boost = 1.2 if num_events > 0 else 1.0

        # Baseline scaled by both boosts, with ±10% day-to-day variation.
        sales = base_sales * weekend_boost * event_boost * rng.uniform(0.9, 1.1)

        records.append({
            "restaurant_id": rest.restaurant_id,
            "city": city,
            "date": d,
            "sales": round(sales, 2),
            "temperature": round(temperature, 1),
            "rainfall": round(rainfall, 1),
            "num_events": num_events,
            "is_holiday": is_holiday,
            "day_of_week": d.strftime("%A"),
        })

data = pd.DataFrame(records)

# ------------------------------
# Step 4: Generate synthetic food waste (in kg)
# ------------------------------
# Fixed seed so the noise vector is the same on every run.
np.random.seed(42)
noise = np.random.normal(loc=0, scale=2, size=len(data))  # one random offset per row

# Waste rises with sales and rainfall and falls on event days.
sales_term = 0.1 * (data["sales"] / 100)  # 0.1 per ₹100 of sales
rain_term = 0.3 * data["rainfall"]
event_term = 1.5 * data["num_events"]

raw_waste = sales_term + rain_term - event_term + noise

# Negative waste is meaningless: floor at zero, then keep two decimals.
data["waste"] = raw_waste.clip(lower=0).round(2)

# ------------------------------
# Step 5: Save dataset
# ------------------------------
output_path = "indian_restaurant_waste_dataset.csv"
data.to_csv(output_path, index=False)  # the row index is not written to the file

# Short summary of what was written.
print(f"✅ Dataset saved: {output_path}")
print(f"Rows: {len(data)}")
print(f"Columns: {list(data.columns)}")


✅ Dataset saved: indian_restaurant_waste_dataset.csv
Rows: 9150
Columns: ['restaurant_id', 'city', 'date', 'sales', 'temperature', 'rainfall', 'num_events', 'is_holiday', 'day_of_week', 'waste']
