In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# -----------------------------
# PARAMETERS
# -----------------------------
start_date = "2022-01-01"
end_date = "2024-12-31"
stores = ["Store_A", "Store_B", "Store_C"]
weathers = ["Sunny", "Cloudy", "Rainy"]
events = ["None", "Promotion", "Holiday"]

# -----------------------------
# CREATE BASE DATETIME RANGE
# -----------------------------
date_rng = pd.date_range(start=start_date, end=end_date, freq="H")

# -----------------------------
# GENERATE DATA
# -----------------------------
data = []

for store in stores:
    base_traffic = np.random.randint(100, 250)  # base traffic per store
    for dt in date_rng:
        hour = dt.hour
        day_of_week = dt.weekday()

        # daily & hourly patterns
        hour_factor = 1.5 if 10 <= hour <= 18 else 0.6  # peak during business hours
        weekend_factor = 1.3 if day_of_week in [5, 6] else 1.0  # more on weekends

        # weather & event effects
        weather = np.random.choice(weathers, p=[0.6, 0.3, 0.1])
        event = np.random.choice(events, p=[0.85, 0.1, 0.05])

        weather_factor = {
            "Sunny": 1.0,
            "Cloudy": 0.9,
            "Rainy": 0.75
        }[weather]

        event_factor = {
            "None": 1.0,
            "Promotion": 1.4,
            "Holiday": 0.7
        }[event]

        # add some randomness
        visits = base_traffic * hour_factor * weekend_factor * weather_factor * event_factor
        visits += np.random.normal(0, 10)

        data.append([
            dt,
            store,
            round(max(visits, 0)),  # no negative traffic
            weather,
            event
        ])

# -----------------------------
# CREATE DATAFRAME
# -----------------------------
df = pd.DataFrame(data, columns=["Datetime", "Store", "Visits", "Weather", "Event"])

# -----------------------------
# SAVE CSV
# -----------------------------
df.to_csv("traffic_data.csv", index=False)
print("✅ traffic_data.csv generated successfully!")
print(df.head())


  date_rng = pd.date_range(start=start_date, end=end_date, freq="H")


✅ traffic_data.csv generated successfully!
             Datetime    Store  Visits Weather    Event
0 2022-01-01 00:00:00  Store_A      61   Sunny  Holiday
1 2022-01-01 01:00:00  Store_A      76  Cloudy     None
2 2022-01-01 02:00:00  Store_A      96   Sunny     None
3 2022-01-01 03:00:00  Store_A     102   Sunny     None
4 2022-01-01 04:00:00  Store_A      95  Cloudy     None
