In [24]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import os

# --- CONFIGURATION ---
# Simulation window: 90 consecutive days (about three months of data).
NUM_DAYS = 90
START_DATE = datetime(year=2025, month=1, day=1)

# Opening hours on a 24-hour clock: 10 AM and 8 PM.
PARK_OPEN, PARK_CLOSE = 10, 20

# Ride catalogue, keyed by ride id.
#   base_wait        - baseline wait time in minutes
#   capacity_per_min - throughput figure, used later to derive a queue length
RIDES = {
    "R_001": dict(name="Mega Coaster", base_wait=25, capacity_per_min=20),
    "R_002": dict(name="Ferris Wheel", base_wait=10, capacity_per_min=10),
    "R_003": dict(name="Bumper Cars", base_wait=15, capacity_per_min=8),
    "R_004": dict(name="Haunted House", base_wait=20, capacity_per_min=12),
}

# Weather labels and their matching sampling weights, position by position
# (the last pair gives "Rainy" a 10% chance).
WEATHER_OPTIONS = ["Sunny", "Cloudy", "Rainy"]
WEATHER_WEIGHTS = [0.6, 0.3, 0.1]

In [25]:
# --- SIMULATION FUNCTION ---
def _time_factor(hour, peak_hour=14):
    """Return the hour-of-day demand multiplier.

    The multiplier is 1.5 at ``peak_hour`` and drops linearly by 0.1 for each
    hour of distance from it, clamped so it never goes below 0.5. (This is a
    clamped linear falloff, not a true bell curve.)
    """
    distance_from_peak = abs(hour - peak_hour)
    return max(0.5, 1.5 - (distance_from_peak * 0.1))


def _weather_factor(weather):
    """Return the demand multiplier for a day's weather label.

    "Rainy" maps to 0.6 and "Sunny" to 1.1; any other label is neutral (1.0).
    """
    return {"Rainy": 0.6, "Sunny": 1.1}.get(weather, 1.0)


def generate_funfair_data(seed=None, *, num_days=None, start_date=None,
                          park_open=None, park_close=None, rides=None,
                          weather_options=None, weather_weights=None):
    """Simulate hourly queue and wait-time observations for every ride.

    One weather label is drawn per day. For each day, each hour in
    ``range(park_open, park_close)`` and each ride, one row is produced whose
    wait time is ``base_wait * time * weekend * weather`` plus integer noise
    in [-5, 10], truncated to an int and floored at 0. The queue length is the
    wait time multiplied by the ride's ``capacity_per_min`` and a random
    factor in [0.9, 1.1], truncated to an int.

    Parameters
    ----------
    seed : optional
        When ``None`` (default) the module-level ``random`` generator is used,
        so any earlier ``random.seed(...)`` call still governs the output.
        Otherwise a private ``random.Random(seed)`` is used, which makes the
        result reproducible and leaves the global random state untouched.
    num_days, start_date, park_open, park_close, rides, weather_options,
    weather_weights : optional, keyword-only
        Overrides for the module-level ``NUM_DAYS``, ``START_DATE``,
        ``PARK_OPEN``, ``PARK_CLOSE``, ``RIDES``, ``WEATHER_OPTIONS`` and
        ``WEATHER_WEIGHTS``. Each falls back to its module-level constant
        when left as ``None``.

    Returns
    -------
    pandas.DataFrame
        Columns: ``timestamp``, ``ride_id``, ``day_of_week`` (0=Mon .. 6=Sun),
        ``hour_of_day``, ``is_weekend`` (0/1), ``weather``, ``queue_length``,
        ``wait_time_min``. The columns are present even when no rows are
        generated.
    """
    # Resolve overrides lazily: a module-level constant is only looked up for
    # values the caller did not supply.
    if num_days is None:
        num_days = NUM_DAYS
    if start_date is None:
        start_date = START_DATE
    if park_open is None:
        park_open = PARK_OPEN
    if park_close is None:
        park_close = PARK_CLOSE
    if rides is None:
        rides = RIDES
    if weather_options is None:
        weather_options = WEATHER_OPTIONS
    if weather_weights is None:
        weather_weights = WEATHER_WEIGHTS

    # The `random` module exposes the same choices/randint/uniform functions as
    # a Random instance, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    columns = [
        "timestamp", "ride_id", "day_of_week", "hour_of_day",
        "is_weekend", "weather", "queue_length", "wait_time_min",
    ]
    records = []

    for day_offset in range(num_days):
        current_date = start_date + timedelta(days=day_offset)

        # Day characteristics.
        day_of_week = current_date.weekday()  # 0=Mon, 6=Sun
        is_weekend = 1 if day_of_week >= 5 else 0

        # Simplification: a single weather label for the whole day.
        daily_weather = rng.choices(weather_options, weights=weather_weights)[0]

        # These two factors are constant within a day, so compute them once.
        weekend_factor = 1.5 if is_weekend else 1.0
        weather_factor = _weather_factor(daily_weather)

        for hour in range(park_open, park_close):
            time_factor = _time_factor(hour)
            timestamp = current_date.replace(hour=hour, minute=0)

            for ride_id, stats in rides.items():
                # Keep this multiplication order: reordering the float
                # products could shift a value across an integer boundary
                # before truncation.
                predicted_wait = (
                    stats["base_wait"] * time_factor * weekend_factor * weather_factor
                )

                # Noise is drawn before the queue variance; the draw order
                # determines the output for a given seed.
                noise = rng.randint(-5, 10)
                final_wait_time = max(0, int(predicted_wait + noise))

                # Reverse-engineer the queue: wait time * capacity per minute,
                # with +/-10% variance.
                queue_length = int(
                    final_wait_time * stats["capacity_per_min"] * rng.uniform(0.9, 1.1)
                )

                records.append({
                    "timestamp": timestamp,
                    "ride_id": ride_id,
                    "day_of_week": day_of_week,
                    "hour_of_day": hour,
                    "is_weekend": is_weekend,
                    "weather": daily_weather,
                    "queue_length": queue_length,  # Simulated sensor data
                    "wait_time_min": final_wait_time,  # Target variable
                })

    # Explicit columns keep the schema intact even when no rows were generated.
    return pd.DataFrame(records, columns=columns)

In [26]:
# --- EXECUTION ---
# Generate the dataset, write it to CSV, and print a short summary.
if __name__ == "__main__":
    print("Generating simulation data...")
    df = generate_funfair_data()

    # Save to CSV. The path is relative to the current working directory, so
    # adjust it depending on where you run this.
    output_path = "../data/raw/funfair_data.csv"

    # Derive the directory from the output path itself so the two cannot drift
    # apart; skip creation when the path is a bare filename.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_path, index=False)

    print(f"Data generated! Shape: {df.shape}")
    print(df.head())
    print("\nStats by Ride:")
    print(df.groupby('ride_id')['wait_time_min'].mean())

Generating simulation data...
Data generated! Shape: (3600, 8)
            timestamp ride_id  day_of_week  hour_of_day  is_weekend weather  \
0 2025-01-01 10:00:00   R_001            2           10           0   Sunny   
1 2025-01-01 10:00:00   R_002            2           10           0   Sunny   
2 2025-01-01 10:00:00   R_003            2           10           0   Sunny   
3 2025-01-01 10:00:00   R_004            2           10           0   Sunny   
4 2025-01-01 11:00:00   R_001            2           11           0   Sunny   

   queue_length  wait_time_min  
0           711             34  
1            77              8  
2           197             24  
3           254             21  
4           838             43  

Stats by Ride:
ride_id
R_001    37.503333
R_002    16.112222
R_003    23.474444
R_004    30.611111
Name: wait_time_min, dtype: float64
