In [2]:
import pandas as pd
import numpy as np

# Build one year of synthetic daily sales rows and write them to a CSV file.
#
# For every calendar day a random subset of products is sold. Each
# (day, product) row gets a random price, a promotion flag, a holiday flag and
# a noisy quantity that is boosted when the promotion/holiday flags are set.

# Set random seed for reproducibility
np.random.seed(42)

# Define parameters
num_products = 5  # Number of unique products
start_date = '2023-01-01'
end_date = '2023-12-31'
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

# Candidate product IDs (0 .. num_products - 1); built once, not once per day
product_ids = np.arange(num_products)

# Generate synthetic data
data = []

for date in date_range:
    # Randomly decide how many products are sold on this day
    # (1 .. num_products inclusive; randint's upper bound is exclusive)
    num_products_sold_today = np.random.randint(1, num_products + 1)

    # Randomly select which products are sold today (no product repeats)
    products_sold_today = np.random.choice(
        product_ids, size=num_products_sold_today, replace=False
    )

    for product_id in products_sold_today:
        # Base quantity sold: integer in 10..49 (upper bound 50 is exclusive)
        base_quantity = np.random.randint(10, 50)

        # Price: uniform float in [10, 100)
        price = np.random.uniform(10, 100)

        # Promotion (random binary flag, 20% chance of promotion)
        promotion = np.random.choice([0, 1], p=[0.8, 0.2])

        # Holiday (random binary flag, 10% chance of holiday)
        # NOTE(review): this flag is drawn per row, so two products sold on the
        # same date can disagree about whether that date is a holiday. Drawing
        # it once per day would fix that but changes the random stream (and
        # therefore every generated value) -- confirm before changing.
        holiday = np.random.choice([0, 1], p=[0.9, 0.1])

        # Adjust quantity sold based on promotion and holiday
        if promotion:
            base_quantity *= 1.5  # Increase sales during promotions
        if holiday:
            base_quantity *= 1.2  # Increase sales during holidays

        # Add Gaussian noise (sigma = 5) and truncate to an integer. The noise
        # can exceed a small base quantity, so clamp at 0: a negative number of
        # units sold is not meaningful.
        quantity_sold = max(0, int(base_quantity + np.random.normal(0, 5)))

        # Append to data
        data.append([date, product_id, quantity_sold, price, promotion, holiday])

# Create DataFrame
columns = ['Date', 'Product_ID', 'Quantity_Sold', 'Price', 'Promotion', 'Holiday']
df = pd.DataFrame(data, columns=columns)

# Save to CSV
# NOTE: despite the "_shuffled" suffix, rows are written in generation order
# (by date); no shuffle is applied in this script.
df.to_csv("synthetic_sales_data_shuffled.csv", index=False)

# Display the first few rows
print(df.head())

        Date  Product_ID  Quantity_Sold      Price  Promotion  Holiday
0 2023-01-01           0             31  24.041678          0        0
1 2023-01-01           1             38  68.579963          0        0
2 2023-01-01           3             40  29.110520          0        0
3 2023-01-01           2             24  36.210623          0        0
4 2023-01-02           0              1  87.394637          0        0
