In [51]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [52]:
class Product:
    def __init__(self, id, base_sales, seasonal_pattern, weekend_effect, 
                 holiday_effect, promotion_effect, competitor_effect, market_sensitivity,
                 initial_inventory, reorder_point, max_inventory, lead_time):
        self.id = id
        self.base_sales = base_sales
        self.seasonal_pattern = seasonal_pattern  # 'summer', 'winter', 'all_year'
        self.weekend_effect = weekend_effect      # multiplier for weekend sales
        self.holiday_effect = holiday_effect      # multiplier for holiday sales
        self.promotion_effect = promotion_effect  # multiplier during promotions
        self.competitor_effect = competitor_effect # sales reduction during competitor promotions
        self.market_sensitivity = market_sensitivity  # how much market index affects sales
        self.inventory = initial_inventory
        self.reorder_point = reorder_point
        self.max_inventory = max_inventory
        self.lead_time = lead_time
        self.pending_orders = []  # [(delivery_date, quantity)]

In [53]:
def generate_realistic_sales_data(start_date, num_days=365):
    # Define products with different characteristics
    products = [
        Product(
            id=1,
            base_sales=100,
            seasonal_pattern='summer',
            weekend_effect=1.3,
            holiday_effect=1.5,
            promotion_effect=1.4,
            competitor_effect=0.8,
            market_sensitivity=0.3,
            initial_inventory=800,
            reorder_point=200,
            max_inventory=1000,
            lead_time=3
        ),
        Product(
            id=2,
            base_sales=80,
            seasonal_pattern='winter',
            weekend_effect=1.1,
            holiday_effect=1.6,
            promotion_effect=1.3,
            competitor_effect=0.9,
            market_sensitivity=0.2,
            initial_inventory=600,
            reorder_point=150,
            max_inventory=800,
            lead_time=5
        ),
        Product(
            id=3,
            base_sales=120,
            seasonal_pattern='all_year',
            weekend_effect=1.4,
            holiday_effect=1.3,
            promotion_effect=1.5,
            competitor_effect=0.7,
            market_sensitivity=0.4,
            initial_inventory=1000,
            reorder_point=300,
            max_inventory=1200,
            lead_time=4
        )
    ]
    
    dates = [(start_date + timedelta(days=x)) for x in range(num_days)]
    data = []
    
    # Generate daily data
    for date in dates:
        # Environmental factors
        is_weekend = date.weekday() >= 5
        is_holiday = is_holiday_date(date)  # You would implement this based on actual holidays
        market_index = generate_market_index(date)  # Generate market sentiment (0-1)
        competitor_promotion = np.random.choice([0, 1], p=[0.85, 0.15])  # 15% chance of competitor promotion
        
        # Calculate season strength (0-1)
        season_strength = calculate_season_strength(date)
        
        for product in products:
            # Process any pending deliveries
            process_pending_orders(product, date)
            
            # Calculate daily sales based on all factors
            daily_sales = calculate_daily_sales(
                product,
                is_weekend,
                is_holiday,
                season_strength,
                market_index,
                competitor_promotion
            )
            
            # Ensure we can't sell more than we have
            daily_sales = min(daily_sales, product.inventory)
            
            # Update inventory
            product.inventory -= daily_sales
            
            # Check if we need to place a new order
            if product.inventory <= product.reorder_point:
                order_quantity = product.max_inventory - product.inventory
                delivery_date = date + timedelta(days=product.lead_time)
                product.pending_orders.append((delivery_date, order_quantity))
            
            # Record daily data
            row = {
                'date': date.strftime('%Y-%m-%d'),
                'product_id': f'PROD_{product.id}',
                'daily_sales': int(daily_sales),
                'inventory_level': product.inventory,
                'is_weekend': int(is_weekend),
                'is_holiday': int(is_holiday),
                'season_strength': season_strength,
                'competitor_promotion': competitor_promotion,
                'market_index': market_index,
                'is_promotion': np.random.choice([0, 1], p=[0.8, 0.2])  # 20% chance of promotion
            }
            data.append(row)
    
    return pd.DataFrame(data)

In [54]:
def calculate_daily_sales(product, is_weekend, is_holiday, season_strength, 
                        market_index, competitor_promotion):
    """Calculate daily sales based on all factors"""
    base = product.base_sales
    
    # Apply seasonal effect
    if product.seasonal_pattern == 'summer':
        seasonal_multiplier = 1 + (season_strength * 0.5)  # Up to 50% boost in summer
    elif product.seasonal_pattern == 'winter':
        seasonal_multiplier = 1 + ((1 - season_strength) * 0.5)  # Up to 50% boost in winter
    else:  # all_year
        seasonal_multiplier = 1 + (abs(0.5 - season_strength) * 0.2)  # Small variation
    
    # Apply other effects
    weekend_multiplier = product.weekend_effect if is_weekend else 1
    holiday_multiplier = product.holiday_effect if is_holiday else 1
    competitor_multiplier = product.competitor_effect if competitor_promotion else 1
    
    # Market effect
    market_multiplier = 1 + (market_index - 0.5) * product.market_sensitivity
    
    # Calculate final sales with some randomness
    sales = base * seasonal_multiplier * weekend_multiplier * holiday_multiplier * \
            competitor_multiplier * market_multiplier
    
    # Add random noise (±10%)
    noise = np.random.uniform(0.9, 1.1)
    sales *= noise
    
    return max(0, int(sales))


In [55]:
def calculate_season_strength(date):
    """Calculate season strength (0=winter, 1=summer) based on day of year"""
    day_of_year = date.timetuple().tm_yday
    return (1 + np.sin((day_of_year / 365) * 2 * np.pi - np.pi/2)) / 2

In [56]:
def generate_market_index(date):
    """Generate market index with some temporal correlation"""
    # This could be enhanced with more sophisticated time series generation
    return max(0, min(1, np.random.normal(0.5, 0.15)))

In [57]:
def process_pending_orders(product, date):
    """Process any pending orders due for delivery"""
    remaining_orders = []
    for delivery_date, quantity in product.pending_orders:
        if date >= delivery_date:
            product.inventory += quantity
        else:
            remaining_orders.append((delivery_date, quantity))
    product.pending_orders = remaining_orders

In [58]:
def is_holiday_date(date):
    """Determine if date is a holiday"""
    # Major holidays - this could be expanded with a proper holiday calendar
    holidays = [
        (1, 1),   # New Year's
        (8, 15),# Independence Day
        (10,19), #diwali
        (3,28), #holi
        (12, 25), # Christmas
        # Add more holidays as needed
    ]
    return (date.month, date.day) in holidays

In [59]:
start_date = datetime(2023, 1, 1)
df = generate_realistic_sales_data(start_date)

In [60]:
print(df.head())

         date product_id  daily_sales  inventory_level  is_weekend  \
0  2023-01-01     PROD_1          183              617           1   
1  2023-01-01     PROD_2          215              385           1   
2  2023-01-01     PROD_3          262              738           1   
3  2023-01-02     PROD_1           95              522           0   
4  2023-01-02     PROD_2          109              276           0   

   is_holiday  season_strength  competitor_promotion  market_index  \
0           1         0.000074                     0      0.484561   
1           1         0.000074                     0      0.484561   
2           1         0.000074                     0      0.484561   
3           0         0.000296                     0      0.314541   
4           0         0.000296                     0      0.314541   

   is_promotion  
0             0  
1             0  
2             0  
3             0  
4             0  


In [61]:
print("\nDataset Statistics:")
print(df.describe())


Dataset Statistics:
       daily_sales  inventory_level   is_weekend   is_holiday  \
count  1095.000000      1095.000000  1095.000000  1095.000000   
mean    116.534247      1539.462100     0.287671     0.013699   
std      38.977311      1061.132283     0.452884     0.116290   
min       0.000000         0.000000     0.000000     0.000000   
25%      95.000000       645.500000     0.000000     0.000000   
50%     118.000000      1491.000000     0.000000     0.000000   
75%     138.000000      2336.500000     1.000000     0.000000   
max     262.000000      4120.000000     1.000000     1.000000   

       season_strength  competitor_promotion  market_index  is_promotion  
count      1095.000000           1095.000000   1095.000000   1095.000000  
mean          0.500000              0.156164      0.500895      0.191781  
std           0.353715              0.363177      0.143459      0.393881  
min           0.000000              0.000000      0.121044      0.000000  
25%           0.14