In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns

def create_dataset(n_samples=20000, seed=42):
    """Generate a synthetic grocery-inventory dataset, one row per calendar day.

    Every row gets a consecutive ``OrderDate`` starting at 2023-01-01 and a
    product drawn at random from a fixed 20-item catalogue. Sales are a random
    base value scaled by a seasonal multiplier and a promotion multiplier.

    Parameters
    ----------
    n_samples : int, default 20000
        Number of rows (consecutive days) to generate.
    seed : int or None, default 42
        Passed to ``np.random.seed``. Note that this reseeds NumPy's *global*
        random state as a side effect of calling this function.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: OrderDate, Product, Price, ExpirationDate, LeadTime,
        StockOnHand, Count, CapitalRecord, WeeklySales, OrderToReceiveTime,
        HoldingCost, Seasonality, PromotionActive, DaysUntilExpiration,
        DailySales, InventoryTurnoverRatio, StockCoverDays, TotalInventoryCost.
    """
    np.random.seed(seed)

    start_date = datetime(2023, 1, 1)

    # Catalogue. Both *_range tuples are treated as inclusive (low, high) bounds.
    products = [
        {"name": "Milk", "price": 2.5, "shelf_life": 14, "lead_time_range": (1, 3), "base_sales_range": (500, 1000)},
        {"name": "Bread", "price": 3, "shelf_life": 7, "lead_time_range": (1, 2), "base_sales_range": (400, 800)},
        {"name": "Eggs", "price": 4, "shelf_life": 30, "lead_time_range": (2, 5), "base_sales_range": (300, 600)},
        {"name": "Cheese", "price": 6, "shelf_life": 60, "lead_time_range": (3, 7), "base_sales_range": (200, 400)},
        {"name": "Yogurt", "price": 3.5, "shelf_life": 21, "lead_time_range": (2, 4), "base_sales_range": (350, 700)},
        {"name": "Chicken", "price": 8, "shelf_life": 5, "lead_time_range": (1, 3), "base_sales_range": (250, 500)},
        {"name": "Beef", "price": 12, "shelf_life": 7, "lead_time_range": (2, 5), "base_sales_range": (150, 300)},
        {"name": "Apples", "price": 2, "shelf_life": 14, "lead_time_range": (3, 7), "base_sales_range": (400, 800)},
        {"name": "Bananas", "price": 1.5, "shelf_life": 7, "lead_time_range": (3, 6), "base_sales_range": (500, 1000)},
        {"name": "Tomatoes", "price": 3, "shelf_life": 10, "lead_time_range": (2, 4), "base_sales_range": (300, 600)},
        {"name": "Potatoes", "price": 2.5, "shelf_life": 30, "lead_time_range": (3, 7), "base_sales_range": (400, 800)},
        {"name": "Onions", "price": 2, "shelf_life": 60, "lead_time_range": (3, 7), "base_sales_range": (300, 600)},
        {"name": "Pasta", "price": 3.5, "shelf_life": 365, "lead_time_range": (5, 10), "base_sales_range": (200, 400)},
        {"name": "Rice", "price": 4, "shelf_life": 365, "lead_time_range": (5, 10), "base_sales_range": (200, 400)},
        {"name": "Cereal", "price": 5, "shelf_life": 180, "lead_time_range": (4, 8), "base_sales_range": (250, 500)},
        {"name": "Juice", "price": 4, "shelf_life": 30, "lead_time_range": (3, 6), "base_sales_range": (300, 600)},
        {"name": "Soda", "price": 3, "shelf_life": 180, "lead_time_range": (4, 8), "base_sales_range": (400, 800)},
        {"name": "Chips", "price": 3.5, "shelf_life": 90, "lead_time_range": (3, 7), "base_sales_range": (300, 600)},
        {"name": "Cookies", "price": 4, "shelf_life": 120, "lead_time_range": (4, 8), "base_sales_range": (200, 400)},
        {"name": "Ice Cream", "price": 6, "shelf_life": 180, "lead_time_range": (2, 5), "base_sales_range": (150, 300)},
    ]

    # Season label and demand multiplier keyed by month, so the label is looked
    # up directly instead of being recovered by comparing floats for equality.
    seasons = (
        ("Winter", 1.3, (12, 1, 2)),
        ("Spring", 1.1, (3, 4, 5)),
        ("Summer", 0.9, (6, 7, 8)),
        ("Fall", 1.0, (9, 10, 11)),
    )
    season_by_month = {
        month: (label, factor)
        for label, factor, months in seasons
        for month in months
    }

    # Key order here fixes the column order of the resulting DataFrame.
    data = {
        'OrderDate': [start_date + timedelta(days=i) for i in range(n_samples)],
        'Product': [],
        'Price': [],
        'ExpirationDate': [],
        'LeadTime': [],
        'StockOnHand': [],
        'Count': [],
        'CapitalRecord': np.random.uniform(100000, 1000000, n_samples),
        'WeeklySales': [],
        'OrderToReceiveTime': [],
        'HoldingCost': [],
        'Seasonality': [],
        'PromotionActive': np.random.choice([0, 1], n_samples, p=[0.8, 0.2]),
    }

    # Relative sampling weights, one per catalogue entry; normalised to sum to 1.
    product_probabilities = np.array([0.1, 0.1, 0.08, 0.05, 0.07, 0.06, 0.04, 0.06, 0.07, 0.05,
                                      0.05, 0.04, 0.03, 0.03, 0.04, 0.05, 0.05, 0.04, 0.03, 0.02])
    product_probabilities /= product_probabilities.sum()
    n_products = len(products)

    for i in range(n_samples):
        # Draw an index rather than the dict itself: avoids NumPy rebuilding an
        # object array from the list of dicts on every iteration.
        product = products[np.random.choice(n_products, p=product_probabilities)]
        order_date = data['OrderDate'][i]

        data['Product'].append(product["name"])
        data['Price'].append(product["price"])
        data['ExpirationDate'].append(order_date + timedelta(days=product["shelf_life"]))

        # np.random.randint excludes its upper bound; +1 makes the configured
        # maximum reachable (otherwise e.g. Bread's (1, 2) would always be 1).
        lead_low, lead_high = product["lead_time_range"]
        data['LeadTime'].append(np.random.randint(lead_low, lead_high + 1))

        seasonality, seasonality_factor = season_by_month[order_date.month]
        data['Seasonality'].append(seasonality)

        sales_low, sales_high = product["base_sales_range"]
        base_sales = np.random.randint(sales_low, sales_high + 1)
        promotion_factor = 1.5 if data['PromotionActive'][i] else 1
        weekly_sales = int(base_sales * seasonality_factor * promotion_factor)
        daily_sales = int(weekly_sales / 7)

        data['WeeklySales'].append(weekly_sales)
        data['Count'].append(daily_sales)

        # NOTE(review): stock is one running balance carried from the previous
        # row, whatever product that row held - it is not tracked per product.
        # Confirm this is the intended simulation.
        if i == 0:
            data['StockOnHand'].append(int(weekly_sales * 1.5))
        else:
            previous_stock = data['StockOnHand'][i-1]
            new_stock = max(0, previous_stock - data['Count'][i-1])
            # Replenish by one week of sales once under three days of cover.
            if new_stock < daily_sales * 3:
                new_stock += weekly_sales
            data['StockOnHand'].append(new_stock)

        # Receiving adds 0 or 1 extra day on top of the lead time.
        data['OrderToReceiveTime'].append(data['LeadTime'][i] + np.random.randint(0, 2))
        data['HoldingCost'].append(0.1 * product["price"])

    df = pd.DataFrame(data)
    df['DaysUntilExpiration'] = (df['ExpirationDate'] - df['OrderDate']).dt.days
    df['DailySales'] = df['WeeklySales'] / 7
    df['InventoryTurnoverRatio'] = df['WeeklySales'] / df['StockOnHand']
    df['StockCoverDays'] = df['StockOnHand'] / df['DailySales']
    df['TotalInventoryCost'] = df['StockOnHand'] * df['HoldingCost']

    df = df.round({
        'WeeklySales': 0,
        'StockOnHand': 0,
        'DailySales': 2,
        'InventoryTurnoverRatio': 2,
        'StockCoverDays': 2,
        'TotalInventoryCost': 2
    })

    return df

def calculate_optimal_order_time(row, safety_stock_days=3):
    """Return the number of whole days until stock falls to the reorder point.

    The reorder point is the demand expected while waiting for delivery plus a
    safety buffer: ``round(DailySales * (LeadTime + safety_stock_days))``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or dict)
        Must provide 'DailySales', 'LeadTime' and 'StockOnHand'.
        'DailySales' is used as a divisor and therefore must be non-zero.
    safety_stock_days : int or float, default 3
        Extra days of demand kept as a buffer on top of the lead time.

    Returns
    -------
    int
        Days until a reorder is due; 0 when stock is already at or below the
        reorder point.
    """
    daily_sales = row['DailySales']
    reorder_point = round(daily_sales * (row['LeadTime'] + safety_stock_days))
    days_until_reorder = round((row['StockOnHand'] - reorder_point) / daily_sales)
    # Never report a negative wait: an overdue reorder means "order now".
    return max(0, days_until_reorder)

# Build the base dataset, then derive the reorder and calendar features in a
# single chain. Each callable sees the columns assigned before it.
df = create_dataset().assign(
    # Days until stock reaches the reorder point, computed row by row.
    OptimalOrderTime=lambda frame: frame.apply(calculate_optimal_order_time, axis=1),
    # Calendar date on which that reorder falls.
    OptimalOrderDate=lambda frame: (
        frame['OrderDate'] + pd.to_timedelta(frame['OptimalOrderTime'], unit='D')
    ),
    Month=lambda frame: frame['OrderDate'].dt.month,
    # Monday is 0, so 5 and 6 are Saturday and Sunday.
    DayOfWeek=lambda frame: frame['OrderDate'].dt.dayofweek,
    IsWeekend=lambda frame: frame['DayOfWeek'].isin([5, 6]),
)

# Persist without the index so the CSV holds only the data columns.
df.to_csv('inventory_dataset_noisy_v3.csv', index=False)
print("Dataset saved as 'inventory_dataset_noisy_v3.csv'")


Dataset saved as 'inventory_dataset_noisy_v3.csv'


In [4]:
import pandas as pd
import numpy as np

# Load the dataset written by the previous cell. The '.csv' extension is
# required: the file is saved as 'inventory_dataset_noisy_v3.csv'.
df = pd.read_csv('inventory_dataset_noisy_v3.csv')

# Convert the OrderDate and OptimalOrderDate columns to datetime
# (unparseable values become NaT because of errors='coerce').
df['OrderDate'] = pd.to_datetime(df['OrderDate'], errors='coerce')
df['OptimalOrderDate'] = pd.to_datetime(df['OptimalOrderDate'], errors='coerce')

# Add noise to key features to create variation in the data.
np.random.seed(42)  # ensures the noise is reproducible

# NOTE(review): Gaussian noise can push these columns below zero, and columns
# derived from them earlier (e.g. DailySales, StockCoverDays, OptimalOrderTime)
# are not recomputed here - confirm that is intended.
df['LeadTime'] += np.random.normal(0, 1, len(df))  # noise on lead time
df['StockOnHand'] += np.random.normal(0, 50, len(df))  # noise on current stock
df['WeeklySales'] += np.random.normal(0, 50, len(df))  # noise on weekly sales

# Save the modified dataset.
# NOTE(review): this overwrites the file that was just read, so re-running this
# cell alone stacks another layer of noise on top of the previous one.
df.to_csv('inventory_dataset_noisy_v3.csv', index=False)
print("دیتاست با نویز اضافه شده ذخیره شد.")


دیتاست با نویز اضافه شده ذخیره شد.
