In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta

# Initialize Faker for generating fake data
# NOTE(review): `fake` is not referenced by any other statement in the visible
# notebook cells — confirm whether it is still needed before keeping it.
fake = Faker()

# Set parameters
SEED = 42  # Fixed seed so repeated runs draw the same pseudo-random sequence
num_products = 380  # Number of unique SKUs
num_customers = 50  # Number of unique customers

# Seed both generators this notebook draws from: the stdlib `random` module
# (random.choice / random.uniform / random.random / random.sample) and NumPy's
# global RNG (np.random.normal). Without this, every run produces different data.
random.seed(SEED)
np.random.seed(SEED)

# Define pharmaceutical categories, suppliers, and product types
pharma_categories = ["Prescription Drugs", "Over-the-Counter (OTC)", "Medical Devices", "Vaccines", "Diagnostics"]
pharma_suppliers = ["PharmaCorp", "MediSupply", "HealthPlus", "BioGen", "Cardinal Med"]
pharma_products = ["Pain Reliever", "Antibiotic", "Insulin", "Blood Pressure Monitor", "COVID-19 Vaccine",
                   "Allergy Medication", "Asthma Inhaler", "Glucose Test Strips", "Thermometer", "Antiseptic Cream",
                   "Antiviral Drug", "Sterile Syringe", "Wound Dressing", "Cholesterol Medication", "IV Fluids"]

# Build the product catalogue: one record per SKU, numbered from SKU-1000
# upward. Family name, category and supplier are each drawn independently at
# random, and the unit price is a uniform draw in [5, 500] rounded to cents.
products = [
    {
        "SKU": f"SKU-{sku_number}",
        "Product_Family_Name": random.choice(pharma_products),
        "Category": random.choice(pharma_categories),
        "Supplier": random.choice(pharma_suppliers),
        "Price_per_Unit (€)": round(random.uniform(5, 500), 2),  # Price range
    }
    for sku_number in range(1000, 1000 + num_products)
]

# Tabular view of the catalogue, one row per SKU
products_df = pd.DataFrame(data=products)

# Generate customers: zero-padded IDs "Customer-001", "Customer-002", ...
customers = [f"Customer-{i + 1:03d}" for i in range(num_customers)]

# Weight customers to create differences in sales volume
high_volume_customers = random.sample(customers, int(num_customers * 0.3))  # 30% of customers

# Remaining 70%, kept in their original order. A set difference converted to a
# list has an order that depends on per-process string hashing, which would
# make later random.choice() picks map to different customers on each run even
# when the RNG itself is seeded.
high_volume_lookup = set(high_volume_customers)
low_volume_customers = [customer for customer in customers if customer not in high_volume_lookup]

# Generate time series sales and inventory data
start_date = datetime(2018, 1, 1)  # Fixed start date
end_date = datetime.today()
# Whole days elapsed since start_date; the generated range therefore ends on
# the day before today.
num_days = (end_date - start_date).days
sales_data = []

# Assign each SKU to one of three seasonality groups by its position in the
# catalogue. The built-in hash() of a str is salted per interpreter process,
# so `hash(sku) % 3` would move SKUs between groups on every kernel restart.
seasonality_group = {
    product["SKU"]: position % 3 for position, product in enumerate(products)
}

# group -> (peak months, demand multiplier in peak months, multiplier otherwise)
seasonality_profiles = {
    0: ({1, 2, 3, 10, 11, 12}, 1.8, 0.6),
    1: ({4, 5, 6}, 1.5, 0.8),
    2: ({7, 8, 9}, 1.2, 1.0),
}

for i in range(num_days):
    current_date = start_date + timedelta(days=i)

    # Values that only depend on the day are computed once per day rather
    # than once per product.
    date_label = current_date.strftime("%Y-%m-%d")
    current_month = current_date.month
    # Linear demand trend: grows from 1.0 towards 1.1 across the date range
    trend_factor = 1 + (i / num_days) * 0.1

    for product in products:
        sku = product["SKU"]
        # Stock level ~ N(500, 100), floored at 10 units
        stock_level = max(10, int(np.random.normal(500, 100)))

        # Structured seasonality patterns
        peak_months, peak_factor, off_peak_factor = seasonality_profiles[seasonality_group[sku]]
        seasonal_factor = peak_factor if current_month in peak_months else off_peak_factor

        # Base demand ~ N(50, 10), floored at 10, scaled by season and trend
        base_sales = max(10, int(np.random.normal(50, 10)))
        sales_quantity = int(base_sales * seasonal_factor * trend_factor)

        # Assign a customer with variance in sales volume: 70% of rows go to
        # a low-volume customer, the rest to a high-volume one.
        if random.random() < 0.7:
            customer_id = random.choice(low_volume_customers)
            sales_quantity = int(sales_quantity * 0.5)  # Low-volume customers buy 50% less
        else:
            customer_id = random.choice(high_volume_customers)
            sales_quantity = int(sales_quantity * 1.5)  # High-volume customers buy 50% more

        # Revenue calculation
        revenue = round(sales_quantity * product["Price_per_Unit (€)"], 2)

        # Lead time (simulate supply chain delays)
        lead_time = random.choice([1, 2, 3, 5, 7, 10])

        sales_data.append({
            "Date": date_label,
            "SKU": sku,
            "Customer ID": customer_id,
            "Stock Level": stock_level,
            "Sales Quantity": sales_quantity,
            "Revenue (€)": revenue,
            "Lead Time (days)": lead_time
        })

# Build the frame once from the accumulated records (one row per day per SKU)
sales_df = pd.DataFrame(sales_data)

# Attach the product attributes to every sales row via the shared "SKU" key
full_dataset = pd.merge(sales_df, products_df, on="SKU")

# Persist the combined table to disk as CSV, omitting the row index
full_dataset.to_csv(path_or_buf="pharmaceutical_supply_chain.csv", index=False)

print("Dataset generated and saved as 'pharmaceutical_supply_chain.csv'")


Dataset generated and saved as 'pharmaceutical_supply_chain.csv'


In [2]:
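# Dependency note: the first cell imports the third-party `faker` package.
# If it is not installed, run `%pip install faker` once, then re-run that cell.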