## Synthetic supply chain dataset (1000 products)

The following cells generate comprehensive, realistic datasets for a simple inventory/replenishment scenario you can import into MongoDB later. It creates four collections for the date range 2024-01-01 to 2024-03-31 (90 days):

- products: 1000 products with category, pricing, UOM, lead time, safety stock, reorder policy, etc.
- daily_demand: daily demand per product for 90 days with weekday seasonality.
- inventory_levels: simulated end-of-day on-hand inventory levels following a simple reorder policy.
- reorder_recommendations: monthly recommendations with reorder_point and recommended_order_qty.

Output format:
- JSON Lines files (one JSON document per line), suitable for mongoimport or PyMongo bulk insert.
- Files written to ../data/supply_chain/*.jsonl relative to this notebook.

Notes:
- A fixed random seed is used for reproducibility.
- Quantities and prices are synthetic but follow reasonable distributions.
- The inventory simulation assumes backorders are not accumulated (inventory does not go negative) and uses a basic reorder policy with lead time.

In [14]:
# Synthetic dataset generator for supply chain scenario (1000 products)
from __future__ import annotations
import os
import json
import math
import random
from datetime import datetime, timedelta
from dataclasses import dataclass, asdict
from typing import List, Dict, Tuple

random.seed(42)

DATA_DIR = os.path.abspath(os.path.join(os.getcwd(), "data", "inventory"))
print(DATA_DIR)
os.makedirs(DATA_DIR, exist_ok=True)

START_DATE = datetime(2024, 1, 1)
END_DATE = datetime(2024, 3, 31)  # inclusive
DAYS = (END_DATE - START_DATE).days + 1
N_PRODUCTS = 1000

CATEGORIES = [
    ("Electronics", 0.20),
    ("Home & Kitchen", 0.20),
    ("Grocery", 0.15),
    ("Health & Beauty", 0.15),
    ("Sports", 0.10),
    ("Toys", 0.10),
    ("Office", 0.10),
]
UOMS = ["each", "pack", "box", "case"]

@dataclass
class Product:
    _id: str
    name: str
    category: str
    price: float
    uom: str
    lead_time_days: int
    safety_stock: int
    reorder_policy: str  # e.g., "ROP" (reorder point)
    reorder_multiplier: float  # multiplier for average daily demand * lead time


def choose_weighted(options: List[Tuple[str, float]]) -> str:
    r = random.random()
    cum = 0.0
    for value, weight in options:
        cum += weight
        if r <= cum:
            return value
    return options[-1][0]


def gen_products(n: int) -> List[Product]:
    products: List[Product] = []
    for i in range(n):
        pid = f"product{i+1:04d}"
        cat = choose_weighted(CATEGORIES)
        # price by category with variance
        base_price = {
            "Electronics": 120.0,
            "Home & Kitchen": 35.0,
            "Grocery": 5.0,
            "Health & Beauty": 18.0,
            "Sports": 40.0,
            "Toys": 22.0,
            "Office": 15.0,
        }[cat]
        price = max(0.5, random.gauss(mu=base_price, sigma=base_price * 0.25))
        uom = random.choice(UOMS)
        lead_time = max(2, int(random.gauss(mu=7, sigma=3)))  # 2–20 typical
        if lead_time > 20:
            lead_time = 20
        safety_stock = max(5, int(abs(random.gauss(mu=10, sigma=6))))
        reorder_policy = "ROP"
        reorder_multiplier = round(random.uniform(1.0, 2.5), 2)
        products.append(Product(
            _id=pid,
            name=f"Product {i+1}",
            category=cat,
            price=round(price, 2),
            uom=uom,
            lead_time_days=lead_time,
            safety_stock=safety_stock,
            reorder_policy=reorder_policy,
            reorder_multiplier=reorder_multiplier,
        ))
    return products


def daterange(start: datetime, end: datetime):
    d = start
    while d <= end:
        yield d
        d += timedelta(days=1)


def weekday_seasonality_factor(d: datetime) -> float:
    # Higher demand on Mon/Thu, lower on weekend
    wd = d.weekday()  # 0=Mon .. 6=Sun
    if wd in (0, 3):
        return 1.15
    if wd in (5, 6):
        return 0.85
    return 1.0


def gen_daily_demand(products: List[Product]) -> List[Dict]:
    docs = []
    for p in products:
        # base mean daily demand driven by category
        base_mu = {
            "Electronics": 2.0,
            "Home & Kitchen": 5.0,
            "Grocery": 12.0,
            "Health & Beauty": 6.0,
            "Sports": 4.0,
            "Toys": 5.0,
            "Office": 7.0,
        }[p.category]
        for d in daterange(START_DATE, END_DATE):
            mu = base_mu * weekday_seasonality_factor(d)
            # Use Poisson-like integer demand with noise
            demand = max(0, int(random.gauss(mu=mu, sigma=max(1.0, mu * 0.35))))
            docs.append({
                "product_id": p._id,
                "date": d.strftime("%Y-%m-%d"),
                "demand": demand,
            })
    return docs


def simulate_inventory(products: List[Product], daily_demand_docs: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
    # Organize daily demand by product and date for fast lookup
    demand_by_prod_date: Dict[str, Dict[str, int]] = {}
    for doc in daily_demand_docs:
        demand_by_prod_date.setdefault(doc["product_id"], {})[doc["date"]] = doc["demand"]

    inventory_docs: List[Dict] = []
    rec_docs: List[Dict] = []

    for p in products:
        # Initial inventory ~ lead time coverage + safety stock
        avg_mu = sum(demand_by_prod_date[p._id].values()) / DAYS
        initial_inventory = int(max(20, avg_mu * p.lead_time_days * 1.25 + p.safety_stock))

        on_hand = initial_inventory
        pipeline: List[Tuple[str, int]] = []  # list of (arrival_date_str, qty)

        # monthly recommendation aggregation helpers
        month_demand: Dict[str, int] = {}

        for d in daterange(START_DATE, END_DATE):
            ds = d.strftime("%Y-%m-%d")
            # Receive any due orders
            arrivals_today = [q for (ad, q) in pipeline if ad == ds]
            if arrivals_today:
                on_hand += sum(arrivals_today)
            # Remove delivered orders from pipeline
            pipeline = [(ad, q) for (ad, q) in pipeline if ad != ds]

            dem = demand_by_prod_date[p._id].get(ds, 0)
            shipped = min(on_hand, dem)
            on_hand -= shipped

            # Record end-of-day inventory
            inventory_docs.append({
                "product_id": p._id,
                "date": ds,
                "inventory_level": on_hand,
            })

            # Accumulate monthly demand
            month_key = d.strftime("%Y-%m")
            month_demand[month_key] = month_demand.get(month_key, 0) + dem

            # Simple reorder point policy
            reorder_point = int(math.ceil(avg_mu * p.lead_time_days * p.reorder_multiplier + p.safety_stock))
            target_level = int(math.ceil(avg_mu * (p.lead_time_days + 7)))  # aim for a week beyond lead time

            if on_hand <= reorder_point:
                order_qty = max(0, target_level - on_hand)
                if order_qty > 0:
                    arrival_date = d + timedelta(days=p.lead_time_days)
                    pipeline.append((arrival_date.strftime("%Y-%m-%d"), order_qty))

        # Create monthly recommendations for months present in range
        for month in sorted(month_demand.keys()):
            # Compute avg daily demand in that month
            year, m = map(int, month.split("-"))
            # days in month in our window
            md_start = max(START_DATE, datetime(year, m, 1))
            if m == 12:
                md_end = min(END_DATE, datetime(year + 1, 1, 1) - timedelta(days=1))
            else:
                md_end = min(END_DATE, datetime(year, m + 1, 1) - timedelta(days=1))
            md_days = (md_end - md_start).days + 1
            month_avg = month_demand[month] / md_days

            rop = int(math.ceil(month_avg * p.lead_time_days * p.reorder_multiplier + p.safety_stock))
            rec_qty = int(max(0, math.ceil(month_avg * (p.lead_time_days + 14))))

            rec_docs.append({
                "product_id": p._id,
                "date": f"{month}-01",
                "recommended_order_qty": rec_qty,
                "reorder_point": rop,
            })

    return inventory_docs, rec_docs


products = gen_products(N_PRODUCTS)
daily_demand_docs = gen_daily_demand(products)
inventory_docs, recommendation_docs = simulate_inventory(products, daily_demand_docs)

# Preview
print(f"Products: {len(products)} | Daily demand docs: {len(daily_demand_docs)} | Inventory docs: {len(inventory_docs)} | Recs: {len(recommendation_docs)}")

/Applications/RF/NTU/SCTP in DSAI/Inventory_Optimization_Dashboard/data/inventory
Products: 1000 | Daily demand docs: 91000 | Inventory docs: 91000 | Recs: 3000
Products: 1000 | Daily demand docs: 91000 | Inventory docs: 91000 | Recs: 3000


In [15]:
# Write JSONL files for Mongo import
files = {
    "products": os.path.join(DATA_DIR, "products.jsonl"),
    "daily_demand": os.path.join(DATA_DIR, "daily_demand.jsonl"),
    "inventory_levels": os.path.join(DATA_DIR, "inventory_levels.jsonl"),
    "reorder_recommendations": os.path.join(DATA_DIR, "reorder_recommendations.jsonl"),
}

with open(files["products"], "w") as f:
    for p in products:
        f.write(json.dumps(asdict(p)) + "\n")

with open(files["daily_demand"], "w") as f:
    for doc in daily_demand_docs:
        f.write(json.dumps(doc) + "\n")

with open(files["inventory_levels"], "w") as f:
    for doc in inventory_docs:
        f.write(json.dumps(doc) + "\n")

with open(files["reorder_recommendations"], "w") as f:
    for doc in recommendation_docs:
        f.write(json.dumps(doc) + "\n")

print("Written:")
for k, pth in files.items():
    size_mb = os.path.getsize(pth) / (1024 * 1024)
    print(f" - {k}: {pth} ({size_mb:.2f} MB)")

Written:
 - products: /Applications/RF/NTU/SCTP in DSAI/Inventory_Optimization_Dashboard/data/inventory/products.jsonl (0.19 MB)
 - daily_demand: /Applications/RF/NTU/SCTP in DSAI/Inventory_Optimization_Dashboard/data/inventory/daily_demand.jsonl (5.65 MB)
 - inventory_levels: /Applications/RF/NTU/SCTP in DSAI/Inventory_Optimization_Dashboard/data/inventory/inventory_levels.jsonl (6.55 MB)
 - reorder_recommendations: /Applications/RF/NTU/SCTP in DSAI/Inventory_Optimization_Dashboard/data/inventory/reorder_recommendations.jsonl (0.29 MB)


In [16]:
# (Optional) Quick import via PyMongo from this notebook
# Ensure you have a running MongoDB or Atlas URI in MONGO_URI
import os
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Load env vars from .env if available (dev only)
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

uri = os.environ.get("MONGO_URI")
if not uri:
    raise ValueError("MONGO_URI is not set. Create a .env from .env.example or set it in the environment.")
DB_NAME = os.environ.get("MONGO_DB", "inventory_demo")

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)


db = client[DB_NAME]

# Drop existing collections (optional)
for col in ["products", "daily_demand", "inventory_levels", "reorder_recommendations"]:
    if col in db.list_collection_names():
        db[col].drop()

# Load from JSONL files
import json

def load_jsonl(path):
    with open(path, "r") as f:
        for line in f:
            if line.strip():
                yield json.loads(line)

insert_counts = {}
for name, path in files.items():
    bulk = list(load_jsonl(path))
    if bulk:
        db[name].insert_many(bulk)
        insert_counts[name] = len(bulk)

print("Imported counts:")
for k, v in insert_counts.items():
    print(f" - {k}: {v}")

# Create helpful indexes
from pymongo import ASCENDING

db["daily_demand"].create_index([("product_id", ASCENDING), ("date", ASCENDING)])
db["inventory_levels"].create_index([("product_id", ASCENDING), ("date", ASCENDING)])
db["reorder_recommendations"].create_index([("product_id", ASCENDING), ("date", ASCENDING)])
db["products"].create_index([("category", ASCENDING)])

print("Indexes created.")

Pinged your deployment. You successfully connected to MongoDB!
Imported counts:
 - products: 1000
 - daily_demand: 91000
 - inventory_levels: 91000
 - reorder_recommendations: 3000
Imported counts:
 - products: 1000
 - daily_demand: 91000
 - inventory_levels: 91000
 - reorder_recommendations: 3000
Indexes created.
Indexes created.
