In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append((Path.cwd().parent/"src").resolve().as_posix())

import settings as s

In [2]:
# ----------------------------
# 1. Suppliers & Purchases
# ----------------------------

np.random.seed(0)

# Suppliers (can keep 4 or expand if needed)
suppliers = ["SteelCorp", "PolyPlast", "BoxMakers", "FastenIt"]

raw_materials = [
    # Metals
    "Steel Rods", "Aluminum Sheets", "Copper Wire", "Brass Fittings", "Iron Ingots",
    "Stainless Steel Pipes", "Zinc Coils", "Titanium Plates", "Nickel Powder", "Lead Blocks",

    # Plastics & Polymers
    "Polyethylene Pellets", "Polypropylene Sheets", "PVC Pipes", "ABS Resin", "Nylon Granules",
    "Polystyrene Foam", "Acrylic Sheets", "Polycarbonate Pellets", "Teflon Sheets", "Epoxy Resin",

    # Chemicals
    "Sulfuric Acid", "Hydrochloric Acid", "Sodium Hydroxide", "Ethanol", "Acetone",
    "Ammonia", "Silicone Oil", "Latex Compound", "Benzene", "Isopropyl Alcohol",

    # Textiles
    "Cotton Yarn", "Polyester Fabric", "Wool Rolls", "Silk Cloth", "Nylon Thread",
    "Canvas Sheets", "Denim Fabric", "Jute Bags", "Velvet Cloth", "Leather Hides",

    # Packaging
    "Cardboard Boxes", "Corrugated Sheets", "Kraft Paper Rolls", "Bubble Wrap", "Shrink Film",
    "Wooden Pallets", "Plastic Crates", "Glass Bottles", "Aluminum Cans", "Plastic Caps",

    # Electronics
    "Silicon Wafers", "Lithium Batteries", "Resistors", "Capacitors", "Diodes",
    "Transistors", "Copper PCBs", "Fiber Optic Cables", "Magnetic Coils", "LED Chips",

    # Wood & Construction
    "Plywood Sheets", "Timber Logs", "Cement Bags", "Sand", "Gravel",
    "Ceramic Tiles", "Glass Sheets", "Clay Bricks", "Concrete Blocks", "Gypsum Boards",

    # Food Industry (if relevant)
    "Wheat Flour", "Corn Starch", "Sugar", "Palm Oil", "Milk Powder",
    "Cocoa Beans", "Coffee Beans", "Soy Protein", "Gelatin Powder", "Salt",

    # Fasteners & Small Parts
    "Bolts", "Nuts", "Screws", "Washers", "Rivets",
    "Springs", "Bearings", "Pins", "Clamps", "Seals"
]

# Number of rows (make it large, e.g., 50k)
n_rows = 1000

# Generate synthetic purchases
purchases_df = pd.DataFrame({
    "po_id": range(1, n_rows + 1),
    "supplier": np.random.choice(suppliers, size=n_rows, ),
    "item_name": np.random.choice(raw_materials, size=n_rows),
    "quantity": np.random.randint(50, 100, size=n_rows),
    "order_date": pd.date_range("2020-01-01", periods=n_rows, freq="h"),
    "delivery_date": pd.date_range("2020-01-05", periods=n_rows, freq="h")
})

purchases_df

Unnamed: 0,po_id,supplier,item_name,quantity,order_date,delivery_date
0,1,SteelCorp,Leather Hides,99,2020-01-01 00:00:00,2020-01-05 00:00:00
1,2,FastenIt,Benzene,62,2020-01-01 01:00:00,2020-01-05 01:00:00
2,3,PolyPlast,Acetone,67,2020-01-01 02:00:00,2020-01-05 02:00:00
3,4,SteelCorp,Teflon Sheets,81,2020-01-01 03:00:00,2020-01-05 03:00:00
4,5,FastenIt,Clay Bricks,60,2020-01-01 04:00:00,2020-01-05 04:00:00
...,...,...,...,...,...,...
995,996,SteelCorp,Silicon Wafers,79,2020-02-11 11:00:00,2020-02-15 11:00:00
996,997,SteelCorp,Nylon Thread,92,2020-02-11 12:00:00,2020-02-15 12:00:00
997,998,BoxMakers,Polypropylene Sheets,77,2020-02-11 13:00:00,2020-02-15 13:00:00
998,999,PolyPlast,Polypropylene Sheets,82,2020-02-11 14:00:00,2020-02-15 14:00:00


In [3]:
purchases_df[purchases_df["item_name"] == "Steel Rods"]

df = purchases_df.groupby("item_name")["quantity"].sum()
df.describe()

count      90.000000
mean      827.966667
std       277.937971
min       275.000000
25%       617.250000
50%       827.000000
75%       987.750000
max      1999.000000
Name: quantity, dtype: float64

In [4]:
# ----------------------------
# 2. Raw Material Inventory
# ----------------------------
inventory_df = pd.DataFrame({
    "item_id": range(1, len(raw_materials) + 1),
    "item_name": raw_materials,
    "category": ["Raw Material"] * len(raw_materials),
    "stock_level": purchases_df.groupby("item_name")["quantity"].sum(),  # current stock
    "reorder_point": np.random.randint(500, 1000, size=len(raw_materials)),  # restock threshold
    "lead_time_days": np.random.randint(5, 15, size=len(raw_materials)),   # days to restock
    "supplier": np.random.choice(suppliers, size=len(raw_materials))       # assigned supplier
})

inventory_df

Unnamed: 0_level_0,item_id,item_name,category,stock_level,reorder_point,lead_time_days,supplier
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ABS Resin,1,Steel Rods,Raw Material,533,925,13,PolyPlast
Acetone,2,Aluminum Sheets,Raw Material,674,710,7,FastenIt
Acrylic Sheets,3,Copper Wire,Raw Material,981,913,14,PolyPlast
Aluminum Cans,4,Brass Fittings,Raw Material,989,897,10,PolyPlast
Aluminum Sheets,5,Iron Ingots,Raw Material,866,998,6,BoxMakers
...,...,...,...,...,...,...,...
Washers,86,Springs,Raw Material,914,820,10,SteelCorp
Wheat Flour,87,Bearings,Raw Material,618,537,14,FastenIt
Wooden Pallets,88,Pins,Raw Material,740,667,7,FastenIt
Wool Rolls,89,Clamps,Raw Material,928,512,5,PolyPlast


In [5]:
# ----------------------------
# 3. Bill of Materials (BOM)
# ----------------------------
finished_products = ["Car Frame", "Plastic Bottle", "Furniture Panel", "Engine Block", "Packaged Kit"]

# Each product linked to 3 raw materials
bom_data = []
for product in finished_products:
    chosen_raws = np.random.choice(raw_materials, size=3, replace=False)  # pick 3 unique raw materials
    for raw in chosen_raws:
        bom_data.append({
            "product_name": product,
            "raw_material": raw,
            "quantity_required": np.random.randint(2, 10)
        })

bom_df = pd.DataFrame(bom_data)
bom_df


Unnamed: 0,product_name,raw_material,quantity_required
0,Car Frame,Sand,5
1,Car Frame,Cardboard Boxes,7
2,Car Frame,Cocoa Beans,8
3,Plastic Bottle,Diodes,8
4,Plastic Bottle,Clay Bricks,2
5,Plastic Bottle,Cement Bags,3
6,Furniture Panel,Corn Starch,8
7,Furniture Panel,Nylon Granules,5
8,Furniture Panel,Cocoa Beans,9
9,Engine Block,Acetone,9


In [6]:
# ----------------------------
# 4. Production Schedule
# ----------------------------
production_schedule_df = pd.DataFrame({
    "prod_id": range(1, 11),
    "product_name": np.random.choice(finished_products, size=10),
    "planned_start_date": pd.date_range("2025-09-01", periods=10, freq="2D"),
    "planned_end_date": pd.date_range("2025-09-05", periods=10, freq="2D"),
    "quantity": np.random.randint(50, 200, size=10),
})

production_schedule_df["actual_end_date"] = production_schedule_df["planned_end_date"] + pd.to_timedelta(
    np.random.choice([0, 1, 2, 3], size=10), unit="D"
)

production_schedule_df

Unnamed: 0,prod_id,product_name,planned_start_date,planned_end_date,quantity,actual_end_date
0,1,Engine Block,2025-09-01,2025-09-05,124,2025-09-05
1,2,Car Frame,2025-09-03,2025-09-07,185,2025-09-08
2,3,Plastic Bottle,2025-09-05,2025-09-09,79,2025-09-09
3,4,Packaged Kit,2025-09-07,2025-09-11,150,2025-09-11
4,5,Car Frame,2025-09-09,2025-09-13,132,2025-09-14
5,6,Packaged Kit,2025-09-11,2025-09-15,97,2025-09-15
6,7,Engine Block,2025-09-13,2025-09-17,113,2025-09-20
7,8,Plastic Bottle,2025-09-15,2025-09-19,173,2025-09-19
8,9,Packaged Kit,2025-09-17,2025-09-21,91,2025-09-22
9,10,Car Frame,2025-09-19,2025-09-23,79,2025-09-26


In [7]:
# ----------------------------
# 5. Finished Goods Inventory
# ----------------------------
finished_goods_inventory_df = pd.DataFrame({
    "fg_id": range(1, len(finished_products) + 1),
    "product_name": finished_products,
    "stock_level": np.random.randint(50, 500, size=len(finished_products)),
    "reorder_point": np.random.randint(50, 150, size=len(finished_products)),
})

finished_goods_inventory_df


Unnamed: 0,fg_id,product_name,stock_level,reorder_point
0,1,Car Frame,204,137
1,2,Plastic Bottle,106,132
2,3,Furniture Panel,362,55
3,4,Engine Block,334,115
4,5,Packaged Kit,479,135


In [8]:
# Save each dataframe
purchases_df.to_csv(s.data_root_path / "purchases.csv", index=False)
inventory_df.to_csv(s.data_root_path / "raw_material_inventory.csv", index=False)
bom_df.to_csv(s.data_root_path / "bill_of_materials.csv", index=False)
production_schedule_df.to_csv(s.data_root_path / "production_schedule.csv", index=False)
finished_goods_inventory_df.to_csv(s.data_root_path / "finished_goods_inventory.csv", index=False)

print(f"All files saved in {s.data_root_path.resolve()}")


All files saved in /workspaces/TCS_GenAI_Hackaton/data
