In [1]:
import os
import random
from datetime import timedelta, datetime  # Import datetime

import pandas as pd
from faker import Faker

In [2]:
# Output directory for the generated CSV files. The trailing slash matters:
# the persist cell builds each file path by plain string concatenation
# (data_path + "<name>.csv").
data_path = "./data/"

In [3]:
# Seed both random sources so that "Restart Kernel -> Run All" regenerates the
# same dataset: the stdlib `random` module (used for choices, counts, prices)
# and Faker's shared generator (used for names, words, sentences, dates).
# Values derived from "today" (maintenance dates, relative date ranges) still
# shift with the day the notebook is run.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)
fake = Faker()

In [4]:
# Configuration
# Number of records generated for each entity type by the cells below.

NUM_PLANTS: int = 10       # manufacturing plants
NUM_LINES: int = 30        # production lines, each assigned to a random plant
NUM_MACHINES: int = 50     # machines, each assigned to a random line
NUM_OPERATORS: int = 40    # operators; 1-3 of them are linked to every machine
NUM_MATERIALS: int = 60    # materials that work orders may consume
NUM_PRODUCTS: int = 40     # products that work orders produce
NUM_WORKORDERS: int = 80   # work orders
NUM_INSPECTIONS: int = 70  # quality inspections, each tied to a random work order

In [5]:
# 1. Generate ManufacturingPlants
# One record per plant: a sequential id, a Faker company name suffixed with
# " Plant", and a Faker city used as the location.
plants = [
    {
        "id": f"plant_{idx}",
        "plantName": fake.company() + " Plant",
        "location": fake.city(),
    }
    for idx in range(NUM_PLANTS)
]

In [6]:
# 2. Generate ProductionLines
# Every line is owned by exactly one plant, picked at random.
def _build_line(idx):
    """Return one production-line record attached to a randomly chosen plant."""
    owner = random.choice(plants)
    return {
        "id": f"line_{idx}",
        "lineName": f"Line_{idx}_{fake.word()}",
        "capacity": random.randint(50, 500),  # units/day
        "plantID": owner["id"],
    }


lines = [_build_line(idx) for idx in range(NUM_LINES)]

In [7]:
# 3. Generate Machines
# Each machine sits on one randomly chosen production line and has one of the
# machine types listed below.
machine_types = ["Cutting", "Assembly", "Packaging", "Welding", "Pressing"]
machines = []
for idx in range(NUM_MACHINES):
    host_line = random.choice(lines)
    kind = random.choice(machine_types)
    # Maintenance falls due between now and 180 days out (about six months).
    maintenance_due = datetime.today() + timedelta(days=random.randint(0, 180))
    machines.append(
        dict(
            id=f"machine_{idx}",
            machineName=f"{kind}Machine_{idx}",
            machineType=kind,
            maintenanceDueDate=maintenance_due.isoformat(),
            lineID=host_line["id"],
        )
    )

In [8]:
# 4. Generate Operators
skill_levels = ["Beginner", "Intermediate", "Expert"]
operators = []
for i in range(NUM_OPERATORS):
    operators.append({
        "id": f"operator_{i}",
        "operatorName": fake.name(),
        "skillLevel": random.choice(skill_levels),
        # hired some time in the last five years
        "hireDate": fake.date_between(start_date='-5y', end_date='today').isoformat()
    })

# Link Operators to Machines (many-to-many).
# The relation is stored twice:
#   * on each machine record as the list m["operatorIDs"], and
#   * in machine_operator_links as one {"machineID", "operatorID"} record per
#     pair (previously this list was declared but left empty).
machine_operator_links = []
# A machine cannot get more distinct operators than exist; without this cap
# random.sample raises ValueError when fewer than 3 operators are configured.
max_ops_per_machine = min(3, len(operators))
for m in machines:
    # each machine gets 1-3 operators (0 only when there are no operators at all)
    num_ops = random.randint(1, max_ops_per_machine) if max_ops_per_machine else 0
    assigned_ops = random.sample(operators, k=num_ops)
    m["operatorIDs"] = [op["id"] for op in assigned_ops]
    machine_operator_links.extend(
        {"machineID": m["id"], "operatorID": op["id"]} for op in assigned_ops
    )

In [9]:
# 5. Generate Materials
# Name and type are drawn independently from the fixed lists below; the unit
# cost is a uniform draw between 0.5 and 50.0, rounded to two decimals.
material_types = ["Raw", "Semi-Finished"]
material_names = ["Steel Sheet", "Aluminum Rod", "Plastic Granules", "Wood Plank", "Copper Wire", "Rubber Block", "Glass Pane"]
materials = [
    {
        "id": f"material_{idx}",
        "materialName": random.choice(material_names),
        "materialType": random.choice(material_types),
        "unitCost": round(random.uniform(0.5, 50.0), 2),
    }
    for idx in range(NUM_MATERIALS)
]

In [10]:
# 6. Generate Products
product_names = ["Widget A", "Gadget B", "Assembly X", "Component Y", "Part Z", "Device Q"]
# Draw every SKU number up front, without replacement, so no two products can
# share a SKU (independent randint draws could collide).
# random.sample raises ValueError if NUM_PRODUCTS exceeds the 9000 available
# four-digit numbers.
sku_numbers = random.sample(range(1000, 10000), k=NUM_PRODUCTS)
products = []
for i in range(NUM_PRODUCTS):
    p_name = random.choice(product_names)
    sku_val = f"SKU-{sku_numbers[i]}"
    price = round(random.uniform(5.0, 200.0), 2)
    products.append({
        "id": f"product_{i}",
        "productName": p_name,
        "sku": sku_val,
        "price": price
    })

In [11]:
# 7. Generate WorkOrders
status_options = ["Scheduled", "In Progress", "Completed", "Cancelled"]
# Draw all work-order numbers up front, without replacement, so each order has
# a unique number (independent randint draws collide fairly often at this size).
# random.sample raises ValueError if NUM_WORKORDERS exceeds the 9000 available
# four-digit numbers.
wo_numbers = random.sample(range(1000, 10000), k=NUM_WORKORDERS)
# An order cannot consume more distinct materials than exist.
max_materials = min(3, len(materials))
workOrders = []
for i in range(NUM_WORKORDERS):
    wo_num = f"WO-{wo_numbers[i]}"
    qty = random.randint(10, 1000)
    # started within the last 180 days, due 1-60 days after the start
    start = fake.date_between(start_date='-180d', end_date='today')
    due = start + timedelta(days=random.randint(1, 60))
    w_status = random.choice(status_options)
    # pick a production line
    line = random.choice(lines)
    # pick 0-3 consumed materials; sampling without replacement keeps the same
    # material from being listed twice on one order (k=0 yields an empty list)
    mat_count = random.randint(0, max_materials)
    consumed_mat_ids = [mat["id"] for mat in random.sample(materials, k=mat_count)]
    # pick 1 product
    prod = random.choice(products)

    workOrders.append({
        "id": f"wo_{i}",
        "workOrderNumber": wo_num,
        "quantity": qty,
        "startDate": start.isoformat(),
        "dueDate": due.isoformat(),
        "status": w_status,
        "lineID": line["id"],
        "materialIDs": consumed_mat_ids,
        "productID": prod["id"]
    })

In [12]:
# 8. Generate QualityInspections
# Each inspection is tied to a random work order and a random operator, and is
# dated somewhere between that order's start date and due date.
results = ["Pass", "Fail"]
inspections = []
for idx in range(NUM_INSPECTIONS):
    order = random.choice(workOrders)
    inspector = random.choice(operators)
    # Work-order dates are stored as ISO strings; parse them back for Faker.
    window_start = datetime.fromisoformat(order["startDate"])
    window_end = datetime.fromisoformat(order["dueDate"])
    inspected_on = fake.date_between(start_date=window_start, end_date=window_end)
    outcome = random.choice(results)
    remarks = fake.sentence(nb_words=6)

    inspections.append(
        dict(
            id=f"inspection_{idx}",
            inspectionDate=inspected_on.isoformat(),
            result=outcome,
            notes=remarks,
            workOrderID=order["id"],
            operatorID=inspector["id"],
        )
    )

In [13]:
# Print Summaries
# One "<label> <record count>" line per generated collection; the final line
# carries an extra "\n" argument so the output ends with a blank line.
summary_rows = [
    ("ManufacturingPlants:", plants),
    ("ProductionLines:", lines),
    ("Machines:", machines),
    ("Operators:", operators),
    ("Materials:", materials),
    ("Products:", products),
    ("WorkOrders:", workOrders),
    ("QualityInspections:", inspections),
]
for label, records in summary_rows[:-1]:
    print(label, len(records))
last_label, last_records = summary_rows[-1]
print(last_label, len(last_records), "\n")

ManufacturingPlants: 10
ProductionLines: 30
Machines: 50
Operators: 40
Materials: 60
Products: 40
WorkOrders: 80
QualityInspections: 70 



In [14]:
# Print sample data
# Show the first record of every collection as a quick sanity check.
sample_rows = (
    ("Sample Plant:", plants),
    ("Sample ProductionLine:", lines),
    ("Sample Machine:", machines),
    ("Sample Operator:", operators),
    ("Sample Material:", materials),
    ("Sample Product:", products),
    ("Sample WorkOrder:", workOrders),
    ("Sample QualityInspection:", inspections),
)
for caption, records in sample_rows:
    print(caption, records[0])

Sample Plant: {'id': 'plant_0', 'plantName': 'Atkins, Jackson and Rivera Plant', 'location': 'Mollyview'}
Sample ProductionLine: {'id': 'line_0', 'lineName': 'Line_0_room', 'capacity': 223, 'plantID': 'plant_0'}
Sample Machine: {'id': 'machine_0', 'machineName': 'WeldingMachine_0', 'machineType': 'Welding', 'maintenanceDueDate': '2025-07-11T14:50:01.043564', 'lineID': 'line_4', 'operatorIDs': ['operator_28', 'operator_32', 'operator_38']}
Sample Operator: {'id': 'operator_0', 'operatorName': 'Joshua Hoover', 'skillLevel': 'Expert', 'hireDate': '2021-09-29'}
Sample Material: {'id': 'material_0', 'materialName': 'Steel Sheet', 'materialType': 'Semi-Finished', 'unitCost': 47.55}
Sample Product: {'id': 'product_0', 'productName': 'Gadget B', 'sku': 'SKU-5200', 'price': 118.97}
Sample WorkOrder: {'id': 'wo_0', 'workOrderNumber': 'WO-5691', 'quantity': 837, 'startDate': '2024-11-15', 'dueDate': '2024-12-09', 'status': 'In Progress', 'lineID': 'line_13', 'materialIDs': ['material_44'], 'produ

In [15]:
# persist the data
# Each collection is written to its own CSV file under data_path.
# Make sure the output directory exists first; to_csv does not create it.
os.makedirs(data_path or ".", exist_ok=True)

csv_tables = {
    "plants.csv": plants,
    "lines.csv": lines,
    "machines.csv": machines,
    "operators.csv": operators,
    "materials.csv": materials,
    "products.csv": products,
    "work_orders.csv": workOrders,
    "inspections.csv": inspections,
}
for file_name, records in csv_tables.items():
    # No escapechar is passed: the CSV writer already escapes embedded quotes
    # by doubling them, and an escapechar equal to the quote character is
    # rejected with ValueError by newer Python versions (3.13+).
    # List-valued columns (operatorIDs, materialIDs) are written as their
    # Python list text, e.g. "['operator_1', 'operator_2']".
    pd.DataFrame(records).to_csv(data_path + file_name, encoding="utf-8", index=False)