In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from pathlib import Path

# Configurable Parameters
NUM_UNITS = 5000                     # number of synthetic unit records to generate
START_DATE = datetime(2024, 1, 1)    # earliest possible record timestamp
END_DATE = datetime(2025, 3, 31)     # latest possible record timestamp

DEVICE_IDS = ["DEV_A1", "DEV_B2", "DEV_C3"]
LOCATIONS = ["Plant_1", "Plant_2", "Plant_3"]

# Device-specific ideal cycle times (in seconds)
DEVICE_CYCLE_TIME = {
    "DEV_A1": 1.8,
    "DEV_B2": 2.2,
    "DEV_C3": 2.0
}

ACCEPTANCE_RATE = 0.97  # ~3% defect rate
OUTPUT_PATH = "synthetic_oee_data.xlsx"

# Reproducibility: row generation draws from BOTH the stdlib `random` module and
# NumPy's legacy global RNG (`np.random.normal`), so both must be seeded for a
# fresh-kernel run to regenerate the same dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Fail fast on inconsistent configuration instead of part-way through generation:
# each device is looked up in DEVICE_CYCLE_TIME, and an inverted date window would
# give random.randint an empty range.
assert set(DEVICE_IDS) <= set(DEVICE_CYCLE_TIME), "every device needs an ideal cycle time"
assert START_DATE <= END_DATE, "START_DATE must not be after END_DATE"

# Utility Functions
def generate_unit_id(index: int) -> str:
    """Build a unit identifier: the fixed prefix ``PKG2024_`` followed by
    ``index`` rendered as a decimal integer zero-padded to at least six characters.
    """
    padded_index = format(index, "06d")
    return "PKG2024_" + padded_index

def generate_random_timestamp(start, end):
    """Return ``start`` plus a random whole number of seconds.

    The offset is drawn uniformly with ``random.randint`` from 0 up to the
    span ``end - start`` truncated to whole seconds, both bounds included.
    """
    span_seconds = int((end - start).total_seconds())
    offset_seconds = random.randint(0, span_seconds)
    return start + timedelta(seconds=offset_seconds)

def generate_row(index):
    """Create one synthetic production record as a dict.

    A device is picked at random from DEVICE_IDS and its ideal cycle time is
    looked up in DEVICE_CYCLE_TIME. The production time is a normal draw
    centred on that ideal time (standard deviation 0.3), rounded to two
    decimals and raised to 0.5 if it falls below that. The unit is labelled
    "Accepted" when a uniform draw is below ACCEPTANCE_RATE, else "Rejected".
    The timestamp falls between START_DATE and END_DATE, and the location is
    picked at random from LOCATIONS.

    ``index`` is used only to build the Unit_ID.
    """
    device = random.choice(DEVICE_IDS)
    ideal_time = DEVICE_CYCLE_TIME[device]

    # Sample around the device's ideal time, then apply a 0.5 floor so the
    # recorded time can never be zero or negative.
    sampled_time = np.random.normal(loc=ideal_time, scale=0.3)
    production_time = round(sampled_time, 2)
    if production_time < 0.5:
        production_time = 0.5

    if random.random() < ACCEPTANCE_RATE:
        result = "Accepted"
    else:
        result = "Rejected"

    timestamp = generate_random_timestamp(START_DATE, END_DATE)
    # Drawn after the timestamp to keep the same order of `random` calls.
    location = random.choice(LOCATIONS)

    row = {}
    row["Unit_ID"] = generate_unit_id(index)
    row["Timestamp"] = timestamp
    row["Device_ID"] = device
    row["Location"] = location
    row["Production_Time"] = production_time
    row["Ideal_Cycle_Time"] = ideal_time
    row["Result"] = result
    return row

# Generate the dataset: one record per unit, with 1-based unit indices
data = list(map(generate_row, range(1, NUM_UNITS + 1)))

# Tabulate and order rows chronologically for realism; the original row labels
# are kept, so the displayed index reflects generation order.
df = pd.DataFrame(data).sort_values(by="Timestamp")

# Save to Excel (row labels are not written)
df.to_excel(OUTPUT_PATH, index=False)
print(f"Synthetic OEE data saved to: {OUTPUT_PATH}")


Synthetic OEE data saved to: synthetic_oee_data.xlsx


In [7]:
# Show the generated frame, sorted by Timestamp; the recorded output below is
# truncated to the first and last rows.
df

Unnamed: 0,Unit_ID,Timestamp,Device_ID,Location,Production_Time,Ideal_Cycle_Time,Result
4057,PKG2024_004058,2024-01-01 00:10:04,DEV_A1,Plant_3,1.58,1.8,Accepted
3641,PKG2024_003642,2024-01-01 01:37:56,DEV_A1,Plant_1,1.72,1.8,Accepted
80,PKG2024_000081,2024-01-01 03:24:32,DEV_C3,Plant_3,1.92,2.0,Accepted
4040,PKG2024_004041,2024-01-01 03:47:03,DEV_A1,Plant_3,1.83,1.8,Accepted
197,PKG2024_000198,2024-01-01 03:58:11,DEV_B2,Plant_1,1.97,2.2,Accepted
...,...,...,...,...,...,...,...
1266,PKG2024_001267,2025-03-30 08:02:38,DEV_C3,Plant_1,2.26,2.0,Accepted
2498,PKG2024_002499,2025-03-30 16:13:12,DEV_C3,Plant_1,2.19,2.0,Accepted
1413,PKG2024_001414,2025-03-30 20:04:39,DEV_A1,Plant_2,1.42,1.8,Accepted
4629,PKG2024_004630,2025-03-30 20:35:53,DEV_A1,Plant_3,2.14,1.8,Accepted
