In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Configuration
RANDOM_SEED = 42
n_rows = 100000
equipment_list = (
    [f"Pasteurizer_{i}" for i in range(1, 6)]
    + [f"Homogenizer_{i}" for i in range(1, 4)]
    + [f"Filler_{i}" for i in range(1, 3)]
)
start_date = datetime(2023, 1, 1)
OUTPUT_PATH = "dairy_predictive_maintenance_data.xlsx"

# Column order of every generated row; must stay in sync with the list
# appended in generate_rows().
COLUMNS = [
    "Timestamp",
    "Equipment_ID",
    "SKU_Changeover",
    "Vibration_X",
    "Vibration_Y",
    "Vibration_Z",
    "Temperature",
    "Pressure",
    "Motor_Current",
    "Wear_Level",
    "Failure_Mode",
]

# Failure labels and their sampling weights (hoisted: loop-invariant).
FAILURE_MODES = ["none", "bearing_wear", "seal_leak", "motor_fault"]
BASELINE_FAILURE_WEIGHTS = [0.85, 0.05, 0.05, 0.05]
HIGH_WEAR_FAILURE_WEIGHTS = [0.2, 0.3, 0.3, 0.2]


def generate_rows(n_rows: int, equipment_list: list, start_date: datetime) -> list:
    """Generate synthetic hourly sensor rows for the given equipment.

    Draws from the global ``np.random`` state, so seed it beforehand for
    reproducible output.

    Args:
        n_rows: Number of rows to generate; row ``i`` is stamped
            ``start_date + i`` hours.
        equipment_list: Equipment identifiers to sample from uniformly.
        start_date: Timestamp of the first row.

    Returns:
        A list of 11-element lists ordered as in ``COLUMNS``.
    """
    rows = []

    # Running wear is kept in its own variable rather than read back from the
    # previous row. The earlier code indexed the last row at position 8, which
    # is Motor_Current, so wear never actually accumulated.
    # NOTE: wear is a single counter shared by all equipment, as in the
    # original script (it is not tracked per Equipment_ID).
    wear_level = 0.0

    for i in range(n_rows):
        # Timestamp (hourly intervals)
        timestamp = start_date + timedelta(hours=i)

        # Equipment selection
        equipment = np.random.choice(equipment_list)

        # SKU Changeover (5% probability)
        sku_changeover = np.random.choice([True, False], p=[0.05, 0.95])

        # Base sensor values
        vib_x = np.random.normal(0.1, 0.03)
        vib_y = np.random.normal(0.08, 0.02)
        vib_z = np.random.normal(0.06, 0.01)
        temp = np.random.normal(72, 5)
        pressure = np.random.normal(45, 3)
        current = np.random.normal(13, 1)

        # SKU Changeover effects
        if sku_changeover:
            vib_z += np.random.uniform(1.5, 2.0)  # CIP vibration spike
            temp += np.random.uniform(15, 20)  # Thermal sanitization
            wear_increment = np.random.uniform(3, 5)
        else:
            wear_increment = np.random.uniform(0.1, 0.3)

        # Cumulative wear, capped at 100%
        wear_level = min(100, wear_level + wear_increment)

        # Trigger failures based on conditions (first matching rule wins)
        if wear_level > 80:
            failure = np.random.choice(FAILURE_MODES, p=HIGH_WEAR_FAILURE_WEIGHTS)
        elif vib_x > 0.25:
            failure = "bearing_wear"
        elif pressure < 35:
            failure = "seal_leak"
        elif current > 16:
            failure = "motor_fault"
        else:
            failure = np.random.choice(FAILURE_MODES, p=BASELINE_FAILURE_WEIGHTS)

        # Maintenance reset (random 1% chance). Applied after the failure
        # draw, so this row's failure reflects pre-maintenance wear while the
        # recorded Wear_Level is already 0.
        if np.random.random() < 0.01:
            wear_level = 0

        rows.append([
            timestamp,
            equipment,
            sku_changeover,
            max(0, vib_x),  # No negative vibrations
            max(0, vib_y),
            max(0, vib_z),
            temp,
            pressure,
            current,
            round(wear_level, 1),
            failure,
        ])

    return rows


# Runs when executed as a script or notebook cell (where __name__ is
# "__main__"), but not on import, so generate_rows() can be reused cheaply.
if __name__ == "__main__":
    np.random.seed(RANDOM_SEED)

    # Generate synthetic data
    data = generate_rows(n_rows, equipment_list, start_date)

    # Create DataFrame
    df = pd.DataFrame(data, columns=COLUMNS)

    # Save to Excel
    df.to_excel(OUTPUT_PATH, index=False)
    print("✅ Dataset generated: dairy_predictive_maintenance_data.xlsx")


✅ Dataset generated: dairy_predictive_maintenance_data.xlsx
