In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# --- Configuration for Data Generation ---
# Seed NumPy's global random number generator so that re-running this cell
# (or a full "Restart Kernel -> Run All") regenerates exactly the same
# synthetic dataset. Change RANDOM_SEED to get a different, but still
# repeatable, sample.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Calendar span of the simulation. The generation loop iterates while
# current_date <= end_date, so both endpoints are included.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 6, 30)
num_batches_per_day = 5 # Assuming 5 batches per day for hardboard production

# Define typical ranges for parameters.
# Each value is a (low, high) pair. The six process parameters are unpacked
# straight into np.random.uniform(low, high); the two '*_Ideal' entries are
# only averaged (np.mean) to obtain the centre value of the quality targets.
ranges = {
    'Press_Temperature_C': (180, 220), # Degrees Celsius
    'Press_Pressure_psi': (400, 600), # PSI
    'Press_Cycle_Time_min': (8, 15), # Minutes
    'Fiber_Consistency_pct': (85, 95), # Percentage
    'Resin_Content_pct': (8, 12), # Percentage
    'Moisture_Content_PrePress_pct': (5, 8), # Percentage
    'Hardboard_Strength_MPa_Ideal': (40, 55), # MPa (for target)
    'Hardboard_Density_g_cm3_Ideal': (0.85, 0.95), # g/cm3 (for target)
}

# --- Data Generation Logic ---
# Walk the calendar one day at a time and synthesise a fixed number of
# batches per day. Every batch becomes one row (a plain list) appended to
# `data`, with the fields ordered as: date, batch id, six process
# parameters, four defect fields, strength, density, energy, steam.
#
# The order of the np.random calls decides which draw feeds which field, so
# keep it stable if runs sharing the same RNG state must stay comparable.
data = []
current_date = start_date
batch_id_counter = 1

while current_date <= end_date:
    for _ in range(num_batches_per_day):
        # Process parameters: one uniform draw per parameter, in this order.
        temp_c, pressure_psi, cycle_min, fiber_pct, resin_pct, moisture_pct = [
            np.random.uniform(*ranges[param_name])
            for param_name in (
                'Press_Temperature_C',
                'Press_Pressure_psi',
                'Press_Cycle_Time_min',
                'Fiber_Consistency_pct',
                'Resin_Content_pct',
                'Moisture_Content_PrePress_pct',
            )
        ]

        # --- Defect count ---
        # Defects accumulate whenever a parameter leaves its acceptance band.
        # np.random.randint excludes its upper bound, so randint(a, b)
        # yields a .. b-1.
        defect_points = 0
        if current_date.month in (2, 5):
            # February and May are modelled as difficult months: 0-2 extra.
            defect_points += np.random.randint(0, 3)
        if not (190 <= temp_c <= 210):
            # Press temperature outside 190-210: 1-3 extra.
            defect_points += np.random.randint(1, 4)
        if not (450 <= pressure_psi <= 550):
            # Press pressure outside 450-550: 1-2 extra.
            defect_points += np.random.randint(1, 3)
        if not (87 <= fiber_pct <= 93):
            # Fiber consistency outside 87-93: randint(1, 2) is always 1.
            defect_points += np.random.randint(1, 2)
        if not (9 <= resin_pct <= 11):
            # Resin content outside 9-11: randint(1, 2) is always 1.
            defect_points += np.random.randint(1, 2)
        if np.random.rand() < 0.05:
            # Roughly 5% of batches get a spike of 5-9 additional defects.
            defect_points += np.random.randint(5, 10)
        # Final 0-or-1 jitter; max() keeps the count non-negative.
        defect_total = max(0, defect_points + np.random.randint(0, 2))

        # --- Defect type flags (0/1) ---
        # Each flag fires with a probability that grows with the defect count.
        surface_flag = int(np.random.rand() < (0.05 + defect_total * 0.05))
        density_var_flag = int(np.random.rand() < (0.03 + defect_total * 0.04))
        warpage_flag = int(np.random.rand() < (0.02 + defect_total * 0.03))

        # --- Strength ---
        # Centre of the ideal range, raised by temperature, pressure and
        # resin above their nominal points (200, 500, 10), lowered by
        # defects, plus Gaussian noise; then clamped to [35, 60].
        strength_noise = np.random.normal(0, 2)
        strength_mpa = (
            np.mean(ranges['Hardboard_Strength_MPa_Ideal'])
            + (temp_c - 200) * 0.1
            + (pressure_psi - 500) * 0.01
            + (resin_pct - 10) * 0.5
            - (defect_total * 0.5)
            + strength_noise
        )
        strength_mpa = min(60, max(35, strength_mpa))

        # --- Density ---
        # Centre of the ideal range, raised by pressure above 500, lowered by
        # cycle time above 12, plus Gaussian noise; clamped to [0.80, 1.0].
        density_noise = np.random.normal(0, 0.01)
        density_g_cm3 = (
            np.mean(ranges['Hardboard_Density_g_cm3_Ideal'])
            + (pressure_psi - 500) * 0.0005
            - (cycle_min - 12) * 0.001
            + density_noise
        )
        density_g_cm3 = min(1.0, max(0.80, density_g_cm3))

        # --- Energy ---
        # Grows with temperature, pressure, cycle time and defect count on
        # top of a base of 500, with Gaussian noise; floored at 600.
        energy_noise = np.random.normal(0, 20)
        energy_kwh = (
            (temp_c * 0.5)
            + (pressure_psi * 0.1)
            + (cycle_min * 5)
            + (defect_total * 10)
            + energy_noise
            + 500
        )
        energy_kwh = max(600, energy_kwh)

        # --- Steam ---
        # Grows with temperature, cycle time and defect count on top of a
        # base of 1000, with Gaussian noise; floored at 1200.
        steam_noise = np.random.normal(0, 30)
        steam_kg = (
            (temp_c * 0.8)
            + (cycle_min * 10)
            + (defect_total * 15)
            + steam_noise
            + 1000
        )
        steam_kg = max(1200, steam_kg)

        # One row per batch; the id is the running counter zero-padded to
        # four digits.
        data.append([
            current_date,
            f"HB_BATCH_{batch_id_counter:04d}",
            temp_c,
            pressure_psi,
            cycle_min,
            fiber_pct,
            resin_pct,
            moisture_pct,
            defect_total,
            surface_flag,
            density_var_flag,
            warpage_flag,
            strength_mpa,
            density_g_cm3,
            energy_kwh,
            steam_kg,
        ])
        batch_id_counter += 1

    current_date += timedelta(days=1)

# Turn the per-batch rows into a DataFrame. The names are listed in the same
# order as the fields of each row in `data`.
column_names = [
    # Identification
    'Date', 'Batch_ID',
    # Process parameters
    'Press_Temperature_C', 'Press_Pressure_psi', 'Press_Cycle_Time_min',
    'Fiber_Consistency_pct', 'Resin_Content_pct', 'Moisture_Content_PrePress_pct',
    # Defects
    'Total_Defects_Count', 'Defect_Type_Surface', 'Defect_Type_Density_Var', 'Defect_Type_Warpage',
    # Quality outcomes
    'Hardboard_Strength_MPa', 'Hardboard_Density_g_cm3',
    # Resource consumption
    'Total_Energy_kWh_Batch', 'Steam_Consumption_kg_Batch',
]
df = pd.DataFrame(data, columns=column_names)

# Quick sanity check: first rows, then summary statistics.
for preview in (df.head(), df.describe()):
    print(preview)

# Save to CSV for easy use in Power BI/Tableau (row index is not written).
df.to_csv('hardboard_production_data.csv', index=False)
print("\nSample data saved to hardboard_production_data.csv")

        Date       Batch_ID  Press_Temperature_C  Press_Pressure_psi  \
0 2024-01-01  HB_BATCH_0001           215.136027          454.378898   
1 2024-01-01  HB_BATCH_0002           212.041181          450.738349   
2 2024-01-01  HB_BATCH_0003           185.097636          403.763762   
3 2024-01-01  HB_BATCH_0004           215.546680          591.857399   
4 2024-01-01  HB_BATCH_0005           190.695652          557.874640   

   Press_Cycle_Time_min  Fiber_Consistency_pct  Resin_Content_pct  \
0             10.665507              91.570470          11.180889   
1             14.233432              94.528777           8.169427   
2             10.857299              92.259118           8.868407   
3             12.869924              93.527715           8.229339   
4             11.164384              85.687527          10.708137   

   Moisture_Content_PrePress_pct  Total_Defects_Count  Defect_Type_Surface  \
0                       5.439726                    4                    0