In [3]:
import os
# Make sure the raw-data output directory exists before anything is written to it;
# exist_ok=True turns the call into a no-op when the directory is already there.
os.makedirs("./data/raw", exist_ok=True)

In [4]:
# Show the absolute path that "./data/raw" resolves to from the current working directory.
print(os.path.abspath("./data/raw"))

/Users/sahar/Desktop/IndustrialDataPipeline2/data/raw


In [6]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Output directory for the generated CSV (no-op if it already exists).
os.makedirs("./data/raw", exist_ok=True)

# Seed both RNGs used below (NumPy's global generator and the stdlib `random`
# module) so that the generated dataset is reproducible.
np.random.seed(42)
random.seed(42)

# Simulation size: number of machines and one sample per minute for one day.
n_machines = 6
minutes_per_machine = 24 * 60
start_time = datetime(2025, 11, 1, 0, 0, 0)

# Machine profiles: per-machine baseline levels, keyed "M01" .. "M06".
# Each successive machine gets a slightly higher baseline for every signal.
machine_profiles = {
    f"M{idx + 1:02d}": dict(
        temp_base=60 + idx * 3,
        vib_base=0.3 + idx * 0.05,
        pres_base=25 + idx * 1.5,
        speed_base=1200 + idx * 100,
    )
    for idx in range(n_machines)
}


def generate_signal(t_idx, base, amplitude, period, trend, jitter, pulse_prob):
    """Return one noisy sample of a synthetic sensor signal at step ``t_idx``.

    The sample is the sum of five terms:

    * ``base``: the constant baseline level;
    * a sine wave of the given ``amplitude`` that repeats every ``period`` steps;
    * a linear drift of ``trend`` per step;
    * Gaussian noise with zero mean and standard deviation ``jitter``;
    * an occasional positive pulse: with probability ``pulse_prob`` a spike of
      ``amplitude * (2 + u)`` is added, where ``u`` is uniform on [0, 1).

    Randomness comes from NumPy's global generator. Each call draws one normal
    value, then one uniform value for the pulse decision, and a second uniform
    value only when the pulse fires.
    """
    phase = 2 * np.pi * (t_idx % period) / period
    periodic = amplitude * np.sin(phase)
    slow = trend * t_idx
    jitter_val = np.random.normal(0, jitter)

    # The pulse decision is drawn first; the pulse size is drawn only if it fires.
    if np.random.rand() < pulse_prob:
        pulse = amplitude * (2 + np.random.rand())
    else:
        pulse = 0

    return base + periodic + slow + jitter_val + pulse


# ----------------------------
#  Professional failure models
# ----------------------------

def gradual_failure(t, temp, vib, pres, speed, profile):
    """Gradual failure - the pattern that is easiest for an ML model to learn.

    Shifts every signal by an amount proportional to ``t``: temperature,
    vibration and pressure rise while speed drops. Temperature also receives a
    random offset drawn uniformly from [0.3, 0.7) using NumPy's global generator.

    ``profile`` is accepted for interface compatibility but is not used by the
    current implementation.

    Returns the adjusted ``(temp, vib, pres, speed)`` tuple.
    """
    temp_offset = np.random.uniform(0.3, 0.7)
    return (
        temp + (0.01 * t + temp_offset),
        vib + 0.0005 * t,
        pres + 0.002 * t,
        speed - 0.02 * t,
    )

def sudden_failure(temp, vib, pres, speed):
    """Sudden failure: one large random jump in all four signals.

    Temperature, vibration and pressure are increased and speed is decreased,
    each by an independent uniform draw from NumPy's global generator. The
    draws happen in the order temperature, vibration, pressure, speed.

    Returns the adjusted ``(temp, vib, pres, speed)`` tuple.
    """
    temp_jump = np.random.uniform(10, 25)
    vib_jump = np.random.uniform(0.2, 0.5)
    pres_jump = np.random.uniform(5, 12)
    speed_drop = np.random.uniform(250, 600)
    return temp + temp_jump, vib + vib_jump, pres + pres_jump, speed - speed_drop

def mild_failure(temp, vib):
    """Mild failure - detectable by a model, but hard to spot.

    Raises temperature by a uniform draw from [4, 10) and vibration by a
    uniform draw from [0.05, 0.15), in that order, using NumPy's global
    generator. Pressure and speed are not involved.

    Returns the adjusted ``(temp, vib)`` pair.
    """
    temp_bump = np.random.uniform(4, 10)
    vib_bump = np.random.uniform(0.05, 0.15)
    return temp + temp_bump, vib + vib_bump


# ----------------------------
#  Main data generation
# ----------------------------

# One dict per (machine, minute) sample; turned into a DataFrame in one go below.
rows = []

for machine_id, profile in machine_profiles.items():
    total_minutes = minutes_per_machine
    # Four distinct failure minutes per machine, drawn from
    # range(200, total_minutes - 200) so none falls near either end of the day.
    failure_times = sorted(random.sample(range(200, total_minutes - 200), 4))

    for t in range(total_minutes):
        ts = start_time + timedelta(minutes=t)

        # Baseline signals. They are generated in a fixed order (temperature,
        # vibration, pressure, speed) because each call consumes random draws.
        temp = generate_signal(
            t, profile["temp_base"],
            amplitude=3.5, period=120, trend=0.0007, jitter=0.28, pulse_prob=0.001,
        )
        vib = generate_signal(
            t, profile["vib_base"],
            amplitude=0.05, period=30, trend=0.00002, jitter=0.01, pulse_prob=0.001,
        )
        pres = generate_signal(
            t, profile["pres_base"],
            amplitude=0.85, period=180, trend=0.0003, jitter=0.22, pulse_prob=0.0007,
        )
        speed = generate_signal(
            t, profile["speed_base"],
            amplitude=30, period=90, trend=0.01, jitter=4, pulse_prob=0.0006,
        )

        # Couple vibration to how far speed deviates from the machine's baseline speed.
        vib += 0.00007 * (speed - profile["speed_base"])

        # Pre-failure ramp: during the 20 minutes leading up to (and including)
        # each failure minute, apply a gradual degradation that grows with the
        # time elapsed since the ramp started.
        for f_time in failure_times:
            ramp_start = f_time - 20
            if ramp_start <= t <= f_time:
                temp, vib, pres, speed = gradual_failure(
                    t - ramp_start, temp, vib, pres, speed, profile
                )

        # Actual failure: at a failure minute, pick one failure mode at random,
        # apply it on top of the ramp, and flag the row.
        maintenance_flag = int(t in failure_times)
        if maintenance_flag:
            failure_type = random.choice(["sudden", "gradual", "mild"])

            if failure_type == "sudden":
                temp, vib, pres, speed = sudden_failure(temp, vib, pres, speed)
            elif failure_type == "gradual":
                temp, vib, pres, speed = gradual_failure(25, temp, vib, pres, speed, profile)
            elif failure_type == "mild":
                temp, vib = mild_failure(temp, vib)

        record = {
            "timestamp": ts.strftime("%Y-%m-%d %H:%M:%S"),
            "machine_id": machine_id,
            "temperature": round(float(temp), 3),
            "vibration": round(float(vib), 4),
            "pressure": round(float(pres), 3),
            "speed": round(float(speed), 2),
            "maintenance_flag": maintenance_flag,
        }
        rows.append(record)

# Interleave the machines chronologically. The timestamps are zero-padded
# strings, so sorting them as text matches chronological order.
df = (
    pd.DataFrame(rows)
    .sort_values(by=["timestamp", "machine_id"])
    .reset_index(drop=True)
)

out_path = "./data/raw/industrial_sensor_data_final.csv"
df.to_csv(out_path, index=False)

print("Dataset created:", len(df))
print("Failure ratio:", df["maintenance_flag"].value_counts(normalize=True))

Dataset created: 8640
Failure ratio: maintenance_flag
0    0.997222
1    0.002778
Name: proportion, dtype: float64
