In [None]:
# Engine Sensor Anomaly Detection (Synthetic Dataset)
# Description: Generates synthetic engine sensor data, cleans it, detects anomalies using Isolation Forest,
# and visualizes abnormal engine behavior. Suitable for automotive diagnostics/data science portfolio.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Seed NumPy's global generator so every run produces the same synthetic data.
np.random.seed(42)

# ---------------------------------------------------
# 1. Generate Synthetic Engine Dataset
# ---------------------------------------------------
N = 5000  # number of samples in the synthetic recording

time = np.arange(N)

# Baseline readings: independent Gaussian noise around a nominal value.
# The draw order below (rpm, temp, oil_pressure, vibration) must stay fixed,
# because under a fixed seed it determines which values each sensor receives.
rpm = np.random.normal(loc=2000, scale=150, size=N)
temp = np.random.normal(loc=90, scale=5, size=N)
oil_pressure = np.random.normal(loc=45, scale=4, size=N)
vibration = np.random.normal(loc=0.3, scale=0.05, size=N)

# Fault injection: choose 150 distinct sample positions and shift all four
# signals at those positions (rpm, temperature and vibration up, oil pressure
# down). Indices are unique, so each position is shifted exactly once.
anomaly_idx = np.random.choice(N, size=150, replace=False)
rpm[anomaly_idx] = rpm[anomaly_idx] + np.random.normal(
    loc=800, scale=200, size=anomaly_idx.size
)
temp[anomaly_idx] = temp[anomaly_idx] + np.random.normal(
    loc=25, scale=10, size=anomaly_idx.size
)
oil_pressure[anomaly_idx] = oil_pressure[anomaly_idx] - np.random.normal(
    loc=20, scale=5, size=anomaly_idx.size
)
vibration[anomaly_idx] = vibration[anomaly_idx] + np.random.normal(
    loc=0.5, scale=0.2, size=anomaly_idx.size
)

# Assemble the signals into one table, one row per sample.
data = pd.DataFrame(
    dict(
        time=time,
        rpm=rpm,
        temperature=temp,
        oil_pressure=oil_pressure,
        vibration=vibration,
    )
)

print("Dataset Created:", data.shape)

# ---------------------------------------------------
# 2. Data Cleaning
# ---------------------------------------------------
# Clamp each sensor column to a fixed [lower, upper] range so that extreme
# readings are replaced by the nearest bound instead of being dropped.
sensor_limits = {
    'rpm': (500, 5000),
    'temperature': (40, 150),
    'oil_pressure': (5, 100),
    'vibration': (0, 2),
}

for column, (lower, upper) in sensor_limits.items():
    data[column] = data[column].clip(lower=lower, upper=upper)

# ---------------------------------------------------
# 3. Feature Scaling
# ---------------------------------------------------
# Columns fed to the detector; 'time' is deliberately left out.
features = ['rpm', 'temperature', 'oil_pressure', 'vibration']

# Standardize the selected columns: learn the per-column statistics first,
# then apply them to the same rows.
sensor_frame = data[features]
scaler = StandardScaler()
scaler.fit(sensor_frame)
scaled_data = scaler.transform(sensor_frame)

# ---------------------------------------------------
# 4. Anomaly Detection Model
# ---------------------------------------------------
# Isolation Forest with a fixed seed; contamination=0.03 asks the model to
# flag 3% of the rows as outliers.
model = IsolationForest(contamination=0.03, random_state=42)

# fit_predict labels outliers as -1 and inliers as 1. Store a 0/1 flag
# instead, where 1 marks an anomaly.
raw_labels = model.fit_predict(scaled_data)
data['anomaly'] = (raw_labels == -1).astype('int64')

print("Detected anomalies:", data['anomaly'].sum())

# ---------------------------------------------------
# 5. Visualization
# ---------------------------------------------------
def _save_anomaly_plot(frame, column, series_label, title, y_label, out_path):
    """Plot one sensor column over time, mark flagged rows, and save the figure.

    Args:
        frame: DataFrame with 'time', 'anomaly' (1 = flagged) and `column`.
        column: Name of the sensor column to draw.
        series_label: Legend entry for the sensor line.
        title: Figure title.
        y_label: Y-axis label.
        out_path: File path the figure is written to.
    """
    # Filter the flagged rows once and reuse them for both scatter axes.
    flagged = frame[frame['anomaly'] == 1]

    plt.figure(figsize=(14, 6))
    plt.plot(frame['time'], frame[column], label=series_label, alpha=0.6)
    plt.scatter(flagged['time'], flagged[column],
                color='red', label='Anomaly', s=10)
    plt.title(title)
    plt.xlabel('Time')
    plt.ylabel(y_label)
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path)
    # Close the figure so consecutive plots do not accumulate in memory.
    plt.close()


_save_anomaly_plot(
    data,
    column='rpm',
    series_label='RPM',
    title='Engine RPM Anomaly Detection',
    y_label='RPM',
    out_path='rpm_anomalies.png',
)

# The degree sign is written as an escape so the label cannot be corrupted by
# a file-encoding mismatch (the previous literal rendered as "Â°C").
_save_anomaly_plot(
    data,
    column='temperature',
    series_label='Temperature',
    title='Engine Temperature Anomaly Detection',
    y_label='Temperature (\N{DEGREE SIGN}C)',
    out_path='temp_anomalies.png',
)

# ---------------------------------------------------
# 6. Save Results
# ---------------------------------------------------
# Write the full table (sensor readings plus the 0/1 anomaly flag) to CSV
# without the DataFrame index, then report the artifacts produced by the run.
csv_path = 'engine_sensor_analysis.csv'
data.to_csv(csv_path, index=False)
print("Results saved: engine_sensor_analysis.csv, rpm_anomalies.png, temp_anomalies.png")


Dataset Created: (5000, 5)
Detected anomalies: 150
Results saved: engine_sensor_analysis.csv, rpm_anomalies.png, temp_anomalies.png
