In [3]:
import pandas as pd
import numpy as np

# Set seed for reproducibility
np.random.seed(42)

# Number of samples
num_samples = 100

# How many entries each corruption pass (outliers, NaNs) overwrites per column
num_corrupted = 5


def _overwrite_random(values, replacements, count=num_corrupted):
    """Overwrite `count` distinct, randomly chosen positions of `values` in place.

    Parameters
    ----------
    values : np.ndarray
        1-D float array that is modified in place.
    replacements : scalar or sequence
        A scalar is written to every chosen position; a sequence is sampled
        (with replacement) to get one value per chosen position.
    count : int
        Number of positions to overwrite.
    """
    # Draw the replacement values BEFORE the positions. Python evaluates the
    # right-hand side of `arr[choice(...)] = choice(...)` first, so this order
    # keeps the seeded random stream consumed in that same sequence.
    if np.ndim(replacements) > 0:
        replacements = np.random.choice(replacements, count)
    positions = np.random.choice(len(values), count, replace=False)
    values[positions] = replacements


# Simulate speed (km/h): N(60, 20) clamped to 0-150, then a few impossible
# readings (200 / -10) injected as sensor-fault outliers
speed = np.clip(np.random.normal(60, 20, num_samples), 0, 150)
_overwrite_random(speed, [200, -10])

# Simulate engine performance (% efficiency, 50-100), with out-of-range
# readings (30 / 110) injected as outliers
engine_performance = np.clip(np.random.normal(80, 10, num_samples), 50, 100)
_overwrite_random(engine_performance, [30, 110])

# Simulate ultrasonic sensor input (distance in cm): no negative distances,
# with erroneous readings (500 / -50) injected as outliers
ultrasonic_sensor = np.clip(np.random.normal(100, 30, num_samples), 0, None)
_overwrite_random(ultrasonic_sensor, [500, -50])

# Simulate temperature sensor readings (°C), clamped to 10-90
temperature_sensor = np.clip(np.random.normal(40, 15, num_samples), 10, 90)

# Simulate fuel level (%): decreasing ramp from 100 to 10 plus noise.
# Clamp on BOTH sides: the noise on the first samples would otherwise push
# the percentage above 100.
fuel_level = np.linspace(100, 10, num_samples) + np.random.normal(0, 5, num_samples)
fuel_level = np.clip(fuel_level, 0, 100)

# Simulate brake pressure (bar): no negative pressure, with out-of-range
# readings (15 / -5) injected as outliers
brake_pressure = np.clip(np.random.normal(2, 1, num_samples), 0, None)
_overwrite_random(brake_pressure, [15, -5])

# Inject NaN values at independently chosen random positions in every column
for col in [speed, engine_performance, ultrasonic_sensor, temperature_sensor, fuel_level, brake_pressure]:
    _overwrite_random(col, np.nan)

# Create DataFrame
df = pd.DataFrame({
    "Speed (km/h)": speed,
    "Engine Performance (%)": engine_performance,
    "Ultrasonic Sensor (cm)": ultrasonic_sensor,
    "Temperature Sensor (°C)": temperature_sensor,
    "Fuel Level (%)": fuel_level,
    "Brake Pressure (bar)": brake_pressure,
})

# Save to CSV
file_path = "simulated_vehicle_data.csv"
df.to_csv(file_path, index=False)

file_path


'simulated_vehicle_data.csv'

In [7]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set seed for reproducibility
np.random.seed(42)

# Number of samples
num_samples = 100

# Generate timestamps (1-minute intervals)
start_time = datetime(2025, 3, 6, 12, 0, 0)
timestamps = [start_time + timedelta(minutes=i) for i in range(num_samples)]

# Simulate PID sensor output (control variable, range 0-100)
pid_sensor = np.random.uniform(0, 100, num_samples)

# Control speed based on PID sensor (higher PID leads to higher speed, with some randomness)
speed = 30 + (pid_sensor * 1.2) + np.random.normal(0, 5, num_samples)
speed = np.minimum(speed, 150)  # Limit max speed

# Control brake pressure inversely to PID sensor (higher PID = lower braking, lower PID = higher braking)
brake_pressure = (10 - (pid_sensor / 10)) + np.random.normal(0, 1, num_samples)
brake_pressure = np.maximum(brake_pressure, 0)  # No negative pressure

# Simulate temperature sensor (slowly increasing but decreasing with high speed)
temperature = np.linspace(20, 80, num_samples) + np.random.normal(0, 2, num_samples)
temperature -= speed * 0.05  # High speed cools down the system
temperature = np.maximum(temperature, 10)  # Minimum limit

# Fuel level (%): decreasing ramp from 100 to 10 plus noise. Clamp on both
# sides, since the noise on the first samples would otherwise exceed 100 %.
fuel_level = np.linspace(100, 10, num_samples) + np.random.normal(0, 5, num_samples)
fuel_level = np.clip(fuel_level, 0, 100)

# Engine performance (%) falls linearly as fuel level rises (110 - fuel).
# Cap at 100: whenever fuel drops below 10 the raw value would exceed 100 %.
engine_performance = np.minimum(110 - fuel_level, 100)

df_updated = pd.DataFrame({
    "Timestamp": timestamps,
    "PID Sensor Output": pid_sensor,
    "Speed (km/h)": speed,
    "Engine Performance (%)": engine_performance,
    "Temperature Sensor (°C)": temperature,
    "Fuel Level (%)": fuel_level,
    "Brake Pressure (bar)": brake_pressure,
})

# Inject NaN values randomly in different columns: each sensor column gets its
# own independently drawn set of 5 rows, so no whole record is blanked out
# across every sensor at once. Timestamps are never removed.
for column in ["Speed (km/h)", "Engine Performance (%)", "PID Sensor Output",
               "Temperature Sensor (°C)", "Fuel Level (%)", "Brake Pressure (bar)"]:
    nan_indices = np.random.choice(num_samples, 5, replace=False)
    df_updated.loc[nan_indices, column] = np.nan

# Save to CSV
file_path = "simulated_vehicle_pid_data.csv"
df_updated.to_csv(file_path, index=False)

print("Dataset saved as", file_path)

Dataset saved as simulated_vehicle_pid_data.csv
