In [9]:
import pandas as pd
import numpy as np

# Number of simulated records to generate
n = 5000

# Seed NumPy's global generator so every run produces the same samples
np.random.seed(42)

# (mean, standard deviation) of the normal distribution behind each sensor.
# Insertion order matters: it fixes the order of the random draws, and
# therefore the exact values obtained under the seed above.
sensor_params = {
    'engine_temp': (90, 10),      # degrees Celsius
    'vibration': (2, 0.5),        # g
    'oil_pressure': (3.5, 0.8),   # bar
    'rpm': (3000, 500),
    'speed': (80, 20),
}

data = pd.DataFrame({
    column: np.random.normal(loc=mean, scale=std, size=n)
    for column, (mean, std) in sensor_params.items()
})

# Deterministic "failure" label: a record is flagged (1) only when all three
# conditions hold at once -- hot engine, strong vibration, low oil pressure.
overheated = data['engine_temp'].gt(100)
shaking = data['vibration'].gt(2.5)
low_pressure = data['oil_pressure'].lt(3)
data['failure'] = (overheated & shaking & low_pressure).astype(int)

# Persist the simulated dataset (the row index is not written)
data.to_csv("vehicle_sensor_data.csv", index=False)

# Preview the first rows
data.head()


Unnamed: 0,engine_temp,vibration,oil_pressure,rpm,speed,failure
0,94.967142,1.78812,2.957204,2928.288392,86.965725,0
1,88.617357,1.773293,3.2556,2983.672048,85.666472,0
2,96.476885,1.102178,3.022095,3032.147439,61.269603,0
3,105.230299,1.834955,3.588334,3473.430731,91.591684,0
4,87.658466,2.366415,4.457743,2626.391346,50.198346,0


In [10]:
# Show the first ten simulated records
data.head(n=10)


Unnamed: 0,engine_temp,vibration,oil_pressure,rpm,speed,failure
0,94.967142,1.78812,2.957204,2928.288392,86.965725,0
1,88.617357,1.773293,3.2556,2983.672048,85.666472,0
2,96.476885,1.102178,3.022095,3032.147439,61.269603,0
3,105.230299,1.834955,3.588334,3473.430731,91.591684,0
4,87.658466,2.366415,4.457743,2626.391346,50.198346,0
5,87.65863,1.362884,2.883166,2576.826504,66.916313,0
6,105.792128,2.524241,4.300656,3618.309871,40.018244,0
7,97.674347,2.243887,2.874662,2768.677629,111.17253,0
8,85.305256,1.632883,2.821898,2368.482323,75.36901,0
9,95.4256,1.929235,4.154876,2757.435029,123.337681,0


In [11]:
# Print column dtypes and non-null counts, then display the number of
# missing values found in each column
data.info()
data.isna().sum()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   engine_temp   5000 non-null   float64
 1   vibration     5000 non-null   float64
 2   oil_pressure  5000 non-null   float64
 3   rpm           5000 non-null   float64
 4   speed         5000 non-null   float64
 5   failure       5000 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 234.5 KB


engine_temp     0
vibration       0
oil_pressure    0
rpm             0
speed           0
failure         0
dtype: int64

In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column
data.describe()


Unnamed: 0,engine_temp,vibration,oil_pressure,rpm,speed,failure
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,90.056019,1.995063,3.508442,3008.257552,79.632954,0.0072
std,9.964798,0.505219,0.799016,501.668343,19.667493,0.084555
min,57.587327,0.0388,0.799537,1071.812335,6.89827,0.0
25%,83.42095,1.656709,2.971291,2667.659628,65.964798,0.0
50%,90.134656,1.991275,3.507934,3009.629569,79.839663,0.0
75%,96.660106,2.338619,4.040427,3353.307326,93.012129,0.0
max,129.262377,3.764528,6.243128,5239.542126,152.227001,1.0
