In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [2]:
from google.colab import files
uploaded = files.upload()

Saving engine_failure_dataset.csv to engine_failure_dataset.csv


In [3]:
df = pd.read_csv(next(iter(uploaded)))
df.head()

Unnamed: 0,Time_Stamp,Temperature (°C),RPM,Fuel_Efficiency,Vibration_X,Vibration_Y,Vibration_Z,Torque,Power_Output (kW),Fault_Condition,Operational_Mode
0,24-12-2024 10:00,60.308585,3426.827588,20.445472,0.874657,0.005686,0.529798,107.877658,23.367684,2,Idle
1,24-12-2024 10:05,112.705055,2949.758424,23.083947,0.696461,0.391779,0.124336,60.351655,57.941022,3,Cruising
2,24-12-2024 10:10,108.670976,1817.97104,20.555326,0.495276,0.189714,0.886417,110.986564,47.732998,2,Cruising
3,24-12-2024 10:15,107.114691,2730.660539,23.226431,0.986206,0.983202,0.468114,77.416793,44.112039,2,Cruising
4,24-12-2024 10:20,118.075814,1854.488677,21.148226,0.71081,0.101139,0.481034,100.475881,80.681972,2,Cruising


In [4]:
df.tail()

Unnamed: 0,Time_Stamp,Temperature (°C),RPM,Fuel_Efficiency,Vibration_X,Vibration_Y,Vibration_Z,Torque,Power_Output (kW),Fault_Condition,Operational_Mode
995,27-12-2024 20:55,88.231211,1477.774501,23.749532,0.827987,0.664745,0.625331,178.466184,41.494154,3,Cruising
996,27-12-2024 21:00,105.241946,1617.745044,16.16611,0.753548,0.797969,0.618335,154.586204,52.98724,0,Cruising
997,27-12-2024 21:05,119.066775,2473.669785,17.657404,0.98296,0.699409,0.804165,164.138685,55.994142,3,Idle
998,27-12-2024 21:10,90.620157,2297.744136,17.479882,0.686213,0.87504,0.37069,78.687527,38.729023,1,Heavy Load
999,27-12-2024 21:15,93.144406,2511.308863,17.597145,0.950341,0.340887,0.072738,178.572291,83.769023,0,Cruising


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Time_Stamp         1000 non-null   object 
 1   Temperature (°C)   1000 non-null   float64
 2   RPM                1000 non-null   float64
 3   Fuel_Efficiency    1000 non-null   float64
 4   Vibration_X        1000 non-null   float64
 5   Vibration_Y        1000 non-null   float64
 6   Vibration_Z        1000 non-null   float64
 7   Torque             1000 non-null   float64
 8   Power_Output (kW)  1000 non-null   float64
 9   Fault_Condition    1000 non-null   int64  
 10  Operational_Mode   1000 non-null   object 
dtypes: float64(8), int64(1), object(2)
memory usage: 86.1+ KB


In [6]:
df.describe()

Unnamed: 0,Temperature (°C),RPM,Fuel_Efficiency,Vibration_X,Vibration_Y,Vibration_Z,Torque,Power_Output (kW),Fault_Condition
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,90.504323,2512.321847,22.490255,0.500771,0.502289,0.483137,123.574345,58.881629,1.465
std,17.255081,867.543649,4.422922,0.294531,0.281046,0.288449,42.89715,22.535297,1.106345
min,60.013118,1000.737383,15.045726,0.00017,0.000741,0.001086,50.059144,20.14891,0.0
25%,75.708728,1757.938417,18.634425,0.234537,0.264195,0.233457,85.735962,40.220009,0.0
50%,90.168595,2498.874857,22.433952,0.5192,0.503314,0.461902,122.377927,58.850054,1.0
75%,105.478252,3273.470646,26.448111,0.752874,0.744645,0.737903,160.091056,78.345886,2.0
max,119.982191,3996.039482,29.998912,0.999893,0.999531,0.999384,199.909402,99.932956,3.0


In [7]:
df = df.drop(columns=['Time_Stamp', 'Operational_Mode'])

In [8]:
df['Fault_Binary'] = df['Fault_Condition'].apply(lambda x: 0 if x == 2 else 1)

In [9]:
df = df.drop(columns=['Fault_Condition'])

In [10]:
print(df.isnull().sum())

Temperature (°C)     0
RPM                  0
Fuel_Efficiency      0
Vibration_X          0
Vibration_Y          0
Vibration_Z          0
Torque               0
Power_Output (kW)    0
Fault_Binary         0
dtype: int64


In [11]:
X = df.drop(columns=['Fault_Binary'])
y = df['Fault_Binary']


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [14]:
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)

In [15]:
print("Logistic Regression Evaluation:")
print(confusion_matrix(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))


Logistic Regression Evaluation:
[[  0  62]
 [  0 138]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        62
           1       0.69      1.00      0.82       138

    accuracy                           0.69       200
   macro avg       0.34      0.50      0.41       200
weighted avg       0.48      0.69      0.56       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
