In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [2]:
df = pd.read_csv(r'../dataset/predictive_maintenance.csv')
df.head(2)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure


In [3]:
le = LabelEncoder()
df['Type'] = le.fit_transform(df['Type'])

df = df.drop('Product ID', axis=1)

X = df.drop(['Target', 'Failure Type'], axis=1)  
y = df['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9735


In [4]:
df = pd.read_csv(r'../dataset/predictive_maintenance.csv')
df.head(2)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure


In [5]:
categorical_cols = ['Type', 'Failure Type']
numerical_cols = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']

In [6]:
categorical_transformer = OneHotEncoder(drop='first')
numerical_transformer = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

In [7]:
data_preprocessed = preprocessor.fit_transform(df)

In [8]:
data_preprocessed.shape

(10000, 12)

In [9]:
# Data selection, Feature Selection	

In [10]:
X = df[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
y = df['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(df['Failure Type'])

X_train, X_test, y_encoded_train, y_encoded_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(df['Failure Type'])

X_train, X_test, y_encoded_train, y_encoded_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
df['Temperature Difference'] = df['Process temperature [K]'] - df['Air temperature [K]']

df['Operational Efficiency'] = df['Rotational speed [rpm]'] / df['Torque [Nm]']

print(df.head())

   UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
0    1     M14860    M                298.1                    308.6   
1    2     L47181    L                298.2                    308.7   
2    3     L47182    L                298.1                    308.5   
3    4     L47183    L                298.2                    308.6   
4    5     L47184    L                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target Failure Type  \
0                    1551         42.8                0       0   No Failure   
1                    1408         46.3                3       0   No Failure   
2                    1498         49.4                5       0   No Failure   
3                    1433         39.5                7       0   No Failure   
4                    1408         40.0                9       0   No Failure   

   Temperature Difference  Operational Efficiency  
0                    10.5         

In [14]:
numerical_cols = ['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]', 'Temperature Difference', 'Operational Efficiency']

scaler = MinMaxScaler()

df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

print(df[numerical_cols].head())

   Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  \
0             0.304348                 0.358025                0.222934   
1             0.315217                 0.370370                0.139697   
2             0.304348                 0.345679                0.192084   
3             0.315217                 0.358025                0.154249   
4             0.315217                 0.370370                0.139697   

   Torque [Nm]  Tool wear [min]  Temperature Difference  \
0     0.535714         0.000000                0.644444   
1     0.583791         0.011858                0.644444   
2     0.626374         0.019763                0.622222   
3     0.490385         0.027668                0.622222   
4     0.497253         0.035573                0.644444   

   Operational Efficiency  
0                0.027658  
1                0.019823  
2                0.019707  
3                0.027712  
4                0.026262  


In [15]:
data = pd.get_dummies(df, columns=['Type', 'Failure Type'])

print(data.head())

   UDI Product ID  Air temperature [K]  Process temperature [K]  \
0    1     M14860             0.304348                 0.358025   
1    2     L47181             0.315217                 0.370370   
2    3     L47182             0.304348                 0.345679   
3    4     L47183             0.315217                 0.358025   
4    5     L47184             0.315217                 0.370370   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target  \
0                0.222934     0.535714         0.000000       0   
1                0.139697     0.583791         0.011858       0   
2                0.192084     0.626374         0.019763       0   
3                0.154249     0.490385         0.027668       0   
4                0.139697     0.497253         0.035573       0   

   Temperature Difference  Operational Efficiency  Type_H  Type_L  Type_M  \
0                0.644444                0.027658   False   False    True   
1                0.644444               

In [16]:
X = data[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
y = data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

log_reg = LogisticRegression()
log_reg.fit(X_train_scaled, y_train)

y_pred = log_reg.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.973


In [17]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_rf))


Accuracy: 0.9835


In [18]:
svm_clf = SVC()
svm_clf.fit(X_train_scaled, y_train)
y_pred_svm = svm_clf.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred_svm))


Accuracy: 0.9765
