In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, average_precision_score, precision_recall_curve

In [2]:
# 1. Load dataset
df = pd.read_csv("ai4i2020.csv")

In [4]:
df

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,M24855,M,298.8,308.4,1604,29.5,14,0,0,0,0,0,0
9996,9997,H39410,H,298.9,308.4,1632,31.8,17,0,0,0,0,0,0
9997,9998,M24857,M,299.0,308.6,1645,33.4,22,0,0,0,0,0,0
9998,9999,H39412,H,299.0,308.7,1408,48.5,25,0,0,0,0,0,0


In [5]:
# 2. Prepare features and targets
X = df[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
y_binary = df['Machine failure']   # Model 1 target
fail_types = ['TWF', 'HDF', 'PWF', 'OSF', 'RNF']

In [6]:
# 3. Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

In [7]:
# 4. Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
# ---------- MODEL 1: FAILURE PREDICTION (Binary) ----------
print("\n=== MODEL 1: Predict Failure (Yes/No) ===")
models1 = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier()
}
for name, model in models1.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else y_pred
    ap = average_precision_score(y_test, y_prob)
    print(f"\n{name}")
    print(f"Average Precision Score: {ap:.3f}")


=== MODEL 1: Predict Failure (Yes/No) ===

Logistic Regression
Average Precision Score: 0.464

Random Forest
Average Precision Score: 0.752

Gradient Boosting
Average Precision Score: 0.757


In [17]:
# ---------- MODEL 2: FAILURE TYPE CLASSIFICATION (Multi-class) ----------
print("\n=== MODEL 2: Classify Failure Type ===")
df_fail = df[df['Machine failure'] == 1].copy()
X2 = df_fail[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
# Create a single label for failure type
df_fail['failure_type'] = df_fail[fail_types].idxmax(axis=1)
y2 = LabelEncoder().fit_transform(df_fail['failure_type'])
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)
X2_train = scaler.fit_transform(X2_train)
X2_test = scaler.transform(X2_test)
models2 = {
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(),
}
for name, model in models2.items():
    model.fit(X2_train, y2_train)
    y2_pred = model.predict(X2_test)
    print(f"\n{name}")
    print("Confusion Matrix:")
    print(confusion_matrix(y2_test, y2_pred))
    print("Classification Report:")
    print(classification_report(y2_test, y2_pred, digits=3))


=== MODEL 2: Classify Failure Type ===

Decision Tree
Confusion Matrix:
[[17  3  1  0]
 [ 1 15  0  1]
 [ 1  1  9  1]
 [ 2  1  4 11]]
Classification Report:
              precision    recall  f1-score   support

           0      0.810     0.810     0.810        21
           1      0.750     0.882     0.811        17
           2      0.643     0.750     0.692        12
           3      0.846     0.611     0.710        18

    accuracy                          0.765        68
   macro avg      0.762     0.763     0.756        68
weighted avg      0.775     0.765     0.763        68


Naive Bayes
Confusion Matrix:
[[19  2  0  0]
 [ 0 16  0  1]
 [ 0  2 10  0]
 [ 1  3  3 11]]
Classification Report:
              precision    recall  f1-score   support

           0      0.950     0.905     0.927        21
           1      0.696     0.941     0.800        17
           2      0.769     0.833     0.800        12
           3      0.917     0.611     0.733        18

    accuracy         