## Getting Necessary Libraries

In [27]:
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix,ConfusionMatrixDisplay
from sklearn.tree import DecisionTreeClassifier

import warnings
warnings.filterwarnings("ignore")

## Loading Dataset

In [28]:
dataset = pd.read_csv('cleaned_dataset.csv')

In [29]:
dataset

Unnamed: 0,Target,Temperature Difference [C],Type,Failure Type,Rotational speed [rpm],Torque [Nm],Air temperature [C],Process temperature [C],Tool wear [min]
0,0.0,10.5,1.0,0.0,0.253968,0.200000,0.304348,0.358025,0.000000
1,0.0,10.5,0.0,0.0,-0.502646,0.459259,0.315217,0.370370,0.011858
2,0.0,10.4,0.0,0.0,-0.026455,0.688889,0.304348,0.345679,0.019763
3,0.0,10.4,0.0,0.0,-0.370370,-0.044444,0.315217,0.358025,0.027668
4,0.0,10.5,0.0,0.0,-0.502646,-0.007407,0.315217,0.370370,0.035573
...,...,...,...,...,...,...,...,...,...
9941,0.0,9.6,0.0,0.0,0.534392,-0.785185,0.380435,0.333333,0.055336
9942,0.0,9.7,0.0,0.0,0.682540,-0.614815,0.391304,0.333333,0.067194
9943,0.0,9.7,2.0,0.0,0.751323,-0.496296,0.402174,0.358025,0.086957
9944,0.0,9.7,2.0,0.0,-0.502646,0.622222,0.402174,0.370370,0.098814


## Train Test Split

In [30]:
features = dataset.drop(columns=['Target', 'Failure Type'])
target = dataset['Target']
failure_type = dataset['Failure Type']

In [31]:
# Split the data for Target prediction
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Split the data for Failure Type prediction
X_train_ft, X_test_ft, y_train_ft, y_test_ft = train_test_split(features, failure_type, test_size=0.2, random_state=42)

In [32]:
# Check the shapes of the resulting data splits
print("Shapes for Target prediction:")
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

print("Shapes for Failure Type prediction:")
print(X_train_ft.shape, X_test_ft.shape, y_train_ft.shape, y_test_ft.shape)

Shapes for Target prediction:
(7956, 7) (1990, 7) (7956,) (1990,)
Shapes for Failure Type prediction:
(7956, 7) (1990, 7) (7956,) (1990,)


## Training 

### Training for Target Variable

In [33]:
target_model = DecisionTreeClassifier(random_state=42)
target_model.fit(X_train, y_train)

In [34]:
# Predict on the test set
y_pred = target_model.predict(X_test)

# Evaluate the model
target_accuracy = accuracy_score(y_test, y_pred)
target_classification_report = classification_report(y_test, y_pred)

print(f"Target Accuracy: {target_accuracy}")
print(f"Classification Report:\n{target_classification_report}")

Target Accuracy: 0.9341708542713568
Classification Report:
              precision    recall  f1-score   support

         0.0       0.97      0.96      0.97      1923
         1.0       0.09      0.10      0.10        67

    accuracy                           0.93      1990
   macro avg       0.53      0.53      0.53      1990
weighted avg       0.94      0.93      0.94      1990



### Training for Failure Type

In [35]:
failure_type_model = DecisionTreeClassifier(random_state=42)
failure_type_model.fit(X_train_ft, y_train_ft)

In [36]:
y_pred_ft = failure_type_model.predict(X_test_ft)
failure_type_accuracy = accuracy_score(y_test_ft, y_pred_ft)
failure_type_classification_report = classification_report(y_test_ft, y_pred_ft)

print(f"Failure Type Accuracy: {failure_type_accuracy}")
print(f"Classification Report:\n{failure_type_classification_report}")

Failure Type Accuracy: 0.9316582914572864
Classification Report:
              precision    recall  f1-score   support

         0.0       0.97      0.96      0.96      1923
         1.0       0.00      0.00      0.00        22
         2.0       0.00      0.00      0.00         7
         3.0       0.12      0.25      0.17        12
         4.0       0.15      0.15      0.15        26

    accuracy                           0.93      1990
   macro avg       0.25      0.27      0.26      1990
weighted avg       0.94      0.93      0.94      1990



## Saving the Model

### Saving model for Target

In [37]:
with open('target_model.pkl', 'wb') as f:
    pickle.dump(target_model, f)

### Saving model for Failure Type

In [38]:
with open('failure_type_model.pkl', 'wb') as f:
    pickle.dump(failure_type_model, f)

## Testing on Random Values

In [39]:
with open('target_model.pkl', 'rb') as f:
    target_model = pickle.load(f)

with open('failure_type_model.pkl', 'rb') as f:
    failure_type_model = pickle.load(f)

In [40]:
# Example feature columns (replace with actual feature names)
feature_columns = ['Temperature Difference [C]', 'Type', 'Rotational speed [rpm]', 
                   'Torque [Nm]', 'Air temperature [C]', 'Process temperature [C]', 
                   'Tool wear [min]']

# Create a random data point (replace with actual random values that fit your dataset)
random_data_point = np.array([[15.0, 1.0, 0.25, 0.2, 0.3, 0.35, 0.0]])

# Convert to DataFrame
random_data_df = pd.DataFrame(random_data_point, columns=feature_columns)

In [41]:
random_data_df

Unnamed: 0,Temperature Difference [C],Type,Rotational speed [rpm],Torque [Nm],Air temperature [C],Process temperature [C],Tool wear [min]
0,10.0,1.0,0.25,0.2,0.3,0.35,0.0


In [42]:
# Predict Target
target_prediction = target_model.predict(random_data_df)
target_proba = target_model.predict_proba(random_data_df)

# Predict Failure Type
failure_type_prediction = failure_type_model.predict(random_data_df)
failure_type_proba = failure_type_model.predict_proba(random_data_df)

# Display the results
print(f"Random Data Point: \n{random_data_df}")
print(f"Predicted Target: {target_prediction[0]} (Probability: {target_proba[0]})")
print(f"Predicted Failure Type: {failure_type_prediction[0]} (Probabilities: {failure_type_proba[0]})")


Random Data Point: 
   Temperature Difference [C]  Type  Rotational speed [rpm]  Torque [Nm]  \
0                        10.0   1.0                    0.25          0.2   

   Air temperature [C]  Process temperature [C]  Tool wear [min]  
0                  0.3                     0.35              0.0  
Predicted Target: 0.0 (Probability: [1. 0.])
Predicted Failure Type: 0.0 (Probabilities: [1. 0. 0. 0. 0.])
