# Prep

In [None]:
# Import required libraries
import sys
import warnings

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# NOTE(review): presumably a shim for an mlxtend release that still imports
# joblib through sklearn.externals; it must run before the mlxtend import.
sys.modules['sklearn.externals.joblib'] = joblib
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, roc_auc_score, recall_score
from sklearn.model_selection import GridSearchCV , train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
import xgboost as xgb

warnings.filterwarnings('ignore')

In [None]:
# Location of the dataset CSV (a relative path, so it is resolved against the
# notebook's working directory).
data = 'UpsampledDataset.csv'

# Read the whole file into a DataFrame.
upsampled = pd.read_csv(filepath_or_buffer=data)

In [None]:
# `upsampled` was already loaded from `data` in the previous cell, so the file
# is not read a second time. Instead, fail fast with a clear message if the
# frame is empty or lacks the label column that the next cell splits on.
assert not upsampled.empty, f"{data!r} loaded as an empty DataFrame"
assert 'Machine_failure' in upsampled.columns, (
    f"{data!r} has no 'Machine_failure' column"
)

In [None]:
# Separate the label column from the predictor columns.
target_column = 'Machine_failure'
y = upsampled[target_column]
X = upsampled.drop(columns=[target_column])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
# NOTE(review): the file name suggests the data was upsampled before this
# split. If so, copies of the same minority-class rows can end up in both
# train and test and inflate the scores below — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features. StandardScaler is already imported in the
# notebook's import cell, so it is not re-imported here.
# The scaler is fitted on the training split only and then applied to the
# test split, so test-set statistics never influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# XGBoost Model

XGBoost, short for eXtreme Gradient Boosting, is an ensemble learning algorithm known for its speed and performance.

In [None]:
# Train the XGBoost classifier on the standardized training features.
# `xgb` comes from the notebook's import cell. random_state is pinned to the
# same value the other models in this notebook use.
# NOTE(review): use_label_encoder is reportedly deprecated/ignored by recent
# XGBoost releases — confirm against the installed version before dropping it.
xgb_model = xgb.XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
)
xgb_model.fit(X_train_scaled, y_train)

# Predictions on the held-out split. A model-specific name keeps them from
# being overwritten by the later evaluation cells.
y_pred_xgb = xgb_model.predict(X_test_scaled)

# Accuracy is computed once and reused for both the printout and the stored
# value.
accuracy_xgboost = accuracy_score(y_test, y_pred_xgb)

# Evaluating the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_xgb))
print("Accuracy Score:", accuracy_xgboost)

Confusion Matrix:
[[1941    1]
 [   0 1923]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1942
           1       1.00      1.00      1.00      1923

    accuracy                           1.00      3865
   macro avg       1.00      1.00      1.00      3865
weighted avg       1.00      1.00      1.00      3865

Accuracy Score: 0.9997412677878396


# ELM Model

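Extreme Learning Machines (ELMs) are a type of machine learning algorithm that falls under the umbrella of neural networks: single-hidden-layer feedforward networks whose hidden-layer weights are assigned randomly and left untrained, with only the output weights fitted. The cell below approximates one with scikit-learn's `MLPClassifier` using a single hidden layer of 10 logistic units; note that `MLPClassifier` trains all of its weights iteratively, so this is an ELM-like stand-in rather than a true ELM.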

In [None]:
# Single-hidden-layer network (10 logistic units) used as the "ELM" entry.
# NOTE(review): MLPClassifier fits every weight iteratively, whereas a
# textbook ELM keeps its hidden weights random — confirm the label is intended.
elm_model = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation='logistic',
    max_iter=1000,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict labels for the held-out split.
y_pred_elm = elm_model.predict(X_test_scaled)

# Report the same three metrics as the other models.
elm_report = [
    ("Confusion Matrix:", confusion_matrix(y_test, y_pred_elm)),
    ("\nClassification Report:", classification_report(y_test, y_pred_elm)),
]
for heading, body in elm_report:
    print(heading)
    print(body)
print("Accuracy Score:", accuracy_score(y_test, y_pred_elm))


Confusion Matrix:
[[1942    0]
 [  53 1870]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99      1942
           1       1.00      0.97      0.99      1923

    accuracy                           0.99      3865
   macro avg       0.99      0.99      0.99      3865
weighted avg       0.99      0.99      0.99      3865

Accuracy Score: 0.986287192755498


# Deep learning model with two layers

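A basic deep learning model is a feed-forward neural network with one or more hidden layers between the input and output layers. The model built below has two hidden layers (64 and 32 ReLU units) followed by a single sigmoid output unit for binary classification.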

In [None]:
# Build the Keras network in a single cell so that re-running it always yields
# the same fresh architecture (appending layers with model.add() in a separate
# cell stacks extra layers onto the existing model on every re-run).

# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation and batch shuffling are repeatable; 42 matches the seed used
# by the other models in this notebook.
tf.keras.utils.set_random_seed(42)

n_features = X_train_scaled.shape[1]

model = Sequential([
    # First hidden layer; input_shape fixes the expected number of features.
    Dense(units=64, activation='relu', input_shape=(n_features,)),
    # Second hidden layer.
    Dense(units=32, activation='relu'),
    # Output layer with sigmoid activation for binary classification.
    Dense(units=1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss to match the
# single sigmoid output, and accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit for 10 epochs in mini-batches of 32. The test split is passed as
# validation_data, so its loss/accuracy are reported after every epoch.
model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_scaled, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7c4a84aacc40>

In [None]:
# Threshold the sigmoid outputs at 0.5 to get class labels. The network has a
# single output unit, so predict() returns one column per sample; ravel()
# flattens it to the 1-D label vector the sklearn metrics expect. A
# model-specific name keeps these predictions from clobbering another model's.
y_pred_deep = (model.predict(X_test_scaled) > 0.5).astype("int32").ravel()

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_deep))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_deep))
print("Accuracy Score:", accuracy_score(y_test, y_pred_deep))

Confusion Matrix:
[[1940    2]
 [  17 1906]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1942
           1       1.00      0.99      1.00      1923

    accuracy                           1.00      3865
   macro avg       1.00      1.00      1.00      3865
weighted avg       1.00      1.00      1.00      3865

Accuracy Score: 0.9950840879689521


# Ensemble Model Containing the Top 3 Models Overall

In [None]:
# Fresh, unfitted estimators for the ensemble. These rebind `xgb_model` and
# `elm_model`, replacing the instances fitted earlier in the notebook.
# NOTE(review): `deep_model` is a scikit-learn MLP with hidden layers (10, 5),
# not the Keras network trained above — confirm this substitution is intended.
mlp_shared = dict(max_iter=1000, random_state=42)

xgb_model = xgb.XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
)
elm_model = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation='logistic',
    **mlp_shared,
)
deep_model = MLPClassifier(
    hidden_layer_sizes=(10, 5),
    **mlp_shared,
)

In [None]:
# Combine the three estimators; voting='hard' means the ensemble predicts the
# class label chosen by the majority of its members.
top_three = [
    ('xgb', xgb_model),
    ('elm', elm_model),
    ('deep', deep_model),
]
ensemble_model = VotingClassifier(estimators=top_three, voting='hard')

In [None]:
# Fit every member of the voting ensemble on the standardized training split.
ensemble_model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Majority-vote predictions for the held-out split.
y_pred_ensemble = ensemble_model.predict(X_test_scaled)

# Compute the metrics first, then print them in the same layout used for the
# individual models.
ensemble_cm = confusion_matrix(y_test, y_pred_ensemble)
ensemble_report = classification_report(y_test, y_pred_ensemble)
ensemble_accuracy = accuracy_score(y_test, y_pred_ensemble)

print("Confusion Matrix:", ensemble_cm, sep="\n")
print("\nClassification Report:", ensemble_report, sep="\n")
print("Accuracy Score:", ensemble_accuracy)

Confusion Matrix:
[[1942    0]
 [   4 1919]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1942
           1       1.00      1.00      1.00      1923

    accuracy                           1.00      3865
   macro avg       1.00      1.00      1.00      3865
weighted avg       1.00      1.00      1.00      3865

Accuracy Score: 0.9989650711513584


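All the models performed strongly on the held-out split, with XGBoost achieving the highest accuracy at 99.97%. Its ability to handle complex datasets, its built-in regularization against overfitting, and its efficient optimization procedures likely contribute to its lead over the other models evaluated. While the other models also performed well, XGBoost's results make it the standout choice for predictive modeling in this context. Note, however, that these scores were measured on a dataset that appears to have been upsampled before the train/test split; if so, near-duplicate minority-class rows may appear in both splits and inflate every model's accuracy, so the ranking should be re-checked on a test set held out before upsampling.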