In [1]:
import pandas as pd
df = pd.read_csv('datasets.csv')
df.head()

Unnamed: 0,footfall,tempMode,AQ,USS,CS,VOC,RP,IP,Temperature,fail
0,0,7,7,1,6,6,36,3,1,1
1,190,1,3,3,5,1,20,4,1,0
2,31,7,2,2,6,1,24,6,1,0
3,83,4,3,4,5,1,28,6,1,0
4,640,7,5,6,4,0,68,6,1,0


In [2]:
df.shape

(944, 10)

In [3]:
df.isnull().sum()

footfall       0
tempMode       0
AQ             0
USS            0
CS             0
VOC            0
RP             0
IP             0
Temperature    0
fail           0
dtype: int64

In [4]:
df['fail'].value_counts()

fail
0    551
1    393
Name: count, dtype: int64

In [6]:
from sklearn.utils import resample
df_majority = df[df['fail'] == 0]
df_minority = df[df['fail'] == 1]
df_minority_upsampled = resample(df_minority,replace=True,n_samples=len(df_majority),random_state=42)
df_balanced = pd.concat([df_majority, df_minority_upsampled])

In [7]:
df_balanced['fail'].value_counts()

fail
0    551
1    551
Name: count, dtype: int64

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
x = df_balanced.drop(columns=['fail'])
y = df_balanced['fail']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [10]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [17]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier()
rf_model.fit(x_train_scaled,y_train)
y_pred = rf_model.predict(x_test_scaled)

In [19]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [20]:
accuracy_score(y_test,y_pred)

0.9230769230769231

In [21]:
confusion_matrix(y_test,y_pred)

array([[109,   9],
       [  8,  95]])

In [22]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.93      0.92      0.93       118
           1       0.91      0.92      0.92       103

    accuracy                           0.92       221
   macro avg       0.92      0.92      0.92       221
weighted avg       0.92      0.92      0.92       221



In [25]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.1-py3-none-macosx_12_0_arm64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.1-py3-none-macosx_12_0_arm64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-3.0.1


In [26]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

In [28]:
classifiers = [
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    LogisticRegression(),
    SVC(),
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    GaussianNB(),
    XGBClassifier()
]

results = {}


for clf in classifiers:
    clf_name = clf.__class__.__name__
    clf.fit(x_train_scaled, y_train)
    y_pred = clf.predict(x_test_scaled)
    
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{clf_name} Accuracy: {accuracy}")
    
    print(f"Classification Report for {clf_name}:")
    print(classification_report(y_test, y_pred))
    
    print(f"Confusion Matrix for {clf_name}:")
    print(confusion_matrix(y_test, y_pred))
    print("="*50)

RandomForestClassifier Accuracy: 0.9230769230769231
Classification Report for RandomForestClassifier:
              precision    recall  f1-score   support

           0       0.94      0.92      0.93       118
           1       0.91      0.93      0.92       103

    accuracy                           0.92       221
   macro avg       0.92      0.92      0.92       221
weighted avg       0.92      0.92      0.92       221

Confusion Matrix for RandomForestClassifier:
[[108  10]
 [  7  96]]
AdaBoostClassifier Accuracy: 0.9140271493212669
Classification Report for AdaBoostClassifier:
              precision    recall  f1-score   support

           0       0.95      0.89      0.92       118
           1       0.88      0.94      0.91       103

    accuracy                           0.91       221
   macro avg       0.91      0.92      0.91       221
weighted avg       0.92      0.91      0.91       221

Confusion Matrix for AdaBoostClassifier:
[[105  13]
 [  6  97]]




GradientBoostingClassifier Accuracy: 0.9049773755656109
Classification Report for GradientBoostingClassifier:
              precision    recall  f1-score   support

           0       0.94      0.87      0.91       118
           1       0.87      0.94      0.90       103

    accuracy                           0.90       221
   macro avg       0.91      0.91      0.90       221
weighted avg       0.91      0.90      0.91       221

Confusion Matrix for GradientBoostingClassifier:
[[103  15]
 [  6  97]]
LogisticRegression Accuracy: 0.9230769230769231
Classification Report for LogisticRegression:
              precision    recall  f1-score   support

           0       0.95      0.90      0.93       118
           1       0.89      0.95      0.92       103

    accuracy                           0.92       221
   macro avg       0.92      0.92      0.92       221
weighted avg       0.93      0.92      0.92       221

Confusion Matrix for LogisticRegression:
[[106  12]
 [  5  98]]
SVC Ac

In [32]:
classifiers = [
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    LogisticRegression(),
    SVC(),
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    GaussianNB(),
    XGBClassifier()
]

# Create a DataFrame to store the results
results_df = pd.DataFrame(columns=['Model', 'Accuracy', 'F1-Score', 'Precision', 'Recall'])

# Train and evaluate each classifier
for clf in classifiers:
    clf_name = clf.__class__.__name__
    clf.fit(x_train_scaled, y_train)
    y_pred = clf.predict(x_test_scaled)
    
    # Calculate evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    f1_score = report['weighted avg']['f1-score']
    precision = report['weighted avg']['precision']
    recall = report['weighted avg']['recall']
    
    # Append results to DataFrame
    results_df = pd.concat([results_df, pd.DataFrame([{'Model': clf_name, 'Accuracy': accuracy, 'F1-Score': f1_score, 
                                    'Precision': precision, 'Recall': recall}])], ignore_index=True)

results_df

  results_df = pd.concat([results_df, pd.DataFrame([{'Model': clf_name, 'Accuracy': accuracy, 'F1-Score': f1_score,


Unnamed: 0,Model,Accuracy,F1-Score,Precision,Recall
0,RandomForestClassifier,0.923077,0.923134,0.923531,0.923077
1,AdaBoostClassifier,0.914027,0.914126,0.916058,0.914027
2,GradientBoostingClassifier,0.904977,0.905083,0.90819,0.904977
3,LogisticRegression,0.923077,0.923165,0.925106,0.923077
4,SVC,0.918552,0.918642,0.920087,0.918552
5,KNeighborsClassifier,0.900452,0.900542,0.901223,0.900452
6,DecisionTreeClassifier,0.868778,0.868897,0.873324,0.868778
7,GaussianNB,0.923077,0.923165,0.925106,0.923077
8,XGBClassifier,0.918552,0.918596,0.91879,0.918552


In [33]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Instantiate the RandomForestClassifier
rf_classifier = RandomForestClassifier()

# Train the RandomForestClassifier
rf_classifier.fit(x_train_scaled, y_train)

# Predict on the test set
y_pred_rf = rf_classifier.predict(x_test_scaled)

# Calculate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print("Random Forest Classifier Accuracy:", accuracy_rf)

# Classification report
print("Classification Report for Random Forest Classifier:")
print(classification_report(y_test, y_pred_rf))

# Confusion matrix
print("Confusion Matrix for Random Forest Classifier:")
print(confusion_matrix(y_test, y_pred_rf))

Random Forest Classifier Accuracy: 0.9230769230769231
Classification Report for Random Forest Classifier:
              precision    recall  f1-score   support

           0       0.94      0.92      0.93       118
           1       0.91      0.93      0.92       103

    accuracy                           0.92       221
   macro avg       0.92      0.92      0.92       221
weighted avg       0.92      0.92      0.92       221

Confusion Matrix for Random Forest Classifier:
[[108  10]
 [  7  96]]


In [37]:
# test 1:
print("predcted class ",rf_classifier.predict(x_test_scaled[10].reshape(1,-1))[0])
print("actual class ", y_test.iloc[10])

predcted class  1
actual class  1


In [39]:
# test 2:
print("predcted class ",rf_classifier.predict(x_test_scaled[200].reshape(1,-1))[0])
print("actual class ", y_test.iloc[200])

predcted class  0
actual class  0


In [40]:
# test 3:
print("predcted class ",rf_classifier.predict(x_test_scaled[110].reshape(1,-1))[0])
print("actual class ", y_test.iloc[110])

predcted class  0
actual class  0


In [41]:
import pickle
pickle.dump(rf_classifier,open("models/rf_classifier.pkl",'wb'))
pickle.dump(scaler,open("models/scaler.pkl",'wb'))

In [43]:
import pickle

# Load the RandomForestClassifier model
with open("models/rf_classifier.pkl", "rb") as file:
    rf_classifier = pickle.load(file)

# Load the scaler
with open("models/scaler.pkl", "rb") as file:
    scaler = pickle.load(file)

In [44]:
import numpy as np

def predict(rf_classifier, scaler, footfall, tempMode, AQ, USS, CS, VOC, RP, IP, Temperature):
   
    # Prepare features array
    features = np.array([[ footfall, tempMode, AQ, USS, CS, VOC, RP, IP, Temperature]])
    
    # scalling
    scaled_features = scaler.transform(features)
    
    # predict by model
    result = rf_classifier.predict(scaled_features)
    
    return result[0]

In [46]:
# test 1:
footfall = 450
tempMode = 5
AQ = 4
USS = 5
CS = 5
VOC = 1
RP = 40
IP = 7
Temperature =20

result = predict(rf_classifier, scaler, footfall, tempMode, AQ, USS, CS, VOC, RP, IP, Temperature)


if result == 1:
    print("Machine failure ")
else: 
    print("Machine no failure")

Machine no failure




In [49]:
# test 2:
footfall = 400
tempMode = 7
AQ = 3
USS = 5
CS = 5
VOC = 1
RP = 35
IP = 7
Temperature =30

result = predict(rf_classifier, scaler, footfall, tempMode, AQ, USS, CS, VOC, RP, IP, Temperature)


if result == 1:
    print("Machine failure ")
else: 
    print("Machine no failure")

Machine no failure




In [50]:
# test 3:
footfall = 300
tempMode = 4
AQ = 2
USS = 3
CS = 6
VOC = 2
RP = 50
IP = 8
Temperature = 40

result = predict(rf_classifier, scaler, footfall, tempMode, AQ, USS, CS, VOC, RP, IP, Temperature)


if result == 1:
    print("Machine failure ")
else: 
    print("Machine no failure")

Machine no failure


