In [55]:
import polars as pl

dfMachinePredictions = pl.read_csv("data.csv")

In [56]:
dfMachinePredictions.head()

footfall,tempMode,AQ,USS,CS,VOC,RP,IP,Temperature,fail
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
0,7,7,1,6,6,36,3,1,1
190,1,3,3,5,1,20,4,1,0
31,7,2,2,6,1,24,6,1,0
83,4,3,4,5,1,28,6,1,0
640,7,5,6,4,0,68,6,1,0


In [57]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, roc_auc_score



In [58]:
# Preprocess your data
# Assuming the last column is the target variable
X = dfMachinePredictions[:, :9]  # Features
y = dfMachinePredictions[:, 9]   # Target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Classifier
clf = MLPClassifier()

# Train the classifier
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print(cm)

accuracy = accuracy_score(y_test, y_pred)
sensitivity = recall_score(y_test, y_pred, pos_label=1)  # Sensitivity (Recall) for positive class
specificity = recall_score(y_test, y_pred, pos_label=0)  # Recall for the negative class = Specificity
roc_auc = roc_auc_score(y_test, y_pred)  # AUC requires predicted probabilities

print(f"Classifier used: {clf}")
print(f"Accuracy: {accuracy:.2f}")
print(f"Sensitivity (Recall for positive class): {sensitivity:.2f}")
print(f"Specificity (Recall for negative class): {specificity:.2f}")
print(f"ROC AUC: {roc_auc:.2f}")




[[92 10]
 [18 69]]
Classifier used: MLPClassifier()
Accuracy: 0.85
Sensitivity (Recall for positive class): 0.79
Specificity (Recall for negative class): 0.90
ROC AUC: 0.85


In [59]:
tp, fn, fp, tn = cm.ravel()

sen = tn / (tn+fp)
spe = tp / (tp+fn)
acc = (tp + tn) / (tp + tn + fp + fn)

print(f"Accuracy calculated from CM: {acc:.2f}")
print(f"Sensitivity calculated from CM: {sen:.2f}")
print(f"Specificity calculated from CM: {spe:.2f}")

Accuracy calculated from CM: 0.85
Sensitivity calculated from CM: 0.79
Specificity calculated from CM: 0.90


In [60]:
def testClassifier(clf):
    # Preprocess your data
    # Assuming the last column is the target variable
    X = dfMachinePredictions[:, :9]  # Features
    y = dfMachinePredictions[:, 9]   # Target

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize the Decision Tree Classifier
    #clf = MLPClassifier()

    # Train the classifier
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred = clf.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    accuracy = accuracy_score(y_test, y_pred)
    sensitivity = recall_score(y_test, y_pred, pos_label=1)  # Sensitivity (Recall) for positive class
    specificity = recall_score(y_test, y_pred, pos_label=0)  # Recall for the negative class = Specificity
    roc_auc = roc_auc_score(y_test, y_pred)  # AUC requires predicted probabilities

    print(f"Classifier used: {clf}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Sensitivity (Recall for positive class): {sensitivity:.2f}")
    print(f"Specificity (Recall for negative class): {specificity:.2f}")
    print(f"ROC AUC: {roc_auc:.2f}")


testClassifier(MLPClassifier())
testClassifier(DecisionTreeClassifier())


[[92 10]
 [15 72]]
Classifier used: MLPClassifier()
Accuracy: 0.87
Sensitivity (Recall for positive class): 0.83
Specificity (Recall for negative class): 0.90
ROC AUC: 0.86
[[81 21]
 [15 72]]
Classifier used: DecisionTreeClassifier()
Accuracy: 0.81
Sensitivity (Recall for positive class): 0.83
Specificity (Recall for negative class): 0.79
ROC AUC: 0.81


In [61]:
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

def testClassifier(clf):
    # Preprocess your data
    # Assuming the last column is the target variable
    X = dfMachinePredictions[:, :9]  # Features
    y = dfMachinePredictions[:, 9]   # Target

    accuracies = []
    sensitivities = []
    specificities = []
    aucs = []

    # Split the dataset into training and testing sets
    kf = StratifiedKFold(n_splits=5, shuffle=True)

    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Initialize the Decision Tree Classifier
        #clf = MLPClassifier()

        # Train the classifier
        clf.fit(X_train, y_train)

        # Make predictions
        y_pred = clf.predict(X_test)

        cm = confusion_matrix(y_test, y_pred)
        #print(cm)

        accuracy = accuracy_score(y_test, y_pred)
        accuracies.append(accuracy)
        
        sensitivity = recall_score(y_test, y_pred, pos_label=1)  # Sensitivity (Recall) for positive class
        sensitivities.append(sensitivity)

        specificity = recall_score(y_test, y_pred, pos_label=0)  # Recall for the negative class = Specificity
        specificities.append(specificity)

        roc_auc = roc_auc_score(y_test, y_pred)  # AUC requires predicted probabilities
        aucs.append(roc_auc)

    print(f"Classifier used: {clf}")
    print(f"Accuraccies: {np.mean(accuracies)}")
    print(f"Sensitivities (Recall for positive class): {np.mean(sensitivities)}")
    print(f"Specificities (Recall for negative class): {np.mean(specificities)}")
    print(f"ROC AUCs: {np.mean(aucs)}")


testClassifier(MLPClassifier())
print()
testClassifier(DecisionTreeClassifier())


Classifier used: MLPClassifier()
Accuraccies: 0.8961612067995046
Sensitivities (Recall for positive class): 0.8574488802336904
Specificities (Recall for negative class): 0.9236855036855036
ROC AUCs: 0.8905671919595971

Classifier used: DecisionTreeClassifier()
Accuraccies: 0.8432004953281549
Sensitivities (Recall for positive class): 0.8065887698799091
Specificities (Recall for negative class): 0.8694348894348896
ROC AUCs: 0.8380118296573993


In [62]:
from sklearn.svm import OneClassSVM
import numpy as np
import polars as pl

def mapOneClassResultToBinaryClassification(classificationResults):
    # Create a copy of the array to avoid modifying the original
    mapped_arr = np.copy(classificationResults)
    # Map -1 to 1
    mapped_arr[classificationResults == -1] = 1
    # Map 1 to 0
    mapped_arr[classificationResults == 1] = 0

    return mapped_arr

# Generate some training data
#X_train = np.array([[0], [0.44], [0.45], [0.46], [1]])

#normal dataset
dfMachineNoFail = dfMachinePredictions.filter(pl.col("fail")==0)

X_normal = dfMachineNoFail[:, :9]
y_normal = dfMachineNoFail[:, 9]
X_train_normal, X_test_normal, y_train_normal, y_test_normal = train_test_split(X_normal, y_normal, test_size=0.2, random_state=42)

clf = OneClassSVM(nu=0.7)
clf.fit(X_train_normal)

#normal test
Y_normal_pred = mapOneClassResultToBinaryClassification(clf.predict(X_test_normal))

#anomaly test
dfMachineFail = dfMachinePredictions.filter(pl.col("fail")==1)
X_anomaly = dfMachineFail[:, :9]
y_anomaly = dfMachineFail[:, 9]
Y_anomaly_pred = mapOneClassResultToBinaryClassification(clf.predict(X_anomaly))

y_test = np.concatenate((y_test_normal, y_anomaly))
y_pred = np.concatenate((Y_normal_pred, Y_anomaly_pred)) 

accuracy = accuracy_score(y_test, y_pred)
sensitivity = recall_score(y_test, y_pred, pos_label=1)  # Sensitivity (Recall) for positive class
specificity = recall_score(y_test, y_pred, pos_label=0)  # Recall for the negative class = Specificity
roc_auc = roc_auc_score(y_test, y_pred)  # AUC requires predicted probabilities

print(f"Classifier used: {clf}")
print(f"Accuraccies: {accuracy}")
print(f"Sensitivities (Recall for positive class): {sensitivity}")
print(f"Specificities (Recall for negative class): {specificity}")
print(f"ROC AUCs: {roc_auc}")

# Fit the model

# Predict on the training data
#predictions = clf.predict(X_train)
#print(predictions)  # Output: array([-1,  1,  1,  1, -1])



Classifier used: OneClassSVM(nu=0.7)
Accuraccies: 0.6785714285714286
Sensitivities (Recall for positive class): 0.7608142493638677
Specificities (Recall for negative class): 0.38738738738738737
ROC AUCs: 0.5741008183756275
