# Machine Learning Approaches for Anomaly Detection in Medical Devices

In [None]:
# Importing Libraries

In [None]:
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install seaborn
!pip install scikit-learn
!pip install imbalanced-learn
!pip install xgboost
!pip install joblib

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import joblib
import os
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [None]:
# Importing Dataset

In [None]:
dataset = pd.read_csv("Dataset.csv")



In [None]:
dataset

In [None]:
dataset.head()

In [None]:
dataset.info()

In [None]:
dataset.dropna(inplace=True)

In [None]:
dataset.isnull().sum()

In [None]:
dataset.describe()

In [None]:
# Bar chart showing how many rows belong to each value of the 'class' column
sns.set(style="darkgrid")
plt.figure(figsize=(8, 6))
ax = sns.countplot(x='class', data=dataset, palette="Set3")
plt.title("Count Plot")
plt.xlabel("Categories")
plt.ylabel("Count")

# Label every bar with its height, nudged 5 points above the bar top
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre_x = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height}',
        (bar_centre_x, bar_height),
        xytext=(0, 5),
        textcoords='offset points',
        ha='center',
        va='center',
        fontsize=10,
        color='black',
    )

plt.show()

In [None]:
# Converting object type to int type

In [None]:
# Replace the values in 'class' with the integer codes learned by the encoder
le = LabelEncoder()
le.fit(dataset['class'])
dataset['class'] = le.transform(dataset['class'])

In [None]:
dataset

In [None]:
# Defining dependent and independent variables

In [None]:
# Feature matrix: the first 170 columns of the dataset, selected by position.
# NOTE(review): assumes the target column lies outside these 170 columns — confirm against Dataset.csv.
X=dataset.iloc[:,0:170]

In [None]:
X

In [None]:
# Target vector: the last column of the dataset, selected by position.
# NOTE(review): presumably this is the label-encoded 'class' column — confirm it is the final column.
y=dataset.iloc[:,-1]

In [None]:
y

In [None]:
# Splitting the data into training and test sets

In [None]:
# Hold out 20% of the rows for evaluation. The seed is fixed so the split is
# reproducible across runs: the model cells below reuse estimators cached on
# disk, and an unseeded split would score a cached model on rows it may have
# been trained on in an earlier session.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
# Building an ML Model

In [None]:
# Display names for the two encoded classes, in encoded-value order (index 0 is shown as 'POSITIVE').
# NOTE(review): LabelEncoder numbers classes in sorted order of the raw labels — confirm that code 0 really is the positive class.
labels=['POSITIVE','NEGATIVE']

In [None]:
# Module-level accumulators for the scores of each evaluated model
# (four separate list objects, one per metric)
precision, recall, fscore, accuracy = [], [], [], []

In [None]:
#function to calculate various metrics such as accuracy, precision etc
def calculateMetrics(algorithm, predict, testY):
    """Score one model's predictions, record the scores, and plot them.

    Computes accuracy and macro-averaged precision, recall and F1 (all
    scaled to percentages), appends them to the module-level ``accuracy``,
    ``precision``, ``recall`` and ``fscore`` lists, prints them together
    with a per-class classification report, and shows a confusion-matrix
    heatmap.

    Args:
        algorithm: Display name used in the printed lines and the plot title.
        predict: Predicted labels; must support ``.astype('int')``.
        testY: Ground-truth labels; must support ``.astype('int')``.

    Returns:
        None. Results are reported through the module-level lists, stdout
        and a matplotlib figure.
    """
    testY = testY.astype('int')
    predict = predict.astype('int')

    p = precision_score(testY, predict, average='macro') * 100
    r = recall_score(testY, predict, average='macro') * 100
    f = f1_score(testY, predict, average='macro') * 100
    a = accuracy_score(testY, predict) * 100

    accuracy.append(a)
    precision.append(p)
    recall.append(r)
    fscore.append(f)

    print(algorithm+' Accuracy    : '+str(a))
    print(algorithm+' Precision   : '+str(p))
    print(algorithm+' Recall      : '+str(r))
    print(algorithm+' FSCORE      : '+str(f))

    # Ground truth goes first (y_true, y_pred); with the arguments reversed the
    # per-class precision and recall swap and support counts the predictions.
    report = classification_report(testY, predict, target_names=labels)
    print('\n',algorithm+" classification report\n",report)

    conf_matrix = confusion_matrix(testY, predict)
    plt.figure(figsize=(5, 5))
    ax = sns.heatmap(conf_matrix, xticklabels=labels, yticklabels=labels, annot=True, cmap="Blues", fmt="g")
    # NOTE(review): explicit y-limits look like a workaround for heatmap rows
    # being cropped in some matplotlib versions — confirm it is still needed.
    ax.set_ylim([0, len(labels)])
    plt.title(algorithm+" Confusion matrix")
    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.show()

# Logistic Regression

In [None]:
# Train-once cache: fit and persist the model on the first run, reload it afterwards
if not os.path.exists('Logistic Regression.pkl'):
    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    joblib.dump(clf, 'Logistic Regression.pkl')
    print("Model saved successfully.")
else:
    clf = joblib.load('Logistic Regression.pkl')
    print("Model loaded successfully.")

# Either way, score the classifier on the held-out split
predict = clf.predict(X_test)
calculateMetrics("Logistic Regression", predict, y_test)

# XGBoost Classifier

In [None]:
# Reuse the cached XGBoost model if one exists on disk; otherwise train and cache it
if os.path.exists('XGBClassifier.pkl'):
    clf = joblib.load('XGBClassifier.pkl')
    print("Model loaded successfully.")
else:
    clf = XGBClassifier(max_depth=100, random_state=0)
    clf.fit(X_train, y_train)
    joblib.dump(clf, 'XGBClassifier.pkl')
    print("Model saved successfully.")

# Evaluate on the held-out split. The same display name is used on both paths
# so the printed metrics and plot title do not depend on whether the model
# came from the cache.
predict = clf.predict(X_test)
calculateMetrics("XGBoost Classifier", predict, y_test)

In [None]:
# Summary table of every algorithm's scores; each name is paired with the
# entry at the same position in the metric accumulator lists
columns = ["Algorithm Name","Accuracy","Precison","Recall","FScore"]
algorithm_names = ["Logistic Regression", "XGBoostClassifier"]
values = [
    [name, accuracy[i], precision[i], recall[i], fscore[i]]
    for i, name in enumerate(algorithm_names)
]

temp = pd.DataFrame(values, columns=columns)
temp

In [None]:
# prediction

In [None]:
test=pd.read_csv("Test.csv")

In [None]:
test

In [None]:

# Classify every row of the loaded test data with the current `clf`
predict = clf.predict(test)

# Echo each row followed by its verdict (encoded class 0 is reported as POSITIVE)
for i, p in enumerate(predict):
    verdict = "POSITIVE" if p == 0 else "NEGATIVE"
    print(test.iloc[i])
    print("Row {}:************************************************** {}".format(i, verdict))



In [None]:
test['Predicted']=predict

In [None]:
test