In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,f1_score
import xgboost as xgb

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

Data import

In [6]:
df = pd.read_csv("./historical_data.csv")
df['Classes'] = df['Class'].apply(lambda x: 'Normal' if x == 'Normal' else 'High')
# Map 'Normal' to 1 and 'High' to 0
df['Classes'] = df['Class'].apply(lambda x: 0 if x == 'Normal' else 1)

# Drop the original 'Class' column
df = df.drop(columns=['Class'])

# Verify the changes
print(df.head())

           DATETIME    RR   SPO2   MAP   SBP   DBP     HR    PP       CO  \
0  2020-10-18 15:24  35.0   99.9   0.0   0.0   0.0  106.9   0.0     0.00   
1  2020-10-18 15:25  36.4  100.0  87.0  98.9  63.1  107.3  35.8  3841.34   
2  2020-10-18 15:26  35.2  100.0  75.2  97.9  63.0  107.5  34.9  3751.75   
3  2020-10-18 15:27  34.0  100.0  74.8  97.2  62.5  107.0  34.7  3712.90   
4  2020-10-18 15:28  34.9  100.0  74.0  96.0  62.0  107.0  34.0  3638.00   

   Classes  
0        0  
1        0  
2        0  
3        0  
4        0  


Label Encoding target column

Train Test split

In [7]:
X = df.drop(['DATETIME', 'Classes'], axis=1) 
y = df['Classes'] 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Feature Scaling

In [8]:

X_train=pd.DataFrame(X_train, index=X_train.index)
X_test=pd.DataFrame(X_test, index=X_test.index)

Ensembled Classifier:XGBoost

In [9]:
model_xgb= xgb.XGBClassifier(random_state=42,verbosity=0, min_child_weight=2,
                             max_depth=4, learning_rate=0.15, gamma=0.22, colsample_bytree=0.5)
param_grid = {
    'learning_rate': [0.1, 0.15, 0.2],
    'max_depth': [3, 4, 5],
    'min_child_weight': [1, 2, 3],
    'gamma': [0.2, 0.3, 0.4],
    'colsample_bytree': [0.5, 0.6, 0.7]
}

grid_search = GridSearchCV(model_xgb, param_grid, scoring='accuracy', cv=3)
grid_search.fit(X_train, y_train)

# Print the best parameters
print("Best Parameters:", grid_search.best_params_)

# Train the model with the best parameters
best_model_xgb = grid_search.best_estimator_
best_model_xgb.fit(X_train, y_train)


y_pred = best_model_xgb.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

print("Train F1_Score: ", metrics.f1_score(y_train, best_model_xgb.predict(X_train), average='micro'))
print("Val F1_Score: ", metrics.f1_score(y_test, best_model_xgb.predict(X_test), average='micro'))

Best Parameters: {'colsample_bytree': 0.5, 'gamma': 0.2, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1}
Accuracy: 0.9994453688297282
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1780
           1       0.96      1.00      0.98        23

    accuracy                           1.00      1803
   macro avg       0.98      1.00      0.99      1803
weighted avg       1.00      1.00      1.00      1803

Train F1_Score:  1.0
Val F1_Score:  0.9994453688297282


SVC

In [10]:
from sklearn.model_selection import RandomizedSearchCV

# Define a reduced search space
param_dist = {
    'C': [1, 10, 25],
    'gamma': ['scale', 'auto', 1e-3],
    'kernel': ['rbf', 'linear']
}
# Create the SVM model
model_svc = SVC(probability=True)

# Instantiate the RandomizedSearchCV object
random_search = RandomizedSearchCV(model_svc, param_dist, n_iter=10, cv=3, scoring='accuracy', n_jobs=-1)

# Fit the model to the training data
random_search.fit(X_train, y_train)

# Access the best model from the random search
best_model_svc = random_search.best_estimator_
best_model_svc.fit(X_train, y_train)

# Make predictions on the test set
y_pred = best_model_svc.predict(X_test)


# Print accuracy and classification report
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Train F1_Score: ", metrics.f1_score(y_train, best_model_svc.predict(X_train), average='micro'))
print("Val F1_Score: ", metrics.f1_score(y_test, best_model_svc.predict(X_test), average='micro'))



Accuracy: 0.9988907376594565
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1780
           1       1.00      0.91      0.95        23

    accuracy                           1.00      1803
   macro avg       1.00      0.96      0.98      1803
weighted avg       1.00      1.00      1.00      1803

Train F1_Score:  0.9991678224687933
Val F1_Score:  0.9988907376594565


Random Forest

In [11]:
# Define a search space for hyperparameter tuning
param_dist = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
}

# Create the Naive Bayes model
model_nb = GaussianNB()

# Instantiate the RandomizedSearchCV object
random_search_nb = RandomizedSearchCV(model_nb, param_dist, n_iter=5, cv=3, scoring='accuracy', n_jobs=-1)

# Fit the model to the training data
random_search_nb.fit(X_train, y_train)

# Access the best model from the random search
best_model_nb = random_search_nb.best_estimator_

# Make predictions on the test set
y_pred_nb = best_model_nb.predict(X_test)

# Print accuracy and classification report
print("Accuracy (Naive Bayes):", accuracy_score(y_test, y_pred_nb))
print("Classification Report (Naive Bayes):\n", classification_report(y_test, y_pred_nb))
print("F1 Score (Naive Bayes):", f1_score(y_test, y_pred_nb, average='micro'))

Accuracy (Naive Bayes): 0.9900166389351082
Classification Report (Naive Bayes):
               precision    recall  f1-score   support

           0       1.00      0.99      0.99      1780
           1       0.58      0.83      0.68        23

    accuracy                           0.99      1803
   macro avg       0.79      0.91      0.84      1803
weighted avg       0.99      0.99      0.99      1803

F1 Score (Naive Bayes): 0.9900166389351082


In [12]:

# Define the DecisionTreeClassifier
model_dt = DecisionTreeClassifier(random_state=42)

# Define the hyperparameter grid for tuning
param_grid = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Instantiate the GridSearchCV object
grid_search = GridSearchCV(model_dt, param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the model to the training data
grid_search.fit(X_train, y_train)

# Access the best model from the grid search
best_model_dt = grid_search.best_estimator_

# Make predictions on the test set
y_pred_dt = best_model_dt.predict(X_test)

# Print accuracy and classification report
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9988907376594565
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1780
           1       1.00      0.91      0.95        23

    accuracy                           1.00      1803
   macro avg       1.00      0.96      0.98      1803
weighted avg       1.00      1.00      1.00      1803



KNN

In [13]:

# Create a KNN classifier
knn_model = KNeighborsClassifier()

# Define the hyperparameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9],  # Adjust as needed
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'] 
}

# Create the GridSearchCV object
grid_search = GridSearchCV(knn_model, param_grid, cv=5, scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Print the best parameters and best accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)

# Use the best model for predictions
best_knn_model = grid_search.best_estimator_
y_pred = best_knn_model.predict(X_test)

Best Parameters: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}
Best Accuracy: 0.9918169209431346


In [14]:
voting_model = VotingClassifier(estimators=[('XGBoost', best_model_xgb), ('SVMClassifier',
                                                                           best_model_svc),('GaussianNB', best_model_nb),
                                            ('DecisionTreeClassifier', best_model_dt), ('KnnClassifier', best_knn_model)
                                           ], voting='soft')

voting_model.fit(X_train, y_train)

print("F1_Score: ", metrics.f1_score(y_test, voting_model.predict(X_test), average='micro'))
for clf in (best_model_xgb, best_model_svc, best_model_nb, best_model_dt,best_knn_model):
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, metrics.f1_score(y_test, y_pred, average='micro'))

y_test_pred = voting_model.predict(X_test)

F1_Score:  1.0
XGBClassifier 0.9994453688297282
SVC 0.9988907376594565
GaussianNB 0.9900166389351082
DecisionTreeClassifier 1.0
KNeighborsClassifier 0.9933444259567388


In [15]:
conf_matrix = confusion_matrix(y_test, y_test_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Print F1 score for the ensemble model
print("F1_Score: ", metrics.f1_score(y_test,y_test_pred, average='micro'))


Confusion Matrix:
[[1780    0]
 [   0   23]]
F1_Score:  1.0


In [16]:
import joblib


# Save the ensemble model as a pickle file
joblib.dump(voting_model, 'v_model.pkl')



['v_model.pkl']