In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report , f1_score ,recall_score , precision_score,accuracy_score ,confusion_matrix ,roc_curve, auc, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from mlxtend.feature_selection import SequentialFeatureSelector as SFS


In [2]:
hyper = pd.read_csv('megringaftertransfer.csv')

print(hyper.head())
print("dimension of Hypertension data: {}".format(hyper.shape))
X=hyper.drop(["id",'hyper'],axis=1)
y= hyper['hyper']


   Age    DBP  Obese     SBP    bmi  gender   hc  hyper  id  wc  whr
0   31  74.33      0  128.00  28.76       1  101      0   1  88   87
1   21  70.00      0  123.33  27.59       1  110      0   7  86   78
2   23  56.67      0   90.00  22.45       1  104      2   8  72   69
3   24  90.00      0  126.67  28.16       1  108      0  13  89   82
4   20  69.00      0  120.00  25.05       1  108      2  15  81   75
dimension of Hypertension data: (399, 11)


In [3]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=30,random_state=1)

# CNN

In [175]:
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras import Sequential
#model
model = Sequential()
model.add(Dense(12, input_dim=9, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X, y, epochs=10, batch_size=1)
y_pred = model.predict(X_test)
confusion_matrix(y_test, y_pred)
classi=classification_report(y_test,y_pred)
print(classi)
# evaluate the keras model
_, accuracy = model.evaluate(X, y)
print('Accuracy: %.2f' % (accuracy*100))


Train on 399 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.07      1.00      0.12         2
           1       0.00      0.00      0.00         7
           2       0.00      0.00      0.00        21

    accuracy                           0.07        30
   macro avg       0.02      0.33      0.04        30
weighted avg       0.00      0.07      0.01        30

Accuracy: 23.81


  'precision', 'predicted', average, warn_for)


# Cross Validation

In [140]:
from sklearn.model_selection import cross_val_score
def crossV(model,X,y):
    accuracy = cross_val_score(svm_class, X, y, scoring='accuracy', cv = 10).mean() * 100
    print("Accuracy of svm is: " , accuracy)
    scores = cross_val_score( clf, X, y, cv=10, scoring='f1_macro').mean() * 100
    print("f1 of svm is: " , scores)

    scores = cross_val_score( clf, X, y, cv=10, scoring='recall_macro').mean() * 100
    print("recall of svm is: " , scores)

    scores = cross_val_score( clf, X, y, cv=10, scoring='precision_macro').mean() * 100
    print("precision of svm is: " , scores)


# SVM

In [154]:
def run_svm(X_train, X_test, y_train, y_test):
    clf = SVC(kernel='linear',random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    confusion_matrix(y_test, y_pred)
    classi=classification_report(y_test,y_pred)
    print(classi)
    fpr, tpr, thresholds = metrics.roc_curve(y_test,y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nACU: ",metrics.auc(fpr, tpr) )
print("The result of svm before features selection:")
run_svm(X_train, X_test, y_train, y_test)
print("Cross Validation:\n")
crossV(clf,X,y)

# Step Forward Feature Selection (SFS)

In [155]:
for index in range(1, 10):
    
    sel = SFS(SVC(), k_features = index ,verbose= 0,  cv = 4, n_jobs= -1).fit(X_train, y_train)
    X_train_rfe = sel.transform(X_train)
    names=sel.k_feature_names_
    print(names)
    X_test_rfe = sel.transform(X_test)
    print('Selected Feature: ', index)
    run_svm(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

('SBP',)
Selected Feature:  1
              precision    recall  f1-score   support

           0       0.14      0.50      0.22         2
           1       1.00      0.57      0.73         7
           2       0.95      0.86      0.90        21

    accuracy                           0.77        30
   macro avg       0.70      0.64      0.62        30
weighted avg       0.91      0.77      0.81        30

Accuracy:  0.7666666666666667 
ACU:  0.8412698412698412

('SBP', 'gender')
Selected Feature:  2
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0

('Obese', 'SBP', 'gender')
Selected Feature:  3
              precision    recall  f1

# RFE

In [170]:
for index in range(1, 11):
    sel = RFE(SVC(kernel='linear',random_state=42), n_features_to_select = index).fit(X_train, y_train)
    s=sel.get_support()
    print(s)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    print('Selected Feature: ', index)
    run_randomForest(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

[False False False False False  True False False False]
Selected Feature:  1


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         7
           2       0.70      1.00      0.82        21

    accuracy                           0.70        30
   macro avg       0.23      0.33      0.27        30
weighted avg       0.49      0.70      0.58        30

Accuracy:  0.7 
ACU:  0.5

[False False False  True False  True False False False]
Selected Feature:  2
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0

[False False False  True False  True False False  True]
Selected Feature:  3
              precision    rec

# MLP

In [152]:
def run_ann(X_train, X_test, y_train, y_test):
    clf = MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam', max_iter=500,random_state=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    confusion_matrix(y_test, y_pred)
    classi=classification_report(y_test,y_pred)
    print(classi)
    fpr, tpr, thresholds = metrics.roc_curve(y_test,y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nACU: ",metrics.auc(fpr, tpr) )
print("Cross Validation:\n")
crossV(clf,X,y)
print("The result of ANN before features selection:")
run_ann(X_train, X_test, y_train, y_test)


The result of ANN before features selection:




              precision    recall  f1-score   support

           0       0.12      0.50      0.20         2
           1       1.00      0.43      0.60         7
           2       0.95      0.86      0.90        21

    accuracy                           0.73        30
   macro avg       0.69      0.60      0.57        30
weighted avg       0.90      0.73      0.78        30

Accuracy:  0.7333333333333333 
ACU:  0.8492063492063493
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356


# Step Forward Feature Selection (SFS)

In [156]:
for index in range(1, 10):
    sel = SFS(MLPClassifier(hidden_layer_sizes=(8,8,8), activation='relu', solver='adam', max_iter=500,random_state=42), k_features = index ,verbose= 0,  cv = 4, n_jobs= -1).fit(X_train, y_train)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    names=sel.k_feature_names_
    print(names)
    print('Selected Feature: ', index)
    run_ann(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

('DBP',)
Selected Feature:  1


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         7
           2       0.70      1.00      0.82        21

    accuracy                           0.70        30
   macro avg       0.23      0.33      0.27        30
weighted avg       0.49      0.70      0.58        30

Accuracy:  0.7 
ACU:  0.5
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('DBP', 'wc')
Selected Feature:  2


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.25      0.50      0.33         2
           1       0.00      0.00      0.00         7
           2       0.81      1.00      0.89        21

    accuracy                           0.73        30
   macro avg       0.35      0.50      0.41        30
weighted avg       0.58      0.73      0.65        30

Accuracy:  0.7333333333333333 
ACU:  0.7222222222222222
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('DBP', 'SBP', 'wc')
Selected Feature:  3


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.25      0.50      0.33         2
           1       0.00      0.00      0.00         7
           2       0.81      1.00      0.89        21

    accuracy                           0.73        30
   macro avg       0.35      0.50      0.41        30
weighted avg       0.58      0.73      0.65        30

Accuracy:  0.7333333333333333 
ACU:  0.7222222222222222
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('DBP', 'SBP', 'wc', 'whr')
Selected Feature:  4


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.20      0.50      0.29         2
           1       0.00      0.00      0.00         7
           2       0.84      1.00      0.91        21

    accuracy                           0.73        30
   macro avg       0.35      0.50      0.40        30
weighted avg       0.60      0.73      0.66        30

Accuracy:  0.7333333333333333 
ACU:  0.7777777777777778
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('DBP', 'SBP', 'gender', 'wc', 'whr')
Selected Feature:  5


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.20      0.50      0.29         2
           1       0.00      0.00      0.00         7
           2       0.84      1.00      0.91        21

    accuracy                           0.73        30
   macro avg       0.35      0.50      0.40        30
weighted avg       0.60      0.73      0.66        30

Accuracy:  0.7333333333333333 
ACU:  0.7777777777777778
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('DBP', 'SBP', 'bmi', 'gender', 'wc', 'whr')
Selected Feature:  6


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.29      0.44         7
           2       0.75      1.00      0.86        21

    accuracy                           0.77        30
   macro avg       0.58      0.43      0.43        30
weighted avg       0.76      0.77      0.70        30

Accuracy:  0.7666666666666667 
ACU:  0.6111111111111112
Cross Validation:

Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('DBP', 'SBP', 'bmi', 'gender', 'hc', 'wc', 'whr')
Selected Feature:  7




              precision    recall  f1-score   support

           0       0.17      0.50      0.25         2
           1       1.00      0.14      0.25         7
           2       0.87      0.95      0.91        21

    accuracy                           0.73        30
   macro avg       0.68      0.53      0.47        30
weighted avg       0.85      0.73      0.71        30

Accuracy:  0.7333333333333333 
ACU:  0.8068783068783069
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('DBP', 'Obese', 'SBP', 'bmi', 'gender', 'hc', 'wc', 'whr')
Selected Feature:  8




              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.60      0.43      0.50         7
           2       0.86      0.90      0.88        21

    accuracy                           0.73        30
   macro avg       0.49      0.44      0.46        30
weighted avg       0.74      0.73      0.74        30

Accuracy:  0.7333333333333333 
ACU:  0.7804232804232805
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356

('Age', 'DBP', 'Obese', 'SBP', 'bmi', 'gender', 'hc', 'wc', 'whr')
Selected Feature:  9




              precision    recall  f1-score   support

           0       0.12      0.50      0.20         2
           1       1.00      0.43      0.60         7
           2       0.95      0.86      0.90        21

    accuracy                           0.73        30
   macro avg       0.69      0.60      0.57        30
weighted avg       0.90      0.73      0.78        30

Accuracy:  0.7333333333333333 
ACU:  0.8492063492063493
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356



# naive bayes

In [159]:
def run_nb(X_train, X_test, y_train, y_test):
    clf = GaussianNB().fit(X_train, y_train)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    confusion_matrix(y_test, y_pred)
    classi=classification_report(y_test,y_pred)
    print(classi)
    fpr, tpr, thresholds = metrics.roc_curve(y_test,y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nACU: ",metrics.auc(fpr, tpr) )
print("Cross Validation:\n")
crossV(clf,X,y)
print("The result of ANN before features selection:")
run_ann(X_train, X_test, y_train, y_test)

Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356
The result of ANN before features selection:




              precision    recall  f1-score   support

           0       0.12      0.50      0.20         2
           1       1.00      0.43      0.60         7
           2       0.95      0.86      0.90        21

    accuracy                           0.73        30
   macro avg       0.69      0.60      0.57        30
weighted avg       0.90      0.73      0.78        30

Accuracy:  0.7333333333333333 
ACU:  0.8492063492063493
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356


# Step Forward Feature Selection (SFS)

In [160]:
for index in range(1, 10):
    sel = SFS(GaussianNB(), k_features = index ,verbose= 0,  cv = 4, n_jobs= -1).fit(X_train, y_train)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    names=sel.k_feature_names_
    print(names)
    print('Selected Feature: ', index)
    run_nb(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

('SBP',)
Selected Feature:  1
              precision    recall  f1-score   support

           0       0.12      0.50      0.20         2
           1       1.00      0.57      0.73         7
           2       0.94      0.81      0.87        21

    accuracy                           0.73        30
   macro avg       0.69      0.63      0.60        30
weighted avg       0.90      0.73      0.79        30

Accuracy:  0.7333333333333333 
ACU:  0.8068783068783069

('SBP', 'whr')
Selected Feature:  2
              precision    recall  f1-score   support

           0       0.20      0.50      0.29         2
           1       1.00      0.86      0.92         7
           2       0.95      0.86      0.90        21

    accuracy                           0.83        30
   macro avg       0.72      0.74      0.70        30
weighted avg       0.91      0.83      0.86        30

Accuracy:  0.8333333333333334 
ACU:  0.8253968253968254

('Age', 'SBP', 'whr')
Selected Feature:  3
              p

# Knn

In [161]:
def run_knn(X_train, X_test, y_train, y_test):
    clf = KNeighborsClassifier(n_neighbors=9).fit(X_train, y_train)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    confusion_matrix(y_test, y_pred)
    classi=classification_report(y_test,y_pred)
    print(classi)
    fpr, tpr, thresholds = metrics.roc_curve(y_test,y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nACU: ",metrics.auc(fpr, tpr) )
print("Cross Validation:\n")
crossV(clf,X,y)
print("The result of ANN before features selection:")
run_ann(X_train, X_test, y_train, y_test)

Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356
The result of ANN before features selection:




              precision    recall  f1-score   support

           0       0.12      0.50      0.20         2
           1       1.00      0.43      0.60         7
           2       0.95      0.86      0.90        21

    accuracy                           0.73        30
   macro avg       0.69      0.60      0.57        30
weighted avg       0.90      0.73      0.78        30

Accuracy:  0.7333333333333333 
ACU:  0.8492063492063493
Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356


# Step Forward Feature Selection (SFS)

In [162]:
for index in range(1, 10):
    sel = SFS(KNeighborsClassifier(n_neighbors=9), k_features = index ,verbose= 0,  cv = 4, n_jobs= -1).fit(X_train, y_train)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    names=sel.k_feature_names_
    print(names)
    print('Selected Feature: ', index)
    run_knn(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

('SBP',)
Selected Feature:  1
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.71      0.83         7
           2       0.90      0.86      0.88        21

    accuracy                           0.77        30
   macro avg       0.63      0.52      0.57        30
weighted avg       0.86      0.77      0.81        30

Accuracy:  0.7666666666666667 
ACU:  0.7777777777777777

('SBP', 'gender')
Selected Feature:  2
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0

('Obese', 'SBP', 'gender')
Selected Feature:  3
              precision    recall  f1

  'precision', 'predicted', average, warn_for)


('Age', 'DBP', 'Obese', 'SBP', 'bmi', 'gender', 'hc', 'wc', 'whr')
Selected Feature:  9
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.86      0.92         7
           2       0.87      0.95      0.91        21

    accuracy                           0.87        30
   macro avg       0.62      0.60      0.61        30
weighted avg       0.84      0.87      0.85        30

Accuracy:  0.8666666666666667 
ACU:  0.7936507936507937



# DecisionTree

In [163]:
def run_DecisionTree(X_train, X_test, y_train, y_test):
    clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    confusion_matrix(y_test, y_pred)
    classi=classification_report(y_test,y_pred)
    print(classi)
    fpr, tpr, thresholds = metrics.roc_curve(y_test,y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nACU: ",metrics.auc(fpr, tpr) )
print("Cross Validation:\n")
crossV(clf,X,y)
print("The result of Decision Tree before features selection:")
run_DecisionTree(X_train, X_test, y_train, y_test)

Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356
The result of Decision Tree before features selection:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0


# Step Forward Feature Selection (SFS)

In [164]:
for index in range(1, 10):
    sel =SFS(DecisionTreeClassifier(), k_features = index ,verbose= 0,  cv = 4, n_jobs= -1).fit(X_train, y_train)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    names=sel.k_feature_names_
    print(names)
    print('Selected Feature: ', index)
    run_DecisionTree(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

('SBP',)
Selected Feature:  1
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.43      0.60         7
           2       0.90      0.86      0.88        21

    accuracy                           0.70        30
   macro avg       0.63      0.43      0.49        30
weighted avg       0.86      0.70      0.75        30

Accuracy:  0.7 
ACU:  0.7936507936507937

('SBP', 'gender')
Selected Feature:  2
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0

('Age', 'SBP', 'gender')
Selected Feature:  3
              precision    recall  f1-score   support


# RFE

In [167]:
for index in range(1, 11):
    sel = RFE(DecisionTreeClassifier(), n_features_to_select = index).fit(X_train, y_train)
    s=sel.get_support()
    print(s)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    print('Selected Feature: ', index)
    run_DecisionTree(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

[False False False  True False False False False False]
Selected Feature:  1
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.43      0.60         7
           2       0.90      0.86      0.88        21

    accuracy                           0.70        30
   macro avg       0.63      0.43      0.49        30
weighted avg       0.86      0.70      0.75        30

Accuracy:  0.7 
ACU:  0.7936507936507937

[False False False  True False  True False False False]
Selected Feature:  2
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0

[False False Fa

# Random Forest

In [165]:
def run_randomForest(X_train, X_test, y_train, y_test):
    clf = RandomForestClassifier(n_estimators=100, random_state=1, n_jobs=-1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    confusion_matrix(y_test, y_pred)
    classi=classification_report(y_test,y_pred)
    print(classi)
    fpr, tpr, thresholds = metrics.roc_curve(y_test,y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nACU: ",metrics.auc(fpr, tpr) )
print("Cross Validation:\n")
crossV(clf,X,y)
print("The result of Ranodm forest before features selection:")
run_randomForest(X_train, X_test, y_train, y_test)

Cross Validation:





Accuracy of svm is:  64.44415917843389
f1 of svm is:  94.49942061344012
recall of svm is:  93.53618233618232
precision of svm is:  96.92927689594356
The result of Ranodm forest before features selection:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0


# Step Forward Feature Selection (SFS)

In [166]:
for index in range(1, 10):
    sel = SFS(RandomForestClassifier(n_estimators=100, random_state=42), k_features = index ,verbose= 0,  cv = 4, n_jobs= -1,scoring='accuracy').fit(X_train, y_train)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    names=sel.k_feature_names_
    print(names)
    print('Selected Feature: ', index)
    run_randomForest(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

('SBP',)
Selected Feature:  1
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.43      0.60         7
           2       0.90      0.86      0.88        21

    accuracy                           0.70        30
   macro avg       0.63      0.43      0.49        30
weighted avg       0.86      0.70      0.75        30

Accuracy:  0.7 
ACU:  0.7936507936507937

('SBP', 'gender')
Selected Feature:  2
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0

('Age', 'SBP', 'gender')
Selected Feature:  3
              precision    recall  f1-score   support


# RFE

In [168]:
for index in range(1, 11):
    sel = RFE(RandomForestClassifier(n_estimators=100, random_state=1, n_jobs=-1), n_features_to_select = index).fit(X_train, y_train)
    s=sel.get_support()
    print(s)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    print('Selected Feature: ', index)
    run_randomForest(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

[False False False  True False False False False False]
Selected Feature:  1
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.43      0.60         7
           2       0.90      0.86      0.88        21

    accuracy                           0.70        30
   macro avg       0.63      0.43      0.49        30
weighted avg       0.86      0.70      0.75        30

Accuracy:  0.7 
ACU:  0.7936507936507937

[False False False  True False  True False False False]
Selected Feature:  2
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        21

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy:  1.0 
ACU:  1.0

[False  True Fa