In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score ,recall_score , precision_score,accuracy_score ,confusion_matrix ,roc_curve, auc, roc_auc_score
from sklearn.model_selection import GridSearchCV
from mlxtend.feature_selection import SequentialFeatureSelector as SFS


In [4]:
# Load the exported CKD table and work on its raw NumPy view.
dataset2 = pd.read_csv("export_CKD.csv", sep=",")
array = dataset2.values

# Columns 2..25 are the sample vectors (24 features); column 26 is the
# target vector holding the class labels.
X, y = array[:, 2:26], array[:, 26]

# Show the shape of both arrays as the cell output.
(X.shape, y.shape)


((400, 24), (400,))

In [5]:
# Hold out 20% of the rows as the test split; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def run_svm(X_train, X_test, y_train, y_test):
    """Fit a linear-kernel SVM on the training split and print test-set metrics.

    Prints the confusion matrix, the classification report, the accuracy and
    the ROC AUC computed on the test split. Nothing is returned.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices of the training and test splits.
    y_train, y_test : array-like
        Class labels matching the rows of ``X_train`` / ``X_test``.
    """
    clf = SVC(kernel='linear', random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Print the matrix explicitly: a bare expression inside a function is
    # never displayed by the notebook.
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    # NOTE(review): the ROC curve is built from hard class predictions and
    # treats label 2 as the positive class -- confirm 2 is the intended label.
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nAUC: ", metrics.auc(fpr, tpr))
    

In [None]:
# Baseline: evaluate the SVM on the full feature set, before any selection.
print("The result of svm before features selection:")
run_svm(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

In [None]:
# Greedy forward selection is nested: the subset chosen at size k is the one
# the search passes through on its way to any larger size. Fit the selector
# once up to the full feature count and read every intermediate subset from
# `subsets_`, instead of repeating the whole search for each k.
n_features = X_train.shape[1]
# NOTE(review): subsets are scored with a default SVC() while run_svm evaluates
# a linear-kernel SVC -- confirm the mismatch is intentional.
sel = SFS(SVC(), k_features=n_features, verbose=0, cv=4, n_jobs=-1).fit(X_train, y_train)
for index in range(1, n_features + 1):
    # Column indices of the best subset of size `index`.
    feature_idx = list(sel.subsets_[index]['feature_idx'])
    X_train_rfe = X_train[:, feature_idx]
    X_test_rfe = X_test[:, feature_idx]
    print('Selected Feature: ', index)
    run_svm(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

In [None]:
def run_ann(X_train, X_test, y_train, y_test):
    """Fit an MLP with three hidden layers of 8 units and print test-set metrics.

    Prints the confusion matrix, the classification report, the accuracy and
    the ROC AUC computed on the test split. Nothing is returned.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices of the training and test splits.
    y_train, y_test : array-like
        Class labels matching the rows of ``X_train`` / ``X_test``.
    """
    clf = MLPClassifier(
        hidden_layer_sizes=(8, 8, 8),
        activation='relu',
        solver='adam',
        max_iter=500,
        random_state=1,
    )
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Print the matrix explicitly: a bare expression inside a function is
    # never displayed by the notebook.
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    # NOTE(review): the ROC curve is built from hard class predictions and
    # treats label 2 as the positive class -- confirm 2 is the intended label.
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nAUC: ", metrics.auc(fpr, tpr))

# Baseline: evaluate the MLP on the full feature set, before any selection.
print("The result of ANN before features selection:")
run_ann(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)


In [None]:
# Greedy forward selection is nested: the subset chosen at size k is the one
# the search passes through on its way to any larger size. Fit the selector
# once up to the full feature count and read every intermediate subset from
# `subsets_`, instead of repeating the whole (expensive) MLP search for each k.
n_features = X_train.shape[1]
sel = SFS(
    MLPClassifier(hidden_layer_sizes=(8, 8, 8), activation='relu', solver='adam', max_iter=500, random_state=42),
    k_features=n_features,
    verbose=0,
    cv=4,
    n_jobs=-1,
).fit(X_train, y_train)
for index in range(1, n_features + 1):
    # Column indices of the best subset of size `index`.
    feature_idx = list(sel.subsets_[index]['feature_idx'])
    X_train_rfe = X_train[:, feature_idx]
    X_test_rfe = X_test[:, feature_idx]
    print('Selected Feature: ', index)
    run_ann(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

In [None]:
def run_nb(X_train, X_test, y_train, y_test):
    """Fit a Gaussian naive Bayes classifier and print test-set metrics.

    Prints the confusion matrix, the classification report, the accuracy and
    the ROC AUC computed on the test split. Nothing is returned.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices of the training and test splits.
    y_train, y_test : array-like
        Class labels matching the rows of ``X_train`` / ``X_test``.
    """
    # Fit once; a second identical fit on the same data adds nothing.
    clf = GaussianNB().fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Print the matrix explicitly: a bare expression inside a function is
    # never displayed by the notebook.
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    # NOTE(review): the ROC curve is built from hard class predictions and
    # treats label 2 as the positive class -- confirm 2 is the intended label.
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nAUC: ", metrics.auc(fpr, tpr))

# Baseline: evaluate naive Bayes (not the ANN) on the full feature set.
print("The result of Naive Bayes before features selection:")
run_nb(X_train, X_test, y_train, y_test)

In [None]:
# Greedy forward selection is nested: the subset chosen at size k is the one
# the search passes through on its way to any larger size. Fit the selector
# once up to the full feature count and read every intermediate subset from
# `subsets_`, instead of repeating the whole search for each k.
n_features = X_train.shape[1]
sel = SFS(GaussianNB(), k_features=n_features, verbose=0, cv=4, n_jobs=-1).fit(X_train, y_train)
for index in range(1, n_features + 1):
    # Column indices of the best subset of size `index`.
    feature_idx = list(sel.subsets_[index]['feature_idx'])
    X_train_rfe = X_train[:, feature_idx]
    X_test_rfe = X_test[:, feature_idx]
    print('Selected Feature: ', index)
    run_nb(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

In [None]:
 def run_knn(X_train, X_test, y_train, y_test):
     """Fit a 9-nearest-neighbours classifier and print test-set metrics.

     Prints the confusion matrix, the classification report, the accuracy and
     the ROC AUC computed on the test split. Nothing is returned.

     Parameters
     ----------
     X_train, X_test : array-like
         Feature matrices of the training and test splits.
     y_train, y_test : array-like
         Class labels matching the rows of ``X_train`` / ``X_test``.
     """
     # Fit once; a second identical fit on the same data adds nothing.
     clf = KNeighborsClassifier(n_neighbors=9).fit(X_train, y_train)
     y_pred = clf.predict(X_test)
     # Print the matrix explicitly: a bare expression inside a function is
     # never displayed by the notebook.
     print(confusion_matrix(y_test, y_pred))
     print(classification_report(y_test, y_pred))
     # NOTE(review): the ROC curve is built from hard class predictions and
     # treats label 2 as the positive class -- confirm 2 is the intended label.
     fpr, tpr, _ = metrics.roc_curve(y_test, y_pred, pos_label=2)
     print('Accuracy: ', accuracy_score(y_test, y_pred), "\nAUC: ", metrics.auc(fpr, tpr))

# Baseline: evaluate KNN (not the ANN) on the full feature set.
print("The result of KNN before features selection:")
run_knn(X_train, X_test, y_train, y_test)

In [None]:
# Greedy forward selection is nested: the subset chosen at size k is the one
# the search passes through on its way to any larger size. Fit the selector
# once up to the full feature count and read every intermediate subset from
# `subsets_`, instead of repeating the whole search for each k.
n_features = X_train.shape[1]
sel = SFS(KNeighborsClassifier(n_neighbors=9), k_features=n_features, verbose=0, cv=4, n_jobs=-1).fit(X_train, y_train)
for index in range(1, n_features + 1):
    # Column indices of the best subset of size `index`.
    feature_idx = list(sel.subsets_[index]['feature_idx'])
    X_train_rfe = X_train[:, feature_idx]
    X_test_rfe = X_test[:, feature_idx]
    print('Selected Feature: ', index)
    run_knn(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

In [None]:
def run_DecisionTree(X_train, X_test, y_train, y_test):
    """Fit a seeded decision tree on the training split and print test-set metrics.

    Prints the confusion matrix, the classification report, the accuracy and
    the ROC AUC computed on the test split. Nothing is returned.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices of the training and test splits.
    y_train, y_test : array-like
        Class labels matching the rows of ``X_train`` / ``X_test``.
    """
    clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Print the matrix explicitly: a bare expression inside a function is
    # never displayed by the notebook.
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    # NOTE(review): the ROC curve is built from hard class predictions and
    # treats label 2 as the positive class -- confirm 2 is the intended label.
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nAUC: ", metrics.auc(fpr, tpr))
# Baseline: evaluate the decision tree on the full feature set, before any selection.
print("The result of Decision Tree before features selection:")
run_DecisionTree(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

In [None]:
# Greedy forward selection is nested: the subset chosen at size k is the one
# the search passes through on its way to any larger size. Fit the selector
# once up to the full feature count and read every intermediate subset from
# `subsets_`, instead of repeating the whole search for each k.
n_features = X_train.shape[1]
# Seed the tree used for scoring so the selected subsets are reproducible
# (same seed as the tree evaluated in run_DecisionTree).
sel = SFS(DecisionTreeClassifier(random_state=42), k_features=n_features, verbose=0, cv=4, n_jobs=-1).fit(X_train, y_train)
for index in range(1, n_features + 1):
    # Column indices of the best subset of size `index`.
    feature_idx = list(sel.subsets_[index]['feature_idx'])
    X_train_rfe = X_train[:, feature_idx]
    X_test_rfe = X_test[:, feature_idx]
    print('Selected Feature: ', index)
    run_DecisionTree(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

In [None]:
def run_randomForest(X_train, X_test, y_train, y_test):
    """Fit a 100-tree random forest on the training split and print test-set metrics.

    Prints the confusion matrix, the classification report, the accuracy and
    the ROC AUC computed on the test split. Nothing is returned.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices of the training and test splits.
    y_train, y_test : array-like
        Class labels matching the rows of ``X_train`` / ``X_test``.
    """
    clf = RandomForestClassifier(n_estimators=100, random_state=1, n_jobs=-1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Print the matrix explicitly: a bare expression inside a function is
    # never displayed by the notebook.
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    # NOTE(review): the ROC curve is built from hard class predictions and
    # treats label 2 as the positive class -- confirm 2 is the intended label.
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred, pos_label=2)
    print('Accuracy: ', accuracy_score(y_test, y_pred), "\nAUC: ", metrics.auc(fpr, tpr))
# Baseline: evaluate the random forest on the full feature set, before any selection.
print("The result of Random forest before features selection:")
run_randomForest(X_train, X_test, y_train, y_test)


In [None]:
# Greedy forward selection is nested: the subset chosen at size k is the one
# the search passes through on its way to any larger size. Fit the selector
# once up to the full feature count and read every intermediate subset from
# `subsets_`, instead of repeating the whole (expensive) forest search for each k.
n_features = X_train.shape[1]
sel = SFS(
    RandomForestClassifier(n_estimators=100, random_state=42),
    k_features=n_features,
    verbose=0,
    cv=4,
    n_jobs=-1,
    scoring='accuracy',
).fit(X_train, y_train)
for index in range(1, n_features + 1):
    # Column indices of the best subset of size `index`.
    feature_idx = list(sel.subsets_[index]['feature_idx'])
    X_train_rfe = X_train[:, feature_idx]
    X_test_rfe = X_test[:, feature_idx]
    print('Selected Feature: ', index)
    run_randomForest(X_train_rfe, X_test_rfe, y_train, y_test)
    print()

In [None]:
# Recursive feature elimination with a random forest, evaluated for every
# target subset size from 1 up to the number of available columns.
n_features = X_train.shape[1]
for index in range(1, n_features + 1):
    sel = RFE(
        RandomForestClassifier(n_estimators=100, random_state=42),
        n_features_to_select=index,
    ).fit(X_train, y_train)
    X_train_rfe = sel.transform(X_train)
    X_test_rfe = sel.transform(X_test)
    print('Selected Feature: ', index)
    # Report which columns survived; a bare get_support() call inside a loop
    # is never displayed by the notebook.
    print('Selected columns: ', sel.get_support(indices=True))
    run_randomForest(X_train_rfe, X_test_rfe, y_train, y_test)
    print()