In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier   
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

def rfeFeature(indep_X, dep_Y, n):
    """Run recursive feature elimination once per base estimator.

    Parameters
    ----------
    indep_X : feature matrix handed to ``RFE.fit`` and ``RFE.transform``.
    dep_Y : target values handed to ``RFE.fit``.
    n : number of features each RFE run keeps (``n_features_to_select``).

    Returns
    -------
    list
        One transformed feature matrix per estimator, in this order:
        logistic regression, linear SVC, random forest, decision tree.
    """
    # The list order determines the order of the returned matrices.
    estimators = [
        LogisticRegression(solver='lbfgs', max_iter=1000),
        SVC(kernel='linear', random_state=0),
        RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
        DecisionTreeClassifier(criterion='gini', max_features='sqrt', splitter='best', random_state=0),
    ]

    return [
        RFE(estimator=estimator, n_features_to_select=n)
        .fit(indep_X, dep_Y)
        .transform(indep_X)
        for estimator in estimators
    ]

def split_scalar(indep_X, dep_Y):
    """Split the data 75/25 and standardise the feature columns.

    The split uses ``test_size=0.25`` and ``random_state=0``. The scaler is
    fitted on the training features only and then applied to the test
    features.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` with both feature matrices
        scaled; the targets are returned as produced by the split.
    """
    parts = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    train_features, test_features, train_target, test_target = parts

    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_features)
    # Reuse the statistics learned from the training split.
    test_scaled = scaler.transform(test_features)

    return train_scaled, test_scaled, train_target, test_target

def cm_prediction(classifier, X_test, y_test):
    """Predict on the test set and compute evaluation metrics.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    X_test : features passed to ``classifier.predict``.
    y_test : true labels compared against the predictions.

    Returns
    -------
    tuple
        ``(classifier, accuracy, report, X_test, y_test, confusion_matrix)``
        where ``accuracy`` comes from ``accuracy_score`` and ``report`` from
        ``classification_report``.
    """
    predictions = classifier.predict(X_test)
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    matrix = confusion_matrix(y_test, predictions)
    score = accuracy_score(y_test, predictions)
    summary = classification_report(y_test, predictions)

    return classifier, score, summary, X_test, y_test, matrix

def logistic(X_train, y_train, X_test, y_test):
    """Fit a logistic regression model and evaluate it on the test data.

    Returns the tuple produced by ``cm_prediction`` for the fitted model.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    model = LogisticRegression(random_state=0, max_iter=1000).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test):
    """Fit a linear-kernel SVC and evaluate it on the test data.

    Returns the tuple produced by ``cm_prediction`` for the fitted model.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    model = SVC(kernel='linear', random_state=0).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def svm_nl(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVC and evaluate it on the test data.

    Returns the tuple produced by ``cm_prediction`` for the fitted model.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    model = SVC(kernel='rbf', random_state=0).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def naive_bayes(X_train, y_train, X_test, y_test):
    """Fit a Gaussian naive Bayes model and evaluate it on the test data.

    Returns the tuple produced by ``cm_prediction`` for the fitted model.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    model = GaussianNB().fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def knn(X_train, y_train, X_test, y_test):
    """Fit a 5-nearest-neighbours classifier and evaluate it on the test data.

    The classifier uses the Minkowski metric with ``p=2``.
    Returns the tuple produced by ``cm_prediction`` for the fitted model.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def decision_tree(X_train, y_train, X_test, y_test):
    """Fit an entropy-criterion decision tree and evaluate it on the test data.

    Returns the tuple produced by ``cm_prediction`` for the fitted model.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    model = DecisionTreeClassifier(criterion='entropy', random_state=0).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def random_forest(X_train, y_train, X_test, y_test):
    """Fit a 10-tree entropy-criterion random forest and evaluate it on the test data.

    Returns the tuple produced by ``cm_prediction`` for the fitted model.
    """
    # ``fit`` returns the estimator itself, so the call can be chained.
    model = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0).fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def rfe_classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf):
    """Assemble per-classifier scores into a table.

    Rows are labelled 'Logistic', 'SVC', 'Random' and 'DecisionTree';
    columns are labelled 'Logistic', 'SVMl', 'SVMnl', 'KNN', 'Naive',
    'Decision' and 'Random'. Row ``i`` of the table takes element ``i`` of
    each argument.

    Parameters
    ----------
    acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf :
        Indexable sequences with at least four elements each; they fill the
        'Logistic', 'SVMl', 'SVMnl', 'KNN', 'Naive', 'Decision' and 'Random'
        columns respectively. Elements beyond the fourth are ignored.

    Returns
    -------
    pandas.DataFrame
        A 4 x 7 table holding the supplied values.

    Raises
    ------
    IndexError
        If any argument has fewer than four elements.
    """
    row_labels = ['Logistic', 'SVC', 'Random', 'DecisionTree']
    # Dict order defines the column order of the resulting frame.
    column_scores = {
        'Logistic': acclog,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'KNN': accknn,
        'Naive': accnav,
        'Decision': accdes,
        'Random': accrf,
    }

    rfedataframe = pd.DataFrame(index=row_labels, columns=list(column_scores))

    for number, idex in enumerate(row_labels):
        for column, scores in column_scores.items():
            # Assign through a single .loc[row, col] call. Chained indexing
            # (frame[col][row] = value) writes to a temporary object under
            # pandas copy-on-write and would leave the frame unfilled.
            rfedataframe.loc[idex, column] = scores[number]

    return rfedataframe

# Read prep.csv and one-hot encode it; drop_first=True drops the first
# level of every encoded categorical column.
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# 'classification_yes' is used as the target; every other column is a feature.
dep_Y = df2['classification_yes']
indep_X = df2.drop(columns='classification_yes')

# Keep 3 features per RFE base estimator.
rfelist = rfeFeature(indep_X, dep_Y, 3)

  from pandas.core import (


In [2]:
# Show the RFE-selected feature matrices returned by rfeFeature: one array per
# base estimator (logistic regression, linear SVC, random forest, decision tree).
rfelist

[array([[1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 0.],
        ...,
        [1., 0., 1.],
        [0., 0., 1.],
        [0., 0., 0.]]),
 array([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        ...,
        [0., 1., 0.],
        [0., 1., 1.],
        [0., 0., 1.]]),
 array([[ 3.07735602, 12.51815562, 38.86890244],
        [ 0.7       , 10.7       , 34.        ],
        [ 0.6       , 12.        , 34.        ],
        ...,
        [ 6.        ,  9.1       , 26.        ],
        [ 6.8       ,  8.5       , 38.86890244],
        [ 1.        , 16.3       , 53.        ]]),
 array([[12.51815562,  1.        ,  0.        ],
        [10.7       ,  1.        ,  0.        ],
        [12.        ,  0.        ,  0.        ],
        ...,
        [ 9.1       ,  1.        ,  1.        ],
        [ 8.5       ,  0.        ,  1.        ],
        [16.3       ,  0.        ,  0.        ]])]