In [1]:
# Note: to reproduce each result table below, change the k value (the number of features passed to rfeFeature) and re-execute the notebook.

import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import time

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#from sklearn.feature_selection import SelectKBest
#from sklearn.feature_selection import chi2

from sklearn.feature_selection import RFE


In [2]:

def rfeFeature(indep,dep,n):
    """Run recursive feature elimination once per ranking estimator.

    Four estimators are used, in this order: logistic regression, random
    forest, decision tree, linear-kernel SVC. Each one drives its own RFE
    pass that keeps ``n`` features of ``indep``.

    Parameters
    ----------
    indep : feature matrix handed to ``RFE.fit`` / ``RFE.transform``.
    dep : target values handed to ``RFE.fit``.
    n : value forwarded as ``n_features_to_select``.

    Returns
    -------
    list
        One reduced feature matrix per estimator, in the order listed above.
    """
    ranking_estimators = (
        LogisticRegression(solver="lbfgs"),
        RandomForestClassifier(n_estimators=10, criterion="entropy", random_state= 0),
        DecisionTreeClassifier(criterion='gini',splitter='best',max_features='sqrt',random_state= 0),
        SVC(kernel='linear', random_state=0),
    )

    reduced_sets = []
    for estimator in ranking_estimators:
        # Echo the estimator so the cell output shows which pass is running.
        print(estimator)
        selector = RFE(estimator=estimator, n_features_to_select=n)
        # Fit and transform on the same data: the selector is not reused later.
        reduced_sets.append(selector.fit(indep, dep).transform(indep))
    return reduced_sets

def Split_Scaler(indep,dep):
    """Split into train/test (70/30, ``random_state=0``) and standardise.

    The ``StandardScaler`` is fitted on the training features only and then
    applied to both the training and the test features.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` with both feature sets scaled.
    """
    train_X, test_X, train_y, test_y = train_test_split(
        indep, dep, test_size=0.30, random_state=0
    )
    scaler = StandardScaler()
    train_X_scaled = scaler.fit_transform(train_X)
    test_X_scaled = scaler.transform(test_X)
    return train_X_scaled, test_X_scaled, train_y, test_y

def cm_predition(classifier, x_test, y_test=None):
    """Predict on ``x_test`` and score the predictions against ``y_test``.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    x_test : features to predict on.
    y_test : true labels for ``x_test``. Optional for backward compatibility:
        when omitted, the notebook-level variable ``y_test`` is used, which
        is what this function always read before the parameter existed.
        Prefer passing it explicitly so the result does not depend on
        whichever cell last assigned ``y_test``.

    Returns
    -------
    tuple
        ``(classifier, cm, clf_report, accuracy, x_test, y_test, y_pred)``
        where ``cm`` is the confusion matrix, ``clf_report`` the text
        classification report and ``accuracy`` the accuracy score.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_test is None:
        # Legacy path: fall back to the notebook-level variable.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test explicitly"
            ) from None

    y_pred = classifier.predict(x_test)

    cm = confusion_matrix(y_test, y_pred)
    clf_report = classification_report(y_test, y_pred)
    # Stored under a distinct name so the imported function is not shadowed.
    accuracy = accuracy_score(y_test, y_pred)

    return classifier, cm, clf_report, accuracy, x_test, y_test, y_pred

def logistic(x_train,y_train,x_test):
    """Fit ``LogisticRegression(random_state=0)`` and evaluate it on ``x_test``.

    Returns the 7-tuple produced by ``cm_predition``:
    ``(classifier, cm, clf_report, accuracy_score, x_test, y_test, y_pred)``.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(random_state=0)
    model.fit(x_train, y_train)
    # Evaluation is delegated to the shared helper; its tuple is passed through.
    return cm_predition(model, x_test)
    
def svm_linear(x_train,y_train,x_test):
    """Fit a linear-kernel ``SVC`` (``random_state=0``) and evaluate it on ``x_test``.

    Returns the 7-tuple produced by ``cm_predition``:
    ``(classifier, cm, clf_report, accuracy_score, x_test, y_test, y_pred)``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='linear', random_state=0)
    model.fit(x_train, y_train)
    # Evaluation is delegated to the shared helper; its tuple is passed through.
    return cm_predition(model, x_test)
    
def svm_nonlinear(x_train,y_train,x_test):
    """Fit an RBF-kernel ``SVC`` (``random_state=0``) and evaluate it on ``x_test``.

    Returns the 7-tuple produced by ``cm_predition``:
    ``(classifier, cm, clf_report, accuracy_score, x_test, y_test, y_pred)``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', random_state=0)
    model.fit(x_train, y_train)
    # Evaluation is delegated to the shared helper; its tuple is passed through.
    return cm_predition(model, x_test)
    
def random(x_train,y_train,x_test):
    """Fit a 10-tree entropy ``RandomForestClassifier`` and evaluate it on ``x_test``.

    The forest is built with ``random_state=0``.

    Returns the 7-tuple produced by ``cm_predition``:
    ``(classifier, cm, clf_report, accuracy_score, x_test, y_test, y_pred)``.
    """
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
    model.fit(x_train, y_train)
    # Evaluation is delegated to the shared helper; its tuple is passed through.
    return cm_predition(model, x_test)

def Decision(x_train,y_train,x_test):
    """Fit an entropy ``DecisionTreeClassifier`` (``random_state=0``) and evaluate it.

    Returns the 7-tuple produced by ``cm_predition``:
    ``(classifier, cm, clf_report, accuracy_score, x_test, y_test, y_pred)``.
    """
    from sklearn.tree import DecisionTreeClassifier

    model = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
    model.fit(x_train, y_train)
    # Evaluation is delegated to the shared helper; its tuple is passed through.
    return cm_predition(model, x_test)

def knn(x_train,y_train,x_test):
    """Fit a 5-neighbour ``KNeighborsClassifier`` (Minkowski, ``p=2``) and evaluate it.

    Returns the 7-tuple produced by ``cm_predition``:
    ``(classifier, cm, clf_report, accuracy_score, x_test, y_test, y_pred)``.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    model.fit(x_train, y_train)
    # Evaluation is delegated to the shared helper; its tuple is passed through.
    return cm_predition(model, x_test)

def navie(x_train,y_train,x_test):
    """Fit a ``GaussianNB`` classifier and evaluate it on ``x_test``.

    Returns the 7-tuple produced by ``cm_predition``:
    ``(classifier, cm, clf_report, accuracy_score, x_test, y_test, y_pred)``.
    """
    from sklearn.naive_bayes import GaussianNB

    model = GaussianNB()
    model.fit(x_train, y_train)
    # Evaluation is delegated to the shared helper; its tuple is passed through.
    return cm_predition(model, x_test)

# Displaying the values in the tabular frame

def rfe_classification(acc_logistic, acc_svmL, acc_svmNL, acc_knn, acc_rf, acc_des, acc_nav):
    """Arrange per-classifier accuracy lists into a 4 x 7 table.

    Rows are labelled ``Logistic``, ``Random``, ``Decision``, ``SVC`` and
    columns ``Logistic``, ``SVM_linear``, ``SVM_Nonlinear``, ``KNN``,
    ``Random``, ``Decision``, ``Navie``. Element ``k`` of each list fills
    row ``k`` of that list's column; entries beyond the fourth are ignored.

    Parameters
    ----------
    acc_logistic, acc_svmL, acc_svmNL, acc_knn, acc_rf, acc_des, acc_nav :
        Indexable sequences with at least four entries each, feeding the
        columns ``Logistic``, ``SVM_linear``, ``SVM_Nonlinear``, ``KNN``,
        ``Random``, ``Decision`` and ``Navie`` respectively.

    Returns
    -------
    pandas.DataFrame
        The filled table.

    Raises
    ------
    IndexError
        If any list has fewer than four entries.
    """
    scores_by_column = {
        "Logistic": acc_logistic,
        "SVM_linear": acc_svmL,
        "SVM_Nonlinear": acc_svmNL,
        "KNN": acc_knn,
        "Random": acc_rf,
        "Decision": acc_des,
        "Navie": acc_nav,
    }

    df=pd.DataFrame(index=["Logistic","Random","Decision","SVC"],columns=list(scores_by_column))

    for number,idex in enumerate(df.index):
        for column, scores in scores_by_column.items():
            # Single-step .loc write: chained `df[col][row] = v` assigns to a
            # temporary under pandas Copy-on-Write and leaves `df` untouched.
            df.loc[idex, column] = scores[number]

    return df




In [18]:
# Number of features RFE keeps for each ranking estimator. Change this value
# and re-run from this cell downward to produce the table for a different k.
N_FEATURES = 1

dataset=pd.read_csv("prep.csv",index_col=None)

# Convert categorical variables into dummy/indicator columns (first level dropped).
df1=pd.get_dummies(dataset,drop_first=True,dtype=int)

# Separate the independent variables from the dependent (target) variable.
indep=df1.drop("classification_yes",axis=1)
dep=df1["classification_yes"]

# NOTE(review): RFE is fitted here on the full dataset, before the train/test
# split that happens later, so feature selection sees the rows that are later
# used for testing. Confirm this is acceptable for the reported accuracies.
rfe_list=rfeFeature(indep, dep, N_FEATURES)

# Accuracy accumulators: one entry per RFE ranking estimator is appended later.
acc_logistic=[]
acc_svmL=[]
acc_svmNL=[]
acc_knn=[]
acc_nav=[]
acc_des=[]
acc_rf=[]



LogisticRegression()
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

DecisionTreeClassifier(max_features='sqrt', random_state=0)
SVC(kernel='linear', random_state=0)


In [19]:
# Start from empty accumulators so this cell can be re-run on its own.
# rfe_classification only reads the first four entries of each list, so
# appending to lists left over from a previous run would display stale scores.
acc_logistic=[]
acc_svmL=[]
acc_svmNL=[]
acc_knn=[]
acc_nav=[]
acc_des=[]
acc_rf=[]

# Each classifier helper is paired with the list that collects its accuracy.
model_runs = [
    (logistic, acc_logistic),
    (svm_linear, acc_svmL),
    (svm_nonlinear, acc_svmNL),
    (knn, acc_knn),
    (navie, acc_nav),
    (Decision, acc_des),
    (random, acc_rf),
]

for selected_features in rfe_list:

    # y_test is assigned at notebook level before any helper runs, because
    # cm_predition reads it from there when it is not passed explicitly.
    x_train,x_test,y_train,y_test = Split_Scaler(selected_features, dep)

    for fit_and_score, scores in model_runs:
        classifier,cm,clf_report,accuracy_score,x_test,y_test,y_pred = fit_and_score(x_train,y_train,x_test)
        scores.append(accuracy_score)

result= rfe_classification(acc_logistic,acc_svmL,acc_svmNL,acc_knn,acc_rf,acc_des,acc_nav)
result

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Unnamed: 0,Logistic,SVM_linear,SVM_Nonlinear,KNN,Random,Decision,Navie
Logistic,0.775,0.775,0.775,0.625,0.775,0.775,0.775
Random,0.9,0.908333,0.908333,0.883333,0.9,0.908333,0.9
Decision,0.9,0.908333,0.908333,0.883333,0.9,0.908333,0.9
SVC,0.625,0.625,0.625,0.625,0.625,0.625,0.591667


In [11]:
result  # table saved from the run tagged 5 (presumably 5 features passed to rfeFeature; confirm)

Unnamed: 0,Logistic,SVM_linear,SVM_Nonlinear,KNN,Random,Decision,Navie
Logistic,0.975,0.975,0.975,0.975,0.975,0.975,0.975
Random,0.966667,0.966667,0.983333,0.975,0.975,0.95,0.916667
Decision,0.95,0.983333,0.933333,0.95,0.966667,0.975,0.858333
SVC,0.983333,0.983333,0.983333,0.983333,0.983333,0.983333,0.983333


In [14]:
result  # table saved from the run tagged 4 (presumably 4 features passed to rfeFeature; confirm)

Unnamed: 0,Logistic,SVM_linear,SVM_Nonlinear,KNN,Random,Decision,Navie
Logistic,0.95,0.95,0.95,0.95,0.95,0.95,0.95
Random,0.975,0.975,0.975,0.983333,0.966667,0.958333,0.875
Decision,0.975,0.916667,0.916667,0.975,0.975,0.975,0.816667
SVC,0.958333,0.958333,0.958333,0.958333,0.958333,0.958333,0.958333


In [8]:
result  # table saved from the run tagged 3 (presumably 3 features passed to rfeFeature; confirm)

Unnamed: 0,Logistic,SVM_linear,SVM_Nonlinear,KNN,Random,Decision,Navie
Logistic,0.941667,0.941667,0.941667,0.941667,0.941667,0.941667,0.941667
Random,0.941667,0.941667,0.941667,0.941667,0.941667,0.941667,0.941667
Decision,0.941667,0.941667,0.941667,0.941667,0.933333,0.908333,0.9
SVC,0.975,0.975,0.975,0.975,0.966667,0.966667,0.8


In [17]:
result  # table saved from the run tagged 2 (presumably 2 features passed to rfeFeature; confirm)

Unnamed: 0,Logistic,SVM_linear,SVM_Nonlinear,KNN,Random,Decision,Navie
Logistic,0.85,0.85,0.85,0.625,0.85,0.85,0.85
Random,0.925,0.925,0.933333,0.916667,0.933333,0.916667,0.858333
Decision,0.933333,0.933333,0.933333,0.941667,0.933333,0.933333,0.716667
SVC,0.741667,0.741667,0.741667,0.741667,0.741667,0.741667,0.741667


In [20]:
result  # table saved from the run tagged 1 (presumably 1 feature passed to rfeFeature; confirm)

Unnamed: 0,Logistic,SVM_linear,SVM_Nonlinear,KNN,Random,Decision,Navie
Logistic,0.775,0.775,0.775,0.625,0.775,0.775,0.775
Random,0.9,0.908333,0.908333,0.883333,0.9,0.908333,0.9
Decision,0.9,0.908333,0.908333,0.883333,0.9,0.908333,0.9
SVC,0.625,0.625,0.625,0.625,0.625,0.625,0.591667
