# Part2. Model design 3

OVERSAMPLING TECHNIQUE - SMOTE to overcome the data imbalance

In [1]:
from imblearn.over_sampling import SMOTE

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import tempfile

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.metrics import SpecificityAtSensitivity
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

from sklearn import metrics
from collections import Counter
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor 
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, BaggingClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_selection import RFE

In [2]:
#load the original data
df1=pd.read_csv("../Data/kag_risk_factors_cervical_cancer.csv")

#load the processed datasets:
X_train=pd.read_csv("../Data/X_train_preprocessed.csv")
X_test=pd.read_csv("../Data/X_test_preprocessed.csv")
X_validate=pd.read_csv("../Data/X_validate_preprocessed.csv")
y_train=pd.read_csv("../Data/y_train_preprocessed.csv")
y_test=pd.read_csv("../Data/y_test_preprocessed.csv")
y_validate=pd.read_csv("../Data/y_validate_preprocessed.csv")

In [3]:
#convert df to np:
y_train_np=y_train.to_numpy()
X_train_np=X_train.to_numpy()

In [4]:
#using SMOTE create a balanced train dataset:
x_train_s, y_train_s = SMOTE(random_state=33).fit_resample(X_train_np, y_train_np.ravel())
print(sorted(Counter(y_train_s).items()))



[(0, 615), (1, 615)]


In [5]:
l_final = [] #--> New list for storing metrics of base models

def models_dt(x,y,x_test,y_test):
    mod = {}
    model = DecisionTreeClassifier().fit(x,y)
    ypred = model.predict(x_test)
    mod['Model'] = 'Decision Tree After Sampling'
    mod['Train_Score'] = model.score(x_train_s,y_train_s)
    mod['Test_accuracy'] = metrics.accuracy_score(y_test,ypred)
    mod['f1score'] = metrics.f1_score(y_test,ypred)
    mod['recall'] = metrics.recall_score(y_test, ypred)
    mod['precision'] = metrics.precision_score(y_test, ypred)
    model.predict_proba(x_test)
    mod['roc_auc'] = metrics.roc_auc_score(y_test,ypred)
    return mod
l_final.append(models_dt(x_train_s,y_train_s,X_test,y_test))

def models_rf(x,y, x_test, y_test):
    mod = {}
    model = RandomForestClassifier().fit(x,y)
    ypred = model.predict(x_test)
    mod['Model'] = 'Random Forest After Sampling'
    mod['Train_Score'] = model.score(x_train_s,y_train_s)
    mod['Test_accuracy'] = metrics.accuracy_score(y_test,ypred)
    mod['f1score'] = metrics.f1_score(y_test,ypred)
    mod['recall'] = metrics.recall_score(y_test, ypred)
    mod['precision'] = metrics.precision_score(y_test, ypred)
    model.predict_proba(x_test)
    mod['roc_auc'] = metrics.roc_auc_score(y_test,ypred)
    return mod
l_final.append(models_rf(x_train_s,y_train_s, X_test, y_test))


In [6]:
final_model = pd.DataFrame(l_final)
final_model

Unnamed: 0,Model,Train_Score,Test_accuracy,f1score,recall,precision,roc_auc
0,Decision Tree After Sampling,1.0,0.948276,0.625,0.555556,0.714286,0.768432
1,Random Forest After Sampling,1.0,0.948276,0.666667,0.666667,0.666667,0.819315


In [7]:
#recall is 0.66 with the second model

In [8]:
for i in range(1,df1.shape[1]):
   
    model = RFE(DecisionTreeClassifier(),i).fit(X_train,y_train)
    print(X_train.columns[model.support_])
    x_train_1,x_test_1,y_train_1,y_test_1 = train_test_split(model.transform(X_train),y_train,test_size=0.3,random_state=0)
    dt = DecisionTreeClassifier()
    dt.fit(x_train_1,y_train_1)
    y_pred_1 = dt.predict(x_test_1)
    dt_recall = metrics.roc_auc_score(y_test_1,y_pred_1)
    print(dt_recall)
    print('*'*3)



Index(['Schiller'], dtype='object')
0.925091575091575
***
Index(['Age', 'Schiller'], dtype='object')
0.7639194139194139
***
Index(['Age', 'First sexual intercourse', 'Schiller'], dtype='object')
0.863919413919414
***
Index(['Age', 'First sexual intercourse', 'Hormonal Contraceptives (years)',
       'Schiller'],
      dtype='object')
0.8611721611721612
***
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Hormonal Contraceptives (years)', 'Schiller'],
      dtype='object')
0.8611721611721612
***
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal Contraceptives (years)', 'Schiller'],
      dtype='object')
0.8611721611721612
***
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal Contraceptives (years)', 'IUD (years)',
       'Schiller'],
      dtype='object')
0.6945054945054945
***
Index(['Age', 'Number of sexual partners', 'First sexual inter



Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal Contraceptives (years)', 'IUD (years)',
       'STDs:genital herpes', 'STDs:HIV', 'STDs: Number of diagnosis',
       'Schiller'],
      dtype='object')
0.863919413919414
***
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal Contraceptives (years)', 'IUD (years)',
       'STDs:genital herpes', 'STDs:HIV', 'STDs: Number of diagnosis',
       'Dx:CIN', 'Schiller'],
      dtype='object')
0.7305860805860807
***
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal Contraceptives (years)', 'IUD (years)',
       'STDs:genital herpes', 'STDs:HIV', 'STDs:HPV',
       'STDs: Number of diagnosis', 'Dx:CIN', 'Schiller'],
      dtype='object')
0.6972527472527472
***
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal C



In [9]:
# List of features with high recall score with decission tree classifier
rfe_dt = ['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)','IUD (years)',
        'STDs', 'STDs (number)', 'STDs:genital herpes',
       'STDs:molluscum contagiosum', 'STDs:HIV', 'STDs:HPV',
        'STDs: Number of diagnosis', 'Dx:CIN', 'Schiller', 'Citology']  


len(rfe_dt)



20

In [10]:
for i in range(1,df1.shape[1]):
   
    model = RFE(RandomForestClassifier(),i).fit(X_train,y_train)
    print(X_train.columns[model.support_])
    x_train_1,x_test_1,y_train_1,y_test_1 = train_test_split(model.transform(X_train),y_train,test_size=0.3,random_state=0)
    rf = RandomForestClassifier()
    rf.fit(x_train_1,y_train_1)
    y_pred_1 = rf.predict(x_test_1)
    rf_recall = metrics.roc_auc_score(y_test_1,y_pred_1)
    print(rf_recall)
    print('*'*100)



  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:,

  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)


Index(['Schiller'], dtype='object')
0.925091575091575
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_tra

Index(['Age', 'Schiller'], dtype='object')
0.9305860805860806
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_tra

Index(['Age', 'Hormonal Contraceptives (years)', 'Schiller'], dtype='object')
0.6611721611721612
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)


Index(['Age', 'First sexual intercourse', 'Hinselmann', 'Schiller'], dtype='object')
0.8278388278388278
****************************************************************************************************


  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,

Index(['Age', 'First sexual intercourse', 'Hormonal Contraceptives (years)',
       'Hinselmann', 'Schiller'],
      dtype='object')
0.8972527472527473
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Hormonal Contraceptives (years)', 'Hinselmann', 'Schiller'],
      dtype='object')
0.8917582417582417
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal Contraceptives (years)', 'Hinselmann',
       'Schiller'],
      dtype='object')
0.8945054945054945
****************************************************************************************************


  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Hormonal Contraceptives (years)', 'Hinselmann',
       'Schiller', 'Citology'],
      dtype='object')
0.8250915750915749
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)',
       'Hormonal Contraceptives (years)', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.925091575091575
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)',
       'Hormonal Contraceptives (years)', 'STDs (number)', 'Hinselmann',
       'Schiller', 'Citology'],
      dtype='object')
0.8972527472527473
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)',
       'Hormonal Contraceptives (years)', 'IUD (years)', 'STDs (number)',
       'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.8945054945054945
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (packs/year)',
       'Hormonal Contraceptives (years)', 'IUD (years)', 'STDs (number)', 'Dx',
       'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.7611721611721611
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)',
       'Hormonal Contraceptives (years)', 'IUD (years)', 'STDs:genital herpes',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.8611721611721612
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives (years)', 'IUD (years)', 'STDs:genital herpes',
       'STDs: Number of diagnosis', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.8278388278388278
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives (years)', 'IUD (years)', 'STDs (number)',
       'STDs:genital herpes', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.8945054945054945
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives (years)', 'IUD (years)', 'STDs (number)',
       'STDs:genital herpes', 'STDs: Number of diagnosis', 'Dx:Cancer',
       'Dx:CIN', 'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.8611721611721612
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)',
       'IUD (years)', 'STDs (number)', 'STDs:genital herpes',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx', 'Hinselmann',
       'Schiller', 'Citology'],
      dtype='object')
0.7945054945054946
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)',
       'IUD (years)', 'STDs (number)', 'STDs:genital herpes',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:HPV', 'Dx', 'Hinselmann',
       'Schiller', 'Citology'],
      dtype='object')
0.8278388278388278
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs (number)', 'STDs:genital herpes',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:HPV', 'Dx', 'Hinselmann',
       'Schiller', 'Citology'],
      dtype='object')
0.7945054945054946
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs (number)', 'STDs:genital herpes',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx',
       'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.8611721611721612
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs (number)', 'STDs:genital herpes',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx',
       'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.8945054945054945
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:genital herpes',
       'STDs:HIV', 'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:CIN',
       'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.7305860805860807
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:syphilis',
       'STDs:genital herpes', 'STDs: Number of diagnosis', 'Dx:Cancer',
       'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.7278388278388279
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:syphilis', 'STDs:genital herpes', 'STDs: Number of diagnosis',
       'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.7278388278388279
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)',
       'STDs:vulvo-perineal condylomatosis', 'STDs:syphilis',
       'STDs:genital herpes', 'STDs:HIV', 'STDs: Number of diagnosis',
       'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.7305860805860807
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vulvo-perineal condylomatosis', 'STDs:syphilis',
       'STDs:genital herpes', 'STDs:HIV', 'STDs: Number of diagnosis',
       'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.7972527472527473
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vulvo-perineal condylomatosis', 'STDs:syphilis',
       'STDs:genital herpes', 'STDs:HIV', 'STDs:HPV',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx',
       'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.7305860805860807
****************************************************************************************************


  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  estimator.fit(X[:, features], y)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vulvo-perineal condylomatosis', 'STDs:syphilis',
       'STDs:pelvic inflammatory disease', 'STDs:genital herpes', 'STDs:HIV',
       'STDs:Hepatitis B', 'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:CIN',
       'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.7611721611721611
****************************************************************************************************


  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vulvo-perineal condylomatosis', 'STDs:syphilis',
       'STDs:pelvic inflammatory disease', 'STDs:genital herpes', 'STDs:HIV',
       'STDs:Hepatitis B', 'STDs:HPV', 'STDs: Number of diagnosis',
       'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.7305860805860807
****************************************************************************************************


  estimator.fit(X[:, features], y)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vaginal condylomatosis', 'STDs:vulvo-perineal condylomatosis',
       'STDs:syphilis', 'STDs:pelvic inflammatory disease',
       'STDs:genital herpes', 'STDs:HIV', 'STDs:Hepatitis B', 'STDs:HPV',
       'STDs: Number of diagnosis', 'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx',
       'Hinselmann', 'Schiller', 'Citology'],
      dtype='object')
0.7305860805860807
****************************************************************************************************
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
   

  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  self.estimator_.fit(X[:, features], y)


0.7305860805860807
****************************************************************************************************
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vaginal condylomatosis', 'STDs:vulvo-perineal condylomatosis',
       'STDs:syphilis', 'STDs:pelvic inflammatory disease',
       'STDs:genital herpes', 'STDs:molluscum contagiosum', 'STDs:HIV',
       'STDs:Hepatitis B', 'STDs:HPV', 'STDs: Number of diagnosis',
       'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.6972527472527472
****************************************************************************************************


  rf.fit(x_train_1,y_train_1)
  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)


Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vaginal condylomatosis', 'STDs:vulvo-perineal condylomatosis',
       'STDs:syphilis', 'STDs:pelvic inflammatory disease',
       'STDs:genital herpes', 'STDs:molluscum contagiosum', 'STDs:HIV',
       'STDs:Hepatitis B', 'STDs:HPV', 'STDs: Number of diagnosis',
       'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.7305860805860807
****************************************************************************************************
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormona

  self.estimator_.fit(X[:, features], y)
  rf.fit(x_train_1,y_train_1)
  self.estimator_.fit(X[:, features], y)


0.7972527472527473
****************************************************************************************************
Index(['Age', 'Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:vaginal condylomatosis', 'STDs:vulvo-perineal condylomatosis',
       'STDs:syphilis', 'STDs:pelvic inflammatory disease',
       'STDs:genital herpes', 'STDs:molluscum contagiosum', 'STDs:HIV',
       'STDs:Hepatitis B', 'STDs:HPV', 'STDs: Number of diagnosis',
       'Dx:Cancer', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology'],
      dtype='object')
0.7639194139194139
****************************************************************************************************


  rf.fit(x_train_1,y_train_1)


In [11]:
#list of features with high recall score with random forest classifier
rfe_rf= ['Number of sexual partners', 'First sexual intercourse',
       'Num of pregnancies', 'Smokes (years)', 'Smokes (packs/year)',
       'Hormonal Contraceptives', 'Hormonal Contraceptives (years)', 'IUD',
       'IUD (years)', 'STDs (number)', 'STDs:condylomatosis',
       'STDs:genital herpes', 'Dx:CIN', 'Dx:HPV', 'Dx', 'Hinselmann', 'Schiller',
       'Citology']  # Set of features with high recall score

len(rfe_rf)

18

In [17]:
x_train_s = pd.DataFrame(x_train_s, columns = X_train.columns)
y_train_s = pd.DataFrame(y_train_s, columns = ['Biopsy'])
x_test_1 = pd.DataFrame(x_test_1, columns=X_train.columns)

rfe_dt_df_train = x_train_s[rfe_dt]
rfe_dt_df_test = x_test_1[rfe_dt]
rfe_rf_df_train = x_train_s[rfe_rf]
rfe_rf_df_test = x_test_1[rfe_rf]

l_feature = [] 
def models_dt(x,y, x_test, y_test):
    mod = {}
    model = DecisionTreeClassifier().fit(x,y)
    ypred = model.predict(rfe_dt_df_test)
    mod['Model'] = 'Decision Tree after Feature Selection'
    mod['Train_Score'] = model.score(rfe_dt_df_train,y_train_s.values)
    mod['Test_accuracy'] = metrics.accuracy_score(y_test,ypred)
    mod['f1score'] = metrics.f1_score(y_test_1,y_pred_1)
    mod['recall'] = metrics.recall_score(y_test, ypred)
    mod['precision'] = metrics.precision_score(y_test, ypred)
    model.predict_proba(rfe_dt_df_test)
    mod['roc_auc'] = metrics.roc_auc_score(y_test,ypred)
    return mod
l_feature.append(models_dt(rfe_dt_df_train,y_train_s.values, rfe_dt_df_test,y_test_1))

def models_rf(x,y, rfe_rf_df_test, y_train_s, y_test):
    mod = {}
    model = RandomForestClassifier().fit(x,y)
    ypred = model.predict(rfe_rf_df_test)
    mod['Model'] = 'Random Forest after Feature Selection'
    mod['Train_Score'] = model.score(rfe_rf_df_train.values,y_train_s.values)
    mod['Test_accuracy'] = metrics.accuracy_score(y_test,ypred)
    mod['f1score'] = metrics.f1_score(y_test,ypred)
    mod['recall'] = metrics.recall_score(y_test, ypred)
    mod['precision'] = metrics.precision_score(y_test, ypred)
    model.predict_proba(rfe_rf_df_test)
    mod['roc_auc'] = metrics.roc_auc_score(y_test,ypred)
    return mod
l_feature.append(models_rf(rfe_rf_df_train.values,y_train_s.values, rfe_rf_df_test, y_train_s, y_test))


  model = RandomForestClassifier().fit(x,y)


ValueError: Found input variables with inconsistent numbers of samples: [116, 197]

In [None]:
final_model = final_model.append(l_feature)
final_model

