# Experiments

## Ensembling two classifiers 


1. Normalizacja wsparć poszczególnych klasyfikatorów?


In [13]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_breast_cancer, load_wine, load_digits, load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import VotingClassifier, BaggingClassifier

from sklearn.model_selection import StratifiedKFold
from sklearn.impute import SimpleImputer
from sklearn import preprocessing

In [14]:
class Dataset(object):
    def __init__(self, data, target):
        self.data = data
        self.target = target

def load_mammographic(): 
    columns = ['bi_rads', 'age', 'shape', 'margin', 'density', 'class']

    mammographic = pd.read_csv('datasets/mammographic_masses.data', names=columns)
    mammographic = mammographic.replace('?', np.nan)
    imp = SimpleImputer(missing_values=np.nan, strategy='median')
    dataset = pd.DataFrame(imp.fit_transform(mammographic), columns=columns)

    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_abalone(): 
    columns = ['sex', 'len', 'dia', 'h', 'ww', 'sw', 'vw', 'shw', 'class']

    dataset = pd.read_csv('datasets/abalone.data', names=columns)
    dataset = dataset.replace('M', 0)
    dataset = dataset.replace('F', 1)
    dataset = dataset.replace('I', 2)

    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_vihno_verde(): 
    dataset = pd.read_csv('datasets/winequality-white.csv', header=0, sep=';')

    y = dataset['quality'].values.astype(int)
    X = dataset.drop('quality', axis=1).values
    return Dataset(X, y)


def load_balance(): 
    columns = ['class', 'lw', 'ld', 'rw', 'rd']
    dataset = pd.read_csv('datasets/balance-scale.data', names=columns)
    
    dataset = dataset.replace('L', 0)
    dataset = dataset.replace('B', 1)
    dataset = dataset.replace('R', 2)

    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_yeast(): 
    columns = ['name','xz','mcg', 'gvh', 'alm', 'mit', 'erl', 'pox', 'vac', 'class']
    dataset = pd.read_csv('datasets/yeast.data', names=columns, delim_whitespace=True)
    dataset = dataset.drop('name', axis=1)
    
    dataset = dataset.replace('CYT', 0)
    dataset = dataset.replace('NUC', 1)
    dataset = dataset.replace('MIT', 2)
    dataset = dataset.replace('ME3', 3)
    dataset = dataset.replace('ME2', 4)
    dataset = dataset.replace('ME1', 5)
    dataset = dataset.replace('EXC', 6)
    dataset = dataset.replace('VAC', 7)
    dataset = dataset.replace('POX', 8)
    dataset = dataset.replace('ERL', 9)

    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_biodeg(): 
    columns = [x for x in range(41)]
    columns.append('class')
    dataset = pd.read_csv('datasets/biodeg.csv', names=columns, sep=";")
    
    dataset = dataset.replace('RB', 0)
    dataset = dataset.replace('NRB', 1)

    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_tic_tac(): 
    columns = [x for x in range(9)]
    columns.append('class')
    dataset = pd.read_csv('datasets/tic-tac-toe.data', names=columns)
    
    dataset = dataset.replace('x', 3)
    dataset = dataset.replace('o', 4)
    dataset = dataset.replace('b', 5)
    
    dataset = dataset.replace('negative', 0)
    dataset = dataset.replace('positive', 1)
    
    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_segmentation(): 
    columns = ['class'] + [x for x in range(19)]
    dataset = pd.read_csv('datasets/segmentation.test', names=columns)
    
    dataset = dataset.replace('BRICKFACE', 1)
    dataset = dataset.replace('SKY', 2)
    dataset = dataset.replace('FOLIAGE', 3)
    dataset = dataset.replace('CEMENT', 4)
    dataset = dataset.replace('WINDOW', 5)
    dataset = dataset.replace('PATH', 6)
    dataset = dataset.replace('GRASS', 7)
    
    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_phishing_websites(): 
    columns = ['id'] + [x for x in range(30)] + ['class']
    dataset = pd.read_csv('datasets/phishing-websites.csv', names=columns)
    dataset = dataset.sample(100)
    
    dataset['class'] = dataset['class'].replace(-1, 0)
    
    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    return Dataset(X, y)

def load_indian_liver(): 
    columns = [x for x in range(10)] + ['class']
    dataset = pd.read_csv('datasets/indian-liver.csv', names=columns)
    dataset = dataset.replace('Male', 0)
    dataset = dataset.replace('Female', 1)

    imp = SimpleImputer(missing_values=np.nan, strategy='median')
    dataset = pd.DataFrame(imp.fit_transform(dataset), columns=columns)
    
    y = dataset['class'].values.astype(int)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_diabetic_retinppathy(): 
    dataset = pd.read_csv('datasets/diabetic-retinopathy.csv', header=0)
    dataset = dataset.drop('id', axis=1)
    
    y = dataset['Class'].values.astype(int)
    X = dataset.drop('Class', axis=1).values
    
    return Dataset(X, y)

def load_thoracic_surgery(): 
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    dataset = pd.read_csv('datasets/thoracic-surgery.csv', header=0)
    dataset = dataset.drop('id', axis=1)
    
    y = le.fit_transform(dataset['Risk1Yr'].values)
    X = dataset.drop('Risk1Yr', axis=1).values
    X = encoder.fit_transform(X)
    
    return Dataset(X, y)

def load_house_votes_84(): 
    columns = ['class'] + [x for x in range(17)]
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    dataset = pd.read_csv('datasets/house-votes-84.data', names=columns)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    
    X = dataset.drop('class', axis=1).values
    X = imp.fit_transform(X)
    X = encoder.fit_transform(X)
    
    return Dataset(X, y)

def load_australian():
    columns = [x for x in range(14)] + ['class']
    dataset = pd.read_csv('datasets/australian.dat', names=columns, sep=" ")
    
    y = dataset['class'].values
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_ecoli():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    columns = [x for x in range(8)] + ['class']
    dataset = pd.read_csv('datasets/ecoli.data', names=columns, delim_whitespace=True)
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = encoder.fit_transform(X)
    
    return Dataset(X, y)

def load_ionoshpere():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    columns = [x for x in range(34)] + ['class']
    dataset = pd.read_csv('datasets/ionosphere.data', names=columns)
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_glasss():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    columns = ['id'] + [x for x in range(9)] + ['class']
    dataset = pd.read_csv('datasets/glass.data', names=columns)
    dataset = dataset.drop('id', axis=1)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_haberman():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    columns = [x for x in range(3)] + ['class']
    dataset = pd.read_csv('datasets/haberman.data', names=columns)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_letter_recognition():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    columns = ['class'] + [x for x in range(16)]
    dataset = pd.read_csv('datasets/letter-recognition.data', names=columns)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_horse_colic():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(23)] + ['class'] + [25, 26, 27]
    dataset = pd.read_csv('datasets/horse-colic.data', names=columns, delim_whitespace=True)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = imp.fit_transform(X)
    
    return Dataset(X, y)

def load_heart_statlog():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(13)] + ['class']
    dataset = pd.read_csv('datasets/heart.data', names=columns, delim_whitespace=True)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = imp.fit_transform(X)
    
    return Dataset(X, y)

def load_hepatitis():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = ['class'] + [x for x in range(19)]
    dataset = pd.read_csv('datasets/hepatitis.data', names=columns)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = imp.fit_transform(X)
    
    return Dataset(X, y)

def load_connectionist_bench():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(60)] + ['class']
    dataset = pd.read_csv('datasets/sonar.data', names=columns)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = imp.fit_transform(X)
    
    return Dataset(X, y)

def load_waveform():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(40)] + ['class']
    dataset = pd.read_csv('datasets/waveform.data', names=columns)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = imp.fit_transform(X)
    
    return Dataset(X, y)

def load_car_evaluation():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(6)] + ['class']
    dataset = pd.read_csv('datasets/car.data', names=columns)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = encoder.fit_transform(X)
    
    return Dataset(X, y)

def load_nursery():
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(8)] + ['class']
    dataset = pd.read_csv('datasets/nursery.data', names=columns)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = encoder.fit_transform(X)
    
    return Dataset(X, y)

def load_credit_screening():
    # Credit Approval dataset
    # 
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(15)] + ['class']
    dataset = pd.read_csv('datasets/crx.data', names=columns)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    X = imp.fit_transform(X)
    X = encoder.fit_transform(X)
    
    return Dataset(X, y)

def load_german():
    # German Credit Approval dataset statlog-numeric
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(24)] + ['class']
    dataset = pd.read_csv('datasets/german.data', names=columns, delim_whitespace=True)
    dataset = dataset.replace('?', np.nan)
    
    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_banknote():
    # banknote authentication Data Set
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(4)] + ['class']
    dataset = pd.read_csv('datasets/banknote.data', names=columns)
    dataset = dataset.replace('?', np.nan)

    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_wilt():
    # Wilt Data Set
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = ['class'] + [x for x in range(5)]
    dataset = pd.read_csv('datasets/wilt.data', names=columns)
    dataset = dataset.replace('?', np.nan)

    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_contraceptive():
#     Contraceptive Method Choice Data Set 
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(9)] +  ['class']
    dataset = pd.read_csv('datasets/cmc.data', names=columns)
    dataset = dataset.replace('?', np.nan)

    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

def load_vertebral():
    # Vertebral Column Data Set 
    encoder = preprocessing.OrdinalEncoder()
    le = preprocessing.LabelEncoder()
    imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    
    columns = [x for x in range(6)] +  ['class']
    dataset = pd.read_csv('datasets/vertebral.data', names=columns, delim_whitespace=True)
    dataset = dataset.replace('?', np.nan)

    y = le.fit_transform(dataset['class'].values)
    X = dataset.drop('class', axis=1).values
    
    return Dataset(X, y)

In [15]:
# dataset = load_breast_cancer()
datasets = [
    load_mammographic,
    load_abalone,
    load_balance,
    load_yeast,
    load_biodeg,
    load_tic_tac,
    load_segmentation,
    load_phishing_websites,
    load_indian_liver,
    load_diabetic_retinppathy,
    load_thoracic_surgery,
    load_house_votes_84,
    load_australian,
    load_ecoli,
    load_ionoshpere,
    load_glasss,
    load_haberman,
    load_letter_recognition,
    load_horse_colic,
    load_heart_statlog,
    load_hepatitis,
    load_connectionist_bench,
    load_waveform,
    load_car_evaluation,
    load_nursery,
    load_credit_screening,
    load_german,
    load_banknote,
    load_wilt,
    load_contraceptive,
    load_vertebral
]

scaler = preprocessing.StandardScaler()

dataset = load_credit_screening()
X = dataset.data
y = dataset.target

X = scaler.fit_transform(X)


In [16]:
class Ensemble(object):
    
    def __init__(self, base_classifiers):
        self.base_classifiers = base_classifiers
        self.meta_classifier = MLPClassifier(max_iter=2000)
        
    def fit_base_classfiers(self, X, y):
        for classifier in self.base_classifiers:
            classifier.fit(X,y)
            
    def fit_meta_classifier_supports(self, X, y):
        supports = self.get_supports(X)
        self.meta_classifier = MLPClassifier(max_iter=2000)
        self.meta_classifier.fit(supports, y)
        
    def fit_meta_classifier_features(self, X, y, ):
        supports = self.get_supports(X)
        supports_with_features = np.concatenate((supports, X), axis=1)
        self.meta_classifier = MLPClassifier(max_iter=20000)
        self.meta_classifier.fit(supports_with_features, y)
        
    def predict_meta(self, X):
        supports = self.get_supports(X)
        return self.meta_classifier.predict(supports)

    def predict_meta_features(self, X):
        supports = self.get_supports(X)
        supports = np.concatenate((supports, X), axis=1)
        return self.meta_classifier.predict(supports)
        
    def get_supports(self, X):
        temp = []
        for classifier in self.base_classifiers:
            if temp == []:
                temp = classifier.predict_proba(X)
            else:
                temp = np.concatenate((temp, classifier.predict_proba(X)), axis=1)
        return temp

In [17]:
estimators = [
    SVC(probability=True, kernel='linear'),
    SVC(probability=True, kernel='rbf', gamma='auto'),
    KNeighborsClassifier(n_neighbors=3),
    KNeighborsClassifier(n_neighbors=5),
    MLPClassifier(max_iter=5000)
]
ens = Ensemble(estimators)

bc_names = ['svm_lin', 'svm_rbf', 'knn3', 'knn5', 'mlp']
voting_bc = list(zip(bc_names, estimators))
voitng_clf = VotingClassifier(voting_bc)
bagging_clf = BaggingClassifier()

In [18]:
results = pd.DataFrame(columns=['no_features', 'features', 'voting', 'bagging', 'svm_lin', 'svm_rbf', 'knn3', 'knn5', 'mlp'])

kf = StratifiedKFold(n_splits=10, random_state=5, shuffle=True)
for train_index, test_index in kf.split(X, y):
    train_base, train_ensemble = np.split(train_index, [int(.5*len(train_index))])
    
    X_train = X[train_index]
    y_train = y[train_index]
    ens.fit_base_classfiers(X_train, y_train)
    
    X_ens = X[train_index]
    y_ens = y[train_index]
    ens.fit_meta_classifier_supports(X_ens, y_ens)
    
    X_test = X[test_index]
    y_test = y[test_index]
    ens_score = accuracy_score(ens.predict_meta(X_test), y_test)
    
    ens.fit_meta_classifier_features(X_ens, y_ens)
    ens_score_features = accuracy_score(ens.predict_meta_features(X_test), y_test)
    
    voitng_clf.fit(X_train, y_train)
    voting = accuracy_score(voitng_clf.predict(X_test), y_test)
    
    bagging_clf.fit(X_train, y_train)
    bagging = accuracy_score(bagging_clf.predict(X_test), y_test)
    
    svm_lin = accuracy_score(ens.base_classifiers[0].predict(X_test), y_test)
    svm_rbf = accuracy_score(ens.base_classifiers[1].predict(X_test), y_test)
    knn3 = accuracy_score(ens.base_classifiers[1].predict(X_test), y_test)
    knn5 = accuracy_score(ens.base_classifiers[2].predict(X_test), y_test)
    mlp = accuracy_score(ens.base_classifiers[3].predict(X_test), y_test)
    
    results.loc[len(results)] = [ens_score, ens_score_features, voting, bagging, svm_lin, svm_rbf, knn3, knn5, mlp]







In [19]:
for cls in ens.base_classifiers:
    print(accuracy_score(cls.predict(X_test), y_test))

0.8529411764705882
0.8823529411764706
0.8235294117647058
0.8382352941176471
0.75


In [20]:
results

Unnamed: 0,no_features,features,voting,bagging,svm_lin,svm_rbf,knn3,knn5,mlp
0,0.885714,0.885714,0.871429,0.9,0.9,0.857143,0.857143,0.857143,0.9
1,0.828571,0.828571,0.857143,0.871429,0.871429,0.885714,0.885714,0.814286,0.842857
2,0.871429,0.871429,0.885714,0.857143,0.857143,0.857143,0.857143,0.828571,0.842857
3,0.884058,0.884058,0.913043,0.927536,0.884058,0.927536,0.927536,0.913043,0.913043
4,0.855072,0.855072,0.898551,0.869565,0.84058,0.884058,0.884058,0.913043,0.869565
5,0.811594,0.826087,0.84058,0.797101,0.811594,0.826087,0.826087,0.73913,0.768116
6,0.869565,0.855072,0.84058,0.797101,0.826087,0.826087,0.826087,0.855072,0.826087
7,0.852941,0.852941,0.897059,0.882353,0.882353,0.882353,0.882353,0.867647,0.882353
8,0.823529,0.823529,0.882353,0.852941,0.852941,0.882353,0.882353,0.897059,0.852941
9,0.75,0.75,0.852941,0.808824,0.852941,0.882353,0.882353,0.823529,0.838235


In [31]:
results.mean()
results.mean().to_frame().transpose().rename(index={0: 'load_credit_screening'})

In [22]:
results.max()

no_features    0.885714
features       0.885714
voting         0.913043
bagging        0.927536
svm_lin        0.900000
svm_rbf        0.927536
knn3           0.927536
knn5           0.913043
mlp            0.913043
dtype: float64

In [23]:
results.min()

no_features    0.750000
features       0.750000
voting         0.840580
bagging        0.797101
svm_lin        0.811594
svm_rbf        0.826087
knn3           0.826087
knn5           0.739130
mlp            0.768116
dtype: float64

In [24]:
results.median()

no_features    0.854007
features       0.854007
voting         0.876891
bagging        0.863354
svm_lin        0.855042
svm_rbf        0.882353
knn3           0.882353
knn5           0.856108
mlp            0.847899
dtype: float64

In [46]:
def perform_test(X, y):
    estimators = [
        SVC(probability=True, kernel='linear'),
        SVC(probability=True, kernel='rbf', gamma='auto'),
        KNeighborsClassifier(n_neighbors=3),
        KNeighborsClassifier(n_neighbors=5),
        MLPClassifier(max_iter=5000)
    ]
    ens = Ensemble(estimators)

    bc_names = ['svm_lin', 'svm_rbf', 'knn3', 'knn5', 'mlp']
    voting_bc = list(zip(bc_names, estimators))
    voitng_clf = VotingClassifier(voting_bc)
    bagging_clf = BaggingClassifier()

    results = pd.DataFrame(columns=['no_features', 'features', 'voting', 'bagging', 'svm_lin', 'svm_rbf', 'knn3', 'knn5', 'mlp'])

    kf = StratifiedKFold(n_splits=10, random_state=5, shuffle=True)
    for train_index, test_index in kf.split(X, y):
        train_base, train_ensemble = np.split(train_index, [int(.5*len(train_index))])

        X_train = X[train_index]
        y_train = y[train_index]
        ens.fit_base_classfiers(X_train, y_train)

        X_ens = X[train_index]
        y_ens = y[train_index]
        ens.fit_meta_classifier_supports(X_ens, y_ens)

        X_test = X[test_index]
        y_test = y[test_index]
        ens_score = accuracy_score(ens.predict_meta(X_test), y_test)

        ens.fit_meta_classifier_features(X_ens, y_ens)
        ens_score_features = accuracy_score(ens.predict_meta_features(X_test), y_test)

        voitng_clf.fit(X_train, y_train)
        voting = accuracy_score(voitng_clf.predict(X_test), y_test)
        
        bagging_clf.fit(X_train, y_train)
        bagging = accuracy_score(bagging_clf.predict(X_test), y_test)

        svm_lin = accuracy_score(ens.base_classifiers[0].predict(X_test), y_test)
        svm_rbf = accuracy_score(ens.base_classifiers[1].predict(X_test), y_test)
        knn3 = accuracy_score(ens.base_classifiers[1].predict(X_test), y_test)
        knn5 = accuracy_score(ens.base_classifiers[2].predict(X_test), y_test)
        mlp = accuracy_score(ens.base_classifiers[3].predict(X_test), y_test)

        results.loc[len(results)] = [ens_score, ens_score_features, voting, bagging, svm_lin, svm_rbf, knn3, knn5, mlp]
        
    return results.mean().to_frame().transpose()

In [47]:
scaler = preprocessing.StandardScaler()
columns_results=['no_features', 'features', 'voting', 'bagging', 'svm_lin', 'svm_rbf', 'knn3', 'knn5', 'mlp']
loop_results = pd.DataFrame(columns=columns_results)

for data in datasets:
    print(data.__name__)

    dataset = data()
    X = dataset.data
    y = dataset.target
    X = scaler.fit_transform(X)
    
    loop_result = perform_test(X, y).rename(index={0: data.__name__})
    loop_results = loop_results.append(loop_result)

load_mammographic








load_abalone








load_balance








load_yeast








load_biodeg








load_tic_tac








load_segmentation








load_phishing_websites








load_indian_liver








load_diabetic_retinppathy








load_thoracic_surgery








load_house_votes_84








load_australian








load_ecoli








load_ionoshpere








load_glasss








load_haberman








load_letter_recognition










load_horse_colic








load_heart_statlog








load_hepatitis








load_connectionist_bench








load_waveform








load_car_evaluation








load_nursery








load_credit_screening








load_german








load_banknote








load_wilt








load_contraceptive








load_vertebral








In [48]:
print(loop_results)

                           no_features  features    voting   bagging  \
load_mammographic             0.794913  0.791788  0.810593  0.770062   
load_abalone                  0.199684  0.193717  0.258227  0.224985   
load_balance                  0.982329  0.990345  0.918234  0.803078   
load_yeast                    0.546500  0.533742  0.598483  0.580281   
load_biodeg                   0.872028  0.872010  0.880564  0.854149   
load_tic_tac                  0.951939  0.958200  0.844390  0.954034   
load_segmentation             0.976667  0.978095  0.964286  0.968095   
load_phishing_websites        0.878889  0.888889  0.890909  0.910000   
load_indian_liver             0.706670  0.699800  0.708781  0.715651   
load_diabetic_retinppathy     0.712436  0.713321  0.710690  0.682024   
load_thoracic_surgery         0.770213  0.787234  0.848936  0.825532   
load_house_votes_84           0.958822  0.956549  0.961150  0.951681   
load_australian               0.831906  0.830415  0.850770  0.86

In [49]:
loop_results.to_csv('results.csv')