In [0]:
import pandas as pd
import numpy as np

#Visualization tools
from matplotlib import pyplot as plt
import seaborn as sns; sns.set()
import itertools

#ML steps structure
from sklearn.pipeline import FeatureUnion, Pipeline

#Preprocessing
from sklearn.preprocessing import FunctionTransformer, MinMaxScaler
from sklearn.feature_extraction.text import HashingVectorizer, CountVectorizer
from sklearn.base import TransformerMixin
from imblearn.under_sampling import RandomUnderSampler

#Dimensionality reduction
from sklearn.decomposition import TruncatedSVD

#Model Validation
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics import explained_variance_score

#Ensemble
from sklearn.ensemble import BaggingClassifier
#from vecstack import stacking
from mlxtend.classifier import StackingClassifier

#Model selected
from sklearn.naive_bayes import GaussianNB #Naive Bayes
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier



In [0]:
#Read the data using the Unnamed (probably id) as index
url = 'https://s3.amazonaws.com/drivendata/data/4/public/81e8f2de-9915-4934-b9ae-9705685c9d50.csv'
training = pd.read_csv(url, index_col='Unnamed: 0')

labels = ['Function', 'Object_Type', 'Operating_Status', 'Position_Type', 'Pre_K', 'Reporting', 
          'Sharing', 'Student_Type', 'Use']

numeric = ['FTE', 'Total']

categoric = [ 'Facility_or_Department', 'Function_Description', 
            'Fund_Description', 'Job_Title_Description', 'Location_Description', 
            'Object_Description', 'Position_Extra', 'Program_Description', 'SubFund_Description', 
            'Sub_Object_Description', 
            'Text_1', 'Text_2', 'Text_3', 'Text_4']

## Pre-Processing

In [0]:
#Imputing data in Total column
def impute_func_total(data):
    if(pd.isnull(data['Total'])):        
        if(data['Object_Type'] == 'Base Salary/Compensation'):
            return 24146
        if(data['Object_Type'] == 'Benefits'):
            return 38163
        if(data['Object_Type'] == 'Contracted Services'):
            return 24146
        if(data['Object_Type'] == 'Equipment & Equipment Lease'):
            return 11257
        if(data['Object_Type'] == 'NO_LABEL'):
            return 58545
        if(data['Object_Type'] == 'Other Compensation/Stipend'):
            return 1605
        if(data['Object_Type'] == 'Other Non-Compensation'):
            return 10646
        if(data['Object_Type'] == 'Rent/Utilities'):
            return 46611
        if(data['Object_Type'] == 'Substitute Compensation'):
            return 1090
        if(data['Object_Type'] == 'Supplies/Materials'):
            return 7745
        if(data['Object_Type'] == 'Travel & Conferences'):
            return 1659
    else:
        return data['Total']

In [0]:
#Imputing data in FTE column
def impute_func_FTE(data):
    if(pd.isnull(data['FTE'])):        
        if(data['Object_Type'] == 'Base Salary/Compensation'):
            return 0.45
        if(data['Object_Type'] == 'Benefits'):
            return 0.0
        if(data['Object_Type'] == 'Contracted Services'):
            return 0.0
        if(data['Object_Type'] == 'Equipment & Equipment Lease'):
            return 0.0
        if(data['Object_Type'] == 'NO_LABEL'):
            return 0.75
        if(data['Object_Type'] == 'Other Compensation/Stipend'):
            return 0.000107
        if(data['Object_Type'] == 'Other Non-Compensation'):
            return 0.0
        if(data['Object_Type'] == 'Rent/Utilities'):
            return 0.0
        if(data['Object_Type'] == 'Substitute Compensation'):
            return 0.000059
        if(data['Object_Type'] == 'Supplies/Materials'):
            return 0.0
        if(data['Object_Type'] == 'Travel & Conferences'):
            return 0.0
    else:
        return data['FTE']

In [0]:
def preProcessing(training):
    # Remove inconsistent data
    training.loc[(training['FTE'] < 0) | (training['FTE'] > 1), 'FTE'] = np.nan
    training.loc[training['Total'] < 0, 'Total'] = np.nan
    
    training['Total'] = training.apply(impute_func_total, axis = 1)
    
    training['FTE'] = training.apply(impute_func_FTE, axis = 1)
    
    for category in categoric:
        training[category] = training[category].str.lower()
    
    training[categoric] = training[categoric].fillna("")
    
    return training

In [0]:
df_training = preProcessing(training)
df_training = df_training.reset_index(drop = True)

In [0]:
X = df_training.drop(columns=labels)
labels_data = pd.get_dummies(df_training['Object_Type'])

In [0]:
#col_names = list(range(0,11))
#labels_data.columns = col_names
#labels_data = labels_data.idxmax(axis=1)

In [0]:
labels_true = labels_data.idxmax(axis=1)

## Pipeline

In [0]:
def combine_text_columns(dataset):    
  return dataset[categoric].apply(lambda x: " ".join(x), axis = 1)

get_text_data = FunctionTransformer(combine_text_columns, validate = False)

In [0]:
def combine_numeric_columns(dataset):
  return dataset[numeric]

get_numeric_data = FunctionTransformer(combine_numeric_columns, validate = False)

In [0]:
#def pipeline_(clf):
pl = Pipeline([
          ('union', FeatureUnion(
              transformer_list = [
                  ('numeric_features', Pipeline([
                      ('selector', get_numeric_data),
                  ])),
                  ('text_features', Pipeline([
                      ('selector', get_text_data),
                      ('vectorizer',HashingVectorizer(token_pattern="[A-Za-z0-9]+(?=\\s+)", 
                                                      norm=None, 
                                                      binary=False,
                                                      ngram_range=(1,2), stop_words = 'english') 
                      )
                  ]))
               ]
          )),
          ('reduce_dim', TruncatedSVD(n_components = 150))
#           ('clf', clf)
      ])
 # return pl

In [0]:
sdv_data = pl.fit_transform(X, labels_true)

In [0]:
rus = RandomUnderSampler(replacement=True)
X_resampled, y_resampled = rus.fit_resample(sdv_data, labels_true)

In [0]:
#pd.DataFrame(y_resampled).to_csv("y_.csv")

In [0]:
#from imblearn.under_sampling import NearMiss

In [0]:
#nm1 = NearMiss(version=3)
#X_resampled_nm1, y_resampled = nm1.fit_resample(sdv_data, labels_true)

In [0]:
#from imblearn.under_sampling import AllKNN

In [0]:
#allknn = AllKNN()
#X_res, y_res = allknn.fit_resample(sdv_data, labels_true)

## Bagging with Naive Bayes method



In [0]:
d = {'NO_LABEL':1, 'Base Salary/Compensation':2, 'Benefits':3, 'Substitute Compensation':4, 'Supplies/Materials':5, 
    'Rent/Utilities':6, 'Other Compensation/Stipend': 7, 'Contracted Services' : 8, 'Equipment & Equipment Lease':9, 
     'Other Non-Compensation':10, 'Travel & Conferences':11}

In [0]:
Y = pd.DataFrame(y_resampled)
target = Y.applymap(lambda s: d.get(s) if s in d else s)

In [0]:
bagging_NB = GaussianNB(var_smoothing = 1e-30)

In [0]:
cross_val_score(bagging_NB, X_resampled, target, cv=10)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


array([0.60559006, 0.60982496, 0.63297572, 0.60897798, 0.61151892,
       0.60446076, 0.61716544, 0.61321287, 0.61434218, 0.62648221])

In [0]:
np.mean([0.10982496, 0.11405985, 0.11010728, 0.11716544, 0.1177301 ,
       0.11716544, 0.10897798, 0.11377753, 0.11998871, 0.13438735])

0.11631846399999998

In [0]:
np.std([0.10982496, 0.11405985, 0.11010728, 0.11716544, 0.1177301 ,
       0.11716544, 0.10897798, 0.11377753, 0.11998871, 0.13438735])

0.006996910667534919

In [0]:
#target = df_training[['Object_Type']].applymap(lambda s: d.get(s) if s in d else s)
#target = target['Object_Type'].values
#train = df_training.drop(columns=labels)
#labels_obT = df_training['Object_Type'].unique()
#X_train, X_test, y_train, y_test = train_test_split(train_NB,
#                                                    target_NB,
#                                                    test_size=0.3, 
#                                                    random_state=42)

### Bagging NB 10 classifiers

In [0]:
bagging_NB = BaggingClassifier(GaussianNB(var_smoothing = 1e-30))

In [0]:
cross_val_score(bagging_NB, X_resampled, target, cv=10)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


array([0.60700169, 0.61236589, 0.63099944, 0.61123659, 0.60982496,
       0.60446076, 0.61688312, 0.61631846, 0.61377753, 0.63015246])

In [0]:
np.mean([0.11038961, 0.11490683, 0.10897798, 0.11801242, 0.11801242,
       0.11716544, 0.11462451, 0.18407679, 0.11970638, 0.11688312])

0.12227555000000001

In [0]:
np.std([0.11038961, 0.11490683, 0.10897798, 0.11801242, 0.11801242,
       0.11716544, 0.11462451, 0.18407679, 0.11970638, 0.11688312])

0.020852988671760695

### Bagging NB 15 classifiers

In [0]:
bagging_NB = BaggingClassifier(GaussianNB(var_smoothing = 1e-30), n_estimators = 15)

In [0]:
cross_val_score(bagging_NB, X_resampled, target, cv=10)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


array([0.60728402, 0.61462451, 0.63184641, 0.61038961, 0.61010728,
       0.61180124, 0.61857708, 0.6092603 , 0.61236589, 0.62252964])

In [0]:
np.mean([0.15386787, 0.13184641, 0.10897798, 0.1177301 , 0.1284585 ,
       0.16883117, 0.11038961, 0.11405985, 0.11998871, 0.18322981])

0.13373800100000002

In [0]:
np.std([0.15386787, 0.13184641, 0.10897798, 0.1177301 , 0.1284585 ,
       0.16883117, 0.11038961, 0.11405985, 0.11998871, 0.18322981])

0.024715104831355843

### Bagging NB 20 classifiers

In [0]:
bagging_NB = BaggingClassifier(GaussianNB(), n_estimators = 20)

In [0]:
cross_val_score(bagging_NB, X_resampled, target, cv=10)

In [0]:
np.mean([0.11038961, 0.11490683, 0.12902315, 0.11744777, 0.12196499,
       0.11801242, 0.10982496, 0.1134952 , 0.19649915, 0.17080745])

0.130237153

In [0]:
np.std([0.11038961, 0.11490683, 0.12902315, 0.11744777, 0.12196499,
       0.11801242, 0.10982496, 0.1134952 , 0.19649915, 0.17080745])

0.027830577649889717

## Bagging with KNN method

In [0]:
bagging_KNN = KNeighborsClassifier(n_neighbors=7)

In [0]:
cross_val_score(bagging_KNN, X_resampled, y_resampled, cv=10)

array([0.53218521, 0.52484472, 0.52484472, 0.52315076, 0.51919819,
       0.53303219, 0.52004517, 0.52173913, 0.52879729, 0.51468097])

In [0]:
np.mean([0.53218521, 0.52484472, 0.52484472, 0.52315076, 0.51919819,
       0.53303219, 0.52004517, 0.52173913, 0.52879729, 0.51468097])

0.524251835

In [0]:
np.std([0.53218521, 0.52484472, 0.52484472, 0.52315076, 0.51919819,
       0.53303219, 0.52004517, 0.52173913, 0.52879729, 0.51468097])

0.005511443793138503

### Bagging KNN 10 classifiers

In [0]:
#from imblearn.ensemble import BalancedBaggingClassifier
#bbc = BalancedBaggingClassifier(base_estimator=KNeighborsClassifier(n_neighbors=7),
#                                 sampling_strategy='not majority',
#                                 replacement=False,
#                                 random_state=0)

In [0]:
bagging_KNN = BaggingClassifier(KNeighborsClassifier(n_neighbors=7))

In [0]:
cross_val_score(bagging_KNN, X_resampled, y_resampled, cv=10)

array([0.53218521, 0.53303219, 0.53020892, 0.52484472, 0.52597403,
       0.54009034, 0.52597403, 0.52625635, 0.5364201 , 0.52399774])

In [0]:
np.mean([0.53218521, 0.53303219, 0.53020892, 0.52484472, 0.52597403,
       0.54009034, 0.52597403, 0.52625635, 0.5364201 , 0.52399774])

0.529898363

In [0]:
np.std([0.53218521, 0.53303219, 0.53020892, 0.52484472, 0.52597403,
       0.54009034, 0.52597403, 0.52625635, 0.5364201 , 0.52399774])

0.0051573067799463804

### Bagging KNN 15 classifiers

In [0]:
bagging_KNN = BaggingClassifier(KNeighborsClassifier(n_neighbors=7), n_estimators = 15)

In [0]:
cross_val_score(bagging_KNN, X_resampled, y_resampled, cv=10)

array([0.52851496, 0.53472614, 0.53783173, 0.52936194, 0.52823264,
       0.54150198, 0.5299266 , 0.52766798, 0.53811406, 0.52795031])

In [0]:
np.mean([0.52851496, 0.53472614, 0.53783173, 0.52936194, 0.52823264,
       0.54150198, 0.5299266 , 0.52766798, 0.53811406, 0.52795031])

0.5323828340000001

In [0]:
np.std([0.52851496, 0.53472614, 0.53783173, 0.52936194, 0.52823264,
       0.54150198, 0.5299266 , 0.52766798, 0.53811406, 0.52795031])

0.0049031407144017256

### Bagging KNN 20 classifiers

In [0]:
bagging_KNN = BaggingClassifier(KNeighborsClassifier(n_neighbors=7), n_estimators = 20)

In [0]:
cross_val_score(bagging_KNN, X_resampled, y_resampled, cv=10)

array([0.53331451, 0.53416149, 0.53246753, 0.52625635, 0.52738566,
       0.53811406, 0.52597403, 0.53387916, 0.53020892, 0.52428007])

In [0]:
np.mean([0.53331451, 0.53416149, 0.53246753, 0.52625635, 0.52738566,
       0.53811406, 0.52597403, 0.53387916, 0.53020892, 0.52428007])

0.5306041779999999

In [0]:
np.std([0.53331451, 0.53416149, 0.53246753, 0.52625635, 0.52738566,
       0.53811406, 0.52597403, 0.53387916, 0.53020892, 0.52428007])

0.004259666553425978

## Bagging with AD method

In [0]:
bagging_AD = DecisionTreeClassifier(random_state=0, max_depth = 25)

In [0]:
cross_val_score(bagging_AD, X_resampled, y_resampled, cv=10)

array([0.92010164, 0.92490119, 0.92546584, 0.93732355, 0.92744212,
       0.91417278, 0.91897233, 0.92603049, 0.92123094, 0.92151327])

In [0]:
np.mean([0.92010164, 0.92490119, 0.92546584, 0.93732355, 0.92744212,
       0.91417278, 0.91897233, 0.92603049, 0.92123094, 0.92151327])

0.923715415

In [0]:
np.std([0.92010164, 0.92490119, 0.92546584, 0.93732355, 0.92744212,
       0.91417278, 0.91897233, 0.92603049, 0.92123094, 0.92151327])

0.00588676020455946

### Bagging AD 10 classifiers

In [0]:
bagging_AD = BaggingClassifier(DecisionTreeClassifier(random_state=0, max_depth = 25))

In [0]:
cross_val_score(bagging_AD, X_resampled, y_resampled, cv=10)

array([0.95115754, 0.95736872, 0.95313382, 0.95878035, 0.95256917,
       0.94522868, 0.94833427, 0.95454545, 0.95200452, 0.94974591])

In [0]:
np.mean([0.95115754, 0.95736872, 0.95313382, 0.95878035, 0.95256917,
       0.94522868, 0.94833427, 0.95454545, 0.95200452, 0.94974591])

0.9522868429999999

In [0]:
np.std([0.95115754, 0.95736872, 0.95313382, 0.95878035, 0.95256917,
       0.94522868, 0.94833427, 0.95454545, 0.95200452, 0.94974591])

0.0038359009562762203

### Bagging AD 15 classifiers

In [0]:
bagging_AD = BaggingClassifier(DecisionTreeClassifier(random_state=0, max_depth = 25), n_estimators = 15)

In [0]:
cross_val_score(bagging_AD, X_resampled, y_resampled, cv=10)

array([0.9539808 , 0.95680407, 0.95736872, 0.959345  , 0.95652174,
       0.95115754, 0.9539808 , 0.95708639, 0.959345  , 0.95680407])

In [0]:
np.mean([0.9539808 , 0.95680407, 0.95736872, 0.959345  , 0.95652174,
       0.95115754, 0.9539808 , 0.95708639, 0.959345  , 0.95680407])

0.9562394130000001

In [0]:
np.std([0.9539808 , 0.95680407, 0.95736872, 0.959345  , 0.95652174,
       0.95115754, 0.9539808 , 0.95708639, 0.959345  , 0.95680407])

0.0024088903610129404

### Bagging AD 20 classifiers

In [0]:
bagging_AD = BaggingClassifier(DecisionTreeClassifier(random_state=0, max_depth = 25), n_estimators = 20)

In [0]:
cross_val_score(bagging_AD, X_resampled, y_resampled, cv=10)

array([0.95765104, 0.96216827, 0.95793337, 0.96640316, 0.95652174,
       0.95143986, 0.95708639, 0.95962733, 0.96103896, 0.95623941])

In [0]:
np.mean([0.95765104, 0.96216827, 0.95793337, 0.96640316, 0.95652174,
       0.95143986, 0.95708639, 0.95962733, 0.96103896, 0.95623941])

0.958610953

In [0]:
np.std([0.95765104, 0.96216827, 0.95793337, 0.96640316, 0.95652174,
       0.95143986, 0.95708639, 0.95962733, 0.96103896, 0.95623941])

0.003813393244510859

## Bagging with ML method

In [0]:
mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=200, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.2, early_stopping = True, n_iter_no_change = 50,
                    learning_rate_init=.1)

In [0]:
cross_val_score(mlp, X_resampled, y_resampled, cv=10)

array([0.10700169, 0.10135517, 0.11038961, 0.108131  , 0.09514399,
       0.10474308, 0.10784867, 0.0920384 , 0.09232072, 0.12874082])

In [0]:
np.mean([0.10700169, 0.10135517, 0.11038961, 0.108131  , 0.09514399,
       0.10474308, 0.10784867, 0.0920384 , 0.09232072, 0.12874082])

0.104771315

In [0]:
np.std([0.10700169, 0.10135517, 0.11038961, 0.108131  , 0.09514399,
       0.10474308, 0.10784867, 0.0920384 , 0.09232072, 0.12874082])

0.01026321221241698

### Bagging ML 10 classifiers

In [0]:
bagging_ML = BaggingClassifier(mlp)

In [0]:
cross_val_score(bagging_ML, X_resampled, y_resampled, cv=10)



array([0.11293055, 0.11660079, 0.12337662, 0.13184641, 0.12083569,
       0.11236589, 0.11518916, 0.11405985, 0.11716544, 0.13297572])

In [0]:
np.mean([0.11293055, 0.11660079, 0.12337662, 0.13184641, 0.12083569,
       0.11236589, 0.11518916, 0.11405985, 0.11716544, 0.13297572])

0.11973461199999999

In [0]:
np.std([0.11293055, 0.11660079, 0.12337662, 0.13184641, 0.12083569,
       0.11236589, 0.11518916, 0.11405985, 0.11716544, 0.13297572])

0.007116024544940524

### Bagging ML 15 classifiers

In [0]:
bagging_ML = BaggingClassifier(mlp, n_estimators = 15)

In [0]:
cross_val_score(bagging_ML, X_resampled, y_resampled, cv=10)

array([0.1177301 , 0.11660079, 0.12563523, 0.11998871, 0.1177301 ,
       0.11857708, 0.11490683, 0.11801242, 0.12027103, 0.12196499])

In [0]:
np.mean([0.1177301 , 0.11660079, 0.12563523, 0.11998871, 0.1177301 ,
       0.11857708, 0.11490683, 0.11801242, 0.12027103, 0.12196499])

0.119141728

In [0]:
np.std([0.1177301 , 0.11660079, 0.12563523, 0.11998871, 0.1177301 ,
       0.11857708, 0.11490683, 0.11801242, 0.12027103, 0.12196499])

0.0028652985218639918

### Bagging ML 20 classifiers

In [0]:
bagging_ML = BaggingClassifier(mlp, n_estimators = 20)

In [0]:
cross_val_score(bagging_ML, X_resampled, y_resampled, cv=10)

array([0.1177301 , 0.1134952 , 0.12365895, 0.11518916, 0.11405985,
       0.14426877, 0.12930548, 0.11998871, 0.12140034, 0.12394128])

In [0]:
np.mean([0.1177301 , 0.1134952 , 0.12365895, 0.11518916, 0.11405985,
       0.14426877, 0.12930548, 0.11998871, 0.12140034, 0.12394128])

0.122303784

In [0]:
np.std([0.1177301 , 0.1134952 , 0.12365895, 0.11518916, 0.11405985,
       0.14426877, 0.12930548, 0.11998871, 0.12140034, 0.12394128])

0.008713598931873325

## Stacking model

In [0]:
d = {'NO_LABEL':1, 'Base Salary/Compensation':2, 'Benefits':3, 'Substitute Compensation':4, 'Supplies/Materials':5, 
    'Rent/Utilities':6, 'Other Compensation/Stipend': 7, 'Contracted Services' : 8, 'Equipment & Equipment Lease':9, 
     'Other Non-Compensation':10, 'Travel & Conferences':11}

In [0]:
Y = pd.DataFrame(y_resampled)

In [0]:
target = Y.applymap(lambda s: d.get(s) if s in d else s)

In [0]:
#Método A
clfA1 = DecisionTreeClassifier(random_state=0, max_depth = 25)
clfA2 = DecisionTreeClassifier(random_state=10, max_depth = 10, min_samples_split = 4)
clfA3 = DecisionTreeClassifier(random_state=5, max_depth = 15)
clfA4 = DecisionTreeClassifier(random_state=15, max_depth = 25, min_weight_fraction_leaf = 0.2)
clfA5 = DecisionTreeClassifier(random_state=0, max_depth = 25, criterion = "entropy", max_features = 100)
clfA6 = DecisionTreeClassifier(random_state=8, max_depth = 25, max_features = 100)
clfA7 = DecisionTreeClassifier(random_state=0, max_depth = 20, max_features = "sqrt")
clfA8 = DecisionTreeClassifier(random_state=0, max_depth = 20, max_features = "log2")
clfA9 = DecisionTreeClassifier(random_state=0, max_depth = 25, max_features = 0.6, min_samples_split = 5)
clfA10 = DecisionTreeClassifier(random_state=0, max_depth = 25, splitter = "random")
clfA11 = DecisionTreeClassifier(random_state=0, max_depth = 5, splitter = "random")
clfA12 = DecisionTreeClassifier(random_state=10, max_depth = 15, min_samples_split = 4)
clfA13 = DecisionTreeClassifier(random_state=5, max_depth = 15, max_features = "sqrt")
clfA14 = DecisionTreeClassifier(random_state=30, max_depth = 5, min_weight_fraction_leaf = 0.2)
clfA15 = DecisionTreeClassifier(random_state=7, max_depth = 30, criterion = "entropy", max_features = 50)
clfA16 = DecisionTreeClassifier(random_state=8, max_depth = 40, max_features = 100)
clfA17 = DecisionTreeClassifier(random_state=50, max_depth = 20, max_features = "sqrt", criterion = "entropy")
clfA18 = DecisionTreeClassifier(random_state=30, max_depth = 10, max_features = "log2")
clfA19 = DecisionTreeClassifier(random_state=4, max_depth = 15, max_features = 0.5, min_samples_split = 3)
clfA20 = DecisionTreeClassifier(random_state=19, max_depth = 25, splitter = "random", criterion = "entropy")


#Método B
clfB1 = KNeighborsClassifier(n_neighbors=7)
clfB2 = KNeighborsClassifier(n_neighbors=5, weights = "distance")
clfB3 = KNeighborsClassifier(n_neighbors=4, weights = "distance")
clfB4 = KNeighborsClassifier(n_neighbors=8)
clfB5 = KNeighborsClassifier(n_neighbors=7, metric = "minkowski", p = 1)
clfB6 = KNeighborsClassifier(n_neighbors=4, algorithm = "ball_tree")
clfB7 = KNeighborsClassifier(n_neighbors=3, algorithm = "brute")
clfB8 = KNeighborsClassifier(n_neighbors=7, algorithm = "kd_tree", leaf_size = 50)
clfB9 = KNeighborsClassifier(n_neighbors=5, algorithm = "kd_tree")
clfB10 = KNeighborsClassifier(n_neighbors=7, algorithm = "brute")
clfB11 = KNeighborsClassifier(n_neighbors=3)
clfB12 = KNeighborsClassifier(n_neighbors=3, weights = "distance")
clfB13 = KNeighborsClassifier(n_neighbors=4, weights = "distance", metric = "minkowski", p = 1)
clfB14 = KNeighborsClassifier(n_neighbors=8, algorithm = "brute")
clfB15 = KNeighborsClassifier(n_neighbors=7, metric = "minkowski", p = 1)
clfB16 = KNeighborsClassifier(n_neighbors=7, algorithm = "ball_tree")
clfB17 = KNeighborsClassifier(n_neighbors=6, algorithm = "ball_tree", leaf_size = 20)
clfB18 = KNeighborsClassifier(n_neighbors=7, algorithm = "kd_tree", leaf_size = 50, metric = "minkowski", p = 1)
clfB19 = KNeighborsClassifier(n_neighbors=5, algorithm = "kd_tree", metric = "minkowski", p = 1)
clfB20 = KNeighborsClassifier(n_neighbors=7, algorithm = "brute")

#Método C
clfC1 = MLPClassifier(hidden_layer_sizes=(20,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = True, n_iter_no_change = 10,
                    learning_rate_init=.1)
clfC2 = MLPClassifier(hidden_layer_sizes=(10,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = True, n_iter_no_change = 20,
                    learning_rate_init=.01)
clfC3 = MLPClassifier(hidden_layer_sizes=(30,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = True, n_iter_no_change = 25,
                    learning_rate_init=.1)
clfC4 = MLPClassifier(hidden_layer_sizes=(20,), max_iter=200, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = True, n_iter_no_change = 10,
                    learning_rate_init=.01)
clfC5 = MLPClassifier(hidden_layer_sizes=(25,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = True, n_iter_no_change = 10,
                    learning_rate_init=.01)
clfC6 = MLPClassifier(hidden_layer_sizes=(50,), max_iter=150, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.2, early_stopping = True, n_iter_no_change = 40,
                    learning_rate_init=.01)
clfC7 = MLPClassifier(hidden_layer_sizes=(30,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-5, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.2, early_stopping = True, n_iter_no_change = 30,
                    learning_rate_init=.1)
clfC8 = MLPClassifier(hidden_layer_sizes=(15,), max_iter=150, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = False,
                    learning_rate_init=.01)
clfC9 = MLPClassifier(hidden_layer_sizes=(20,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = False,
                    learning_rate_init=.1)
clfC10 = MLPClassifier(hidden_layer_sizes=(40,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.2, early_stopping = False,
                    learning_rate_init=.1)
clfC11 = MLPClassifier(hidden_layer_sizes=(5,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.4, early_stopping = True, n_iter_no_change = 10,
                    learning_rate_init=.1)
clfC12 = MLPClassifier(hidden_layer_sizes=(10,), max_iter=50, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.4, early_stopping = True, n_iter_no_change = 20,
                    learning_rate_init=.1)
clfC13 = MLPClassifier(hidden_layer_sizes=(30,), max_iter=150, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.5, early_stopping = True, n_iter_no_change = 25,
                    learning_rate_init=.01)
clfC14 = MLPClassifier(hidden_layer_sizes=(20,), max_iter=200, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.4, early_stopping = True, n_iter_no_change = 10,
                    learning_rate_init=.01)
clfC15 = MLPClassifier(hidden_layer_sizes=(25,), max_iter=75, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.2, early_stopping = False, n_iter_no_change = 10,
                    learning_rate_init=.1)
clfC16 = MLPClassifier(hidden_layer_sizes=(50,), max_iter=200, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = True, n_iter_no_change = 40,
                    learning_rate_init=.01)
clfC17 = MLPClassifier(hidden_layer_sizes=(30,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-5, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.2, early_stopping = True, n_iter_no_change = 30,
                    learning_rate_init=.01)
clfC18 = MLPClassifier(hidden_layer_sizes=(15,), max_iter=150, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = False,
                    learning_rate_init=.01)
clfC19 = MLPClassifier(hidden_layer_sizes=(5,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = False,
                    learning_rate_init=.1)
clfC20 = MLPClassifier(hidden_layer_sizes=(5,), max_iter=100, alpha=1e-4,
                    solver='sgd', verbose=False, tol=1e-4, random_state=42, momentum = 0.8, 
                    validation_fraction = 0.3, early_stopping = False,
                    learning_rate_init=.01)

clfD1 = GaussianNB(priors=None, var_smoothing=1e-1)
clfD2 = GaussianNB(priors=None, var_smoothing=1e-1.4)
clfD3 = GaussianNB(priors=None, var_smoothing=1e-1.8)
clfD4 = GaussianNB(priors=None, var_smoothing=1e-2.2)
clfD5 = GaussianNB(priors=None, var_smoothing=1e-2.6)
clfD6 = GaussianNB(priors=None, var_smoothing=1e-3)
clfD7 = GaussianNB(priors=None, var_smoothing=1e-3.4)
clfD8 = GaussianNB(priors=None, var_smoothing=1e-3.8)
clfD9 = GaussianNB(priors=None, var_smoothing=1e-4.2)
clfD10 = GaussianNB(priors=None, var_smoothing=1e-4.6)
clfD11 = GaussianNB(priors=None, var_smoothing=1e-5)
clfD12 = GaussianNB(priors=None, var_smoothing=1e-5.4)
clfD13 = GaussianNB(priors=None, var_smoothing=1e-5.8)
clfD14 = GaussianNB(priors=None, var_smoothing=1e-6.2)
clfD15 = GaussianNB(priors=None, var_smoothing=1e-6.6)
clfD16 = GaussianNB(priors=None, var_smoothing=1e-7)
clfD17 = GaussianNB(priors=None, var_smoothing=1e-7.4)
clfD18 = GaussianNB(priors=None, var_smoothing=1e-7.8)
clfD19 = GaussianNB(priors=None, var_smoothing=1e-8.2)
clfD20 = GaussianNB(priors=None, var_smoothing=1e-8.6)


#Meta-classificador
xgb = XGBClassifier(random_state=0, n_jobs=-1, learning_rate=0.1, 
                      n_estimators=100, max_depth=3)

SyntaxError: ignored

### Stacking Homogêneo: *AD* - - 10

In [0]:
sclf = StackingClassifier(classifiers=[clfA1, clfA2, clfA3, clfA4, clfA5, clfA6, clfA7, clfA8, clfA9, clfA10], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.924562 (+/- 0.001365)


### Stacking Homogêneo: *AD* - - 15

In [0]:
sclf = StackingClassifier(classifiers=[clfA1, clfA2, clfA3, clfA4, clfA5, clfA6, 
                                       clfA7, clfA8, clfA9, clfA10, clfA11, clfA12, clfA13, clfA14, clfA15], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.923885 (+/- 0.005092)


### Stacking Homogêneo: *AD* - - 20

In [0]:
sclf = StackingClassifier(classifiers=[clfA1, clfA2, clfA3, clfA4, clfA5, clfA6, 
                                       clfA7, clfA8, clfA9, clfA10, clfA11, clfA12, 
                                       clfA13, clfA14, clfA15, clfA16, clfA17, clfA18, clfA19, clfA20], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.924421 (+/- 0.001138)


### Stacking Homogêneo: *KNN* - - 10

In [0]:
sclf = StackingClassifier(classifiers=[clfB1, clfB2, clfB3, clfB4, clfB5, clfB6, 
                                       clfB7, clfB8, clfB9, clfB10], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)


In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.607679 (+/- 0.005384)


### Stacking Homogêneo: *KNN* - - 15

In [0]:
sclf = StackingClassifier(classifiers=[clfB1, clfB2, clfB3, clfB4, clfB5, clfB6, 
                                       clfB7, clfB8, clfB9, clfB10, clfB11, clfB12, 
                                       clfB13, clfB14, clfB15], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.610841 (+/- 0.003160)


### Stacking Homogêneo: *KNN* - - 20

In [0]:
sclf = StackingClassifier(classifiers=[clfB1, clfB2, clfB3, clfB4, clfB5, clfB6, 
                                       clfB7, clfB8, clfB9, clfB10, clfB11, clfB12, 
                                       clfB13, clfB14, clfB15, clfB16, clfB17, clfB18, clfB19, clfB20], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  clf.fit(X, y)
  cl

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.610841 (+/- 0.003160)


### Stacking Homogêneo: *MLP* - - 10

In [0]:
sclf = StackingClassifier(classifiers=[clfC1, clfC2, clfC3, clfC4, clfC5, clfC6, 
                                       clfC7, clfC8, clfC9, clfC10], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=3, scoring='accuracy')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.135094 (+/- 0.002494)


### Stacking Homogêneo: *MLP* - - 15

In [0]:
sclf = StackingClassifier(classifiers=[clfC1, clfC2, clfC3, clfC4, clfC5, clfC6, 
                                       clfC7, clfC8, clfC9, clfC10, clfC11, clfC12, 
                                       clfC13, clfC14, clfC15], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.170271 (+/- 0.028613)


### Stacking Homogêneo: *MLP* - - 20

In [0]:
sclf = StackingClassifier(classifiers=[clfC1, clfC2, clfC3, clfC4, clfC5, clfC6, 
                                       clfC7, clfC8, clfC9, clfC10, clfC11, clfC12, 
                                       clfC13, clfC14, clfC15, clfC16, clfC17, clfC18, clfC19, clfC20], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.177132 (+/- 0.030947)


### Stacking Homogêneo: *NB* - - 10

In [0]:
sclf = StackingClassifier(classifiers=[clfD1, clfD2, clfD3, clfD4, clfD5, clfD6, 
                                       clfD7, clfD8, clfD9, clfD10], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.247826 (+/- 0.021182)


### Stacking Homogêneo: *NB* - - 15

In [0]:
sclf = StackingClassifier(classifiers=[clfD1, clfD2, clfD3, clfD4, clfD5, clfD6, 
                                       clfD7, clfD8, clfD9, clfD10, clfD11, clfD12, 
                                       clfD13, clfD14, clfD15], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.673715 (+/- 0.005278)


### Stacking Homogêneo: *NB* - - 20

In [0]:
sclf = StackingClassifier(classifiers=[clfD1, clfD2, clfD3, clfD4, clfD5, clfD6, 
                                       clfD7, clfD8, clfD9, clfD10, clfD11, clfD12, 
                                       clfD13, clfD14, clfD15, clfD16, clfD17, clfD18, clfD19, clfD20], 
                          meta_classifier=xgb)

In [0]:
scores = cross_val_score(sclf, X_resampled, target, 
                                              cv=5, scoring='accuracy')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [0]:
print("Accuracy: %f (+/- %f)" 
          % (scores.mean(), scores.std()))

Accuracy: 0.674478 (+/- 0.004297)
