In [12]:
%pip install imblearn

Note: you may need to restart the kernel to use updated packages.


## expand a labeled dataset to SMOTE

magnify the number of minority/exceptional cases within the sequence dataset, ideally targets the binary binned dataset.

[reference 1](<ver5-ordinal-binning-grid-searches/step 2-0, ranged clustering, with time.ipynb>)

different oversampling tools: Naive random oversampling, SMOTE, ADASYN, SMOTENC

In [31]:
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN, BorderlineSMOTE, SVMSMOTE,SMOTENC, KMeansSMOTE
from imblearn.combine import SMOTEENN, SMOTETomek
from collections import Counter
import pandas as pd 

from sklearn.metrics import balanced_accuracy_score
from imblearn.ensemble import BalancedBaggingClassifier,BalancedRandomForestClassifier,RUSBoostClassifier

from sklearn.neighbors import KNeighborsClassifier

from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import HistGradientBoostingClassifier, GradientBoostingClassifier, RandomForestClassifier, \
ExtraTreesClassifier, RandomTreesEmbedding, BaggingClassifier
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from scipy import stats

from sklearn.tree import DecisionTreeClassifier
#import shap
import pandas as pd
import time
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import seaborn as sns
#import altair as alt

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import  VotingClassifier 
from sklearn.model_selection import cross_val_score


In [32]:
m2_pipeline = pd.read_csv("binary_binned_pipeline.csv")

In [33]:
m2_pipeline.columns #do this to identify the index of the categorical feature, for below setup

Index(['Unnamed: 0', 'precursor_buy_cap_pct_change',
       'precursor_ask_cap_pct_change', 'precursor_bid_vol_pct_change',
       'precursor_ask_vol_pct_change', 'sum_change', 'length',
       'surge_targets_met_pct', 'time', 'label'],
      dtype='object')

In [34]:
keepable = ['precursor_buy_cap_pct_change', 
            'precursor_ask_cap_pct_change',
            'precursor_bid_vol_pct_change', 
            'precursor_ask_vol_pct_change',
            'sum_change','length','time']

In [35]:
y = m2_pipeline['label'] #per https://stackoverflow.com/a/73095562/12001832
X = m2_pipeline[keepable]
# Performing the test/train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#normalize all numeric columns
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
    

In [37]:
X

Unnamed: 0,precursor_buy_cap_pct_change,precursor_ask_cap_pct_change,precursor_bid_vol_pct_change,precursor_ask_vol_pct_change,sum_change,length,time
0,-0.000798,0.000046,-0.000173,0.006228,-0.004876,6,1.660222e+12
1,-0.003129,-0.000032,-0.001081,-0.004203,-0.000134,1,1.660222e+12
2,0.000440,-0.000007,0.000294,-0.000717,-0.004013,3,1.660222e+12
3,-0.003818,0.000013,-0.001505,0.001104,-0.000300,2,1.660222e+12
4,0.009044,0.000025,0.004399,0.007863,-0.013463,7,1.660223e+12
...,...,...,...,...,...,...,...
5821,-0.127842,0.226017,0.815279,0.144134,-0.045466,5,1.693073e+12
5822,-0.003810,0.000425,-0.000934,0.002042,-0.000498,5,1.693073e+12
5823,0.003263,0.000408,0.000792,0.001959,-0.000598,6,1.693073e+12
5824,0.004464,0.000645,0.001073,0.003142,-0.000996,11,1.693074e+12


In [38]:
#set up toolsets as functions to build separate datasets, bin_Naive, bin_SMOTE, bin_ADASYN

def build_naive():  #https://imbalanced-learn.org/stable/over_sampling.html#naive-random-over-sampling
    ros = RandomOverSampler(random_state=42, sampling_strategy='minority')
    X_resampled, y_resampled = ros.fit_resample(X, y)
    print("ROS",sorted(Counter(y_resampled).items()))
    return X_resampled, y_resampled

def build_smote(): #https://imbalanced-learn.org/stable/references/generated/imblearn.over_sampling.SMOTE.html#smote
    X_resampled, y_resampled = SMOTE(random_state=42 ).fit_resample(X, y)
    print("SMOTE",sorted(Counter(y_resampled).items()))
    return X_resampled, y_resampled

def build_adasyn(): #https://imbalanced-learn.org/stable/references/generated/imblearn.over_sampling.ADASYN.html#adasyn
    X_resampled, y_resampled = ADASYN(random_state=42,sampling_strategy='minority').fit_resample(X, y)
    print("ADASYN",sorted(Counter(y_resampled).items()))
    return X_resampled, y_resampled

def build_borderline(): #https://imbalanced-learn.org/stable/references/generated/imblearn.over_sampling.BorderlineSMOTE.html#borderlinesmote
    X_resampled, y_resampled = BorderlineSMOTE(random_state=42,sampling_strategy='minority').fit_resample(X, y)
    print("BORDERLINE",sorted(Counter(y_resampled).items())) 
    return X_resampled, y_resampled

def build_smotenc(): #https://imbalanced-learn.org/stable/references/generated/imblearn.over_sampling.SMOTENC.html#smotenc
    smote_nc = SMOTENC( random_state=42,sampling_strategy='minority')
    X_resampled, y_resampled = smote_nc.fit_resample(X, y)
    print("SMOTENC",sorted(Counter(y_resampled).items()))
    return X_resampled, y_resampled

def build_svmsmote():
    sm = SVMSMOTE(random_state=42)
    X_res, y_res = sm.fit_resample(X, y)
    print("SVMSMOTE",'Resampled dataset shape %s' % Counter(y_res))
    return X_res, y_res

def build_kmsmote(): #https://imbalanced-learn.org/stable/combine.html#combination-of-over-and-under-sampling
    m = KMeansSMOTE( random_state=42,sampling_strategy='minority')
    X_res, y_res = m.fit_resample(X, y)
    # Find the number of new samples in the middle blob
    n_res_in_middle = ((X_res[:, 0] > -5) & (X_res[:, 0] < 5)).sum()
    print("KMSMOTE","Samples in the middle blob: %s" % n_res_in_middle)
    return X_res, y_res

def build_smoteenn():
    smote_enn = SMOTEENN(random_state=42,sampling_strategy='minority')
    X_resampled, y_resampled = smote_enn.fit_resample(X, y)
    print("SMOTEENN",sorted(Counter(y_resampled).items()))
    return X_resampled, y_resampled

def build_smotetomek():
    smote_tomek = SMOTETomek(random_state=42,sampling_strategy='minority')
    X_resampled, y_resampled = smote_tomek.fit_resample(X, y)
    print("SMOTETOMEK",sorted(Counter(y_resampled).items()))
    return X_resampled, y_resampled

### take optimal classifier parameters and technique

In [20]:
def getBestClassifier(oversampler, dataset):   #'kmsmote', build_svmsmote(source)
    classifiers = [  # Define the classifiers and their respective hyperparameters
        LogisticRegression(),
        BernoulliNB(),
        KNeighborsClassifier(),
    ]
    params = {
        'LogisticRegression':{'C': [0.1, 1, 10], 'penalty':['l1','l2','elasticnet','None'], 'multi_class':['ovr','auto'],\
                              'random_state':[42]},
        'BernoulliNB':{'fit_prior':[True, False]},
        'KNeighborsClassifier':{'n_neighbors':[3,4,5,6,7,8], 'algorithm':['auto'], 'n_jobs':[1,2,3,4]}
    }
    comparative = []
    # Perform the grid search
    for clf in classifiers:
        name = clf.__class__.__name__
        if name in params:
            grid_search = GridSearchCV(clf, params[name], cv=5)
            grid_search.fit(X_train_scaled, y_train)
            
            print(f"Best parameters for {name}: {grid_search.best_params_}")
            accuracy = grid_search.score(X_test, y_test)
            
            dict = {"classifier":name, "best_params":grid_search.best_params_, "accuracy":accuracy, "oversampler":oversampler}
            comparative.append(dict)
    return(comparative)

In [21]:
samplers = [build_naive(),build_smote(),build_adasyn(), build_borderline(), build_svmsmote(), build_smoteenn(),build_smotetomek()]
samplers_string = ["build_naive","build_smote","build_adasyn", "build_borderline", "build_svmsmote", "build_smoteenn","build_smotetomek"]
samplers_tup = zip(samplers_string,samplers)

ROS [(0, 5716), (1, 5716)]
SMOTE [(0, 5716), (1, 5716)]
ADASYN [(0, 5716), (1, 5701)]
BORDERLINE [(0, 5716), (1, 5716)]
SVMSMOTE Resampled dataset shape Counter({0: 5716, 1: 3193})
SMOTEENN [(0, 2972), (1, 2334)]
SMOTETOMEK [(0, 4377), (1, 4377)]


In [39]:
# n_clusters = 2
from sklearn.cluster import KMeans
import numpy as np
from sklearn.metrics import silhouette_score, silhouette_samples
def silhouette_scorer(estimator, X, y=None):
    labels = estimator.fit_predict(X)
    score = silhouette_score(X, labels)
    return score

In [11]:
for pair in samplers_tup:
    sampler_string, sampler = pair
    X_sampled, y_sampled = sampler
    #normalize all numeric columns
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_sampled)
    clf = KMeans(n_clusters =2, init='k-means++', random_state = 42)
    labels = clf.fit_predict(X_train_scaled)

    df = pd.DataFrame({'y_sampled': y_sampled, 'Cluster_Label': labels})
    
    # Calculate the ratio of the larger count of labels (0 or 1) to the total count in each cluster
    cluster_ratios = df.groupby('Cluster_Label')['y_sampled'].value_counts().unstack().fillna(0)
    cluster_max_ratio = cluster_ratios.max(axis=1)
    cluster_total_count = cluster_ratios.sum(axis=1)
    cluster_profit_scores = cluster_max_ratio / cluster_total_count
    
    # Calculate the average profit score across clusters
    average_profit_score = np.mean(cluster_profit_scores)
    sihlouette_score = silhouette_score(X_train_scaled, labels)
    print(f"{str(sampler_string)} sihlouette score is {sihlouette_score} and profit score is {average_profit_score}.")

build_naive sihlouette score is 0.6843542974684024 and profit score is 0.690628255221736.
build_smote sihlouette score is 0.7934490264209996 and profit score is 0.7154194361373594.
build_adasyn sihlouette score is 0.7303924816391958 and profit score is 0.6832464734271906.
build_borderline sihlouette score is 0.5941545457892622 and profit score is 0.6739884023965783.
build_svmsmote sihlouette score is 0.6278030339457474 and profit score is 0.6982836479341944.
build_smoteenn sihlouette score is 0.6308211271722189 and profit score is 0.6726724663335935.
build_smotetomek sihlouette score is 0.7815931034255095 and profit score is 0.6582383332931642.


In [51]:
samplers = [build_naive,build_smote,build_adasyn,build_borderline,build_svmsmote,build_smoteenn,build_smotetomek]
samplers_string = ["build_naive","build_smote","build_adasyn", "build_borderline", "build_svmsmote", "build_smoteenn","build_smotetomek"]
sampler_dict = {k: v for k, v in zip(samplers_string, samplers )}

In [53]:
sampler_dict

{'build_naive': <function __main__.build_naive()>,
 'build_smote': <function __main__.build_smote()>,
 'build_adasyn': <function __main__.build_adasyn()>,
 'build_borderline': <function __main__.build_borderline()>,
 'build_svmsmote': <function __main__.build_svmsmote()>,
 'build_smoteenn': <function __main__.build_smoteenn()>,
 'build_smotetomek': <function __main__.build_smotetomek()>}

In [43]:
# n_clusters = 10
for pair in samplers_tup:
    sampler_string, sampler = pair
    X_sampled, y_sampled = sampler
    #normalize all numeric columns
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_sampled)
    clf = KMeans(n_clusters =4, init='k-means++', random_state = 42)
    labels = clf.fit_predict(X_train_scaled)

    df = pd.DataFrame({'y_sampled': y_sampled, 'Cluster_Label': labels})
    
    # Calculate the ratio of the larger count of labels (0 or 1) to the total count in each cluster
    cluster_ratios = df.groupby('Cluster_Label')['y_sampled'].value_counts().unstack().fillna(0)
    cluster_max_ratio = cluster_ratios.max(axis=1)
    cluster_total_count = cluster_ratios.sum(axis=1)
    cluster_profit_scores = cluster_max_ratio / cluster_total_count
    
    # Calculate the average profit score across clusters
    average_profit_score = np.mean(cluster_profit_scores)
    sihlouette_score = silhouette_score(X_train_scaled, labels)
    print(f"{str(sampler_string)} sihlouette score is {sihlouette_score} and profit score is {average_profit_score}.")


build_naive sihlouette score is 0.5589780011241178 and profit score is 0.7272286121639467.
build_smote sihlouette score is 0.295312906627782 and profit score is 0.7206832500545338.
build_adasyn sihlouette score is 0.289928919274235 and profit score is 0.7219462502320038.
build_borderline sihlouette score is 0.29103835498727487 and profit score is 0.71706226287071.
build_svmsmote sihlouette score is 0.2922209036457123 and profit score is 0.7312030720456777.
build_smoteenn sihlouette score is 0.296901224155592 and profit score is 0.7325067190264691.
build_smotetomek sihlouette score is 0.3008564059891155 and profit score is 0.7264925722739589.


In [55]:
samplers = [build_naive,build_smote,build_adasyn,build_borderline,build_svmsmote,build_smoteenn,build_smotetomek]
samplers_string = ["build_naive","build_smote","build_adasyn", "build_borderline", "build_svmsmote", "build_smoteenn","build_smotetomek"]
sampler_dict = {k: v for k, v in zip(samplers_string, samplers )}
n_clusters = [2,4,6,8,10,12]
for clus in n_clusters:
    print(f"N_clusters is equal to {clus}.")
    for key, value in sampler_dict.items():
        X_sampled, y_sampled = value()
        #normalize all numeric columns
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_sampled)
        clf = KMeans(n_clusters =clus, init='k-means++', random_state = 42)
        labels = clf.fit_predict(X_train_scaled)

        df = pd.DataFrame({'y_sampled': y_sampled, 'Cluster_Label': labels})

        # Calculate the ratio of the larger count of labels (0 or 1) to the total count in each cluster
        cluster_ratios = df.groupby('Cluster_Label')['y_sampled'].value_counts().unstack().fillna(0)
        cluster_max_ratio = cluster_ratios.max(axis=1)
        cluster_total_count = cluster_ratios.sum(axis=1)
        cluster_profit_scores = cluster_max_ratio / cluster_total_count

        # Calculate the average profit score across clusters
        average_profit_score = np.mean(cluster_profit_scores)
        silhouette_score_1 = silhouette_score(X_train_scaled, labels)
        print(f"{str(key)} sihlouette score is {silhouette_score_1} and profit score is {average_profit_score}.")

N_clusters is equal to 2.
ROS [(0, 5716), (1, 5716)]
build_naive sihlouette score is 0.6843542974684024 and profit score is 0.690628255221736.
SMOTE [(0, 5716), (1, 5716)]
build_smote sihlouette score is 0.7934490264209996 and profit score is 0.7154194361373594.
ADASYN [(0, 5716), (1, 5701)]
build_adasyn sihlouette score is 0.7303924816391958 and profit score is 0.6832464734271906.
BORDERLINE [(0, 5716), (1, 5716)]
build_borderline sihlouette score is 0.5941545457892622 and profit score is 0.6739884023965783.
SVMSMOTE Resampled dataset shape Counter({0: 5716, 1: 3193})
build_svmsmote sihlouette score is 0.6278030339457474 and profit score is 0.6982836479341944.
SMOTEENN [(0, 2972), (1, 2334)]
build_smoteenn sihlouette score is 0.6308211271722189 and profit score is 0.6726724663335935.
SMOTETOMEK [(0, 4377), (1, 4377)]
build_smotetomek sihlouette score is 0.7815931034255095 and profit score is 0.6582383332931642.
N_clusters is equal to 4.
ROS [(0, 5716), (1, 5716)]
build_naive sihlouett

In [56]:
X_sampled, y_sampled = build_smote()
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_sampled)
clf = KMeans(n_clusters =2, init='k-means++', random_state = 42)
labels = clf.fit_predict(X_train_scaled)

SMOTE [(0, 5716), (1, 5716)]


In [58]:
X_sampled

Unnamed: 0,precursor_buy_cap_pct_change,precursor_ask_cap_pct_change,precursor_bid_vol_pct_change,precursor_ask_vol_pct_change,sum_change,length,time
0,-0.000798,0.000046,-0.000173,0.006228,-0.004876,6,1.660222e+12
1,-0.003129,-0.000032,-0.001081,-0.004203,-0.000134,1,1.660222e+12
2,0.000440,-0.000007,0.000294,-0.000717,-0.004013,3,1.660222e+12
3,-0.003818,0.000013,-0.001505,0.001104,-0.000300,2,1.660222e+12
4,0.009044,0.000025,0.004399,0.007863,-0.013463,7,1.660223e+12
...,...,...,...,...,...,...,...
11427,0.041228,0.000184,0.014091,0.008272,-0.008161,2,1.663482e+12
11428,0.048216,-0.001025,0.007441,-0.003788,-0.012447,4,1.679312e+12
11429,-0.021210,0.040257,0.026512,0.114004,-0.034555,5,1.686772e+12
11430,0.128101,0.000938,0.039594,0.155690,-0.004880,1,1.660483e+12


In [59]:
df = pd.concat([X_sampled.reset_index(drop=True), pd.Series(labels, name='Cluster_Label')], axis=1)

In [64]:
df = pd.concat([df, y_sampled], axis =1)

In [66]:
df.to_csv("binary_clustered_resampled_pipeline.csv")

In [19]:
from sklearn.cluster import KMeans
for pair in samplers_tup:
    sampler_string, sampler = pair
    X_sampled, y_sampled = sampler
    #normalize all numeric columns
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_sampled)
    clf = KMeans(n_clusters =2, init='k-means++', random_state = 42)
    labels = clf.fit_predict(X_train_scaled)
    df = pd.concat([y_sampled.reset_index(drop=True), pd.Series(labels, name='Cluster_Label')], axis=1)
    cluster_means = df.groupby('Cluster_Label')['label'].mean()
    score = cluster_means.std() 
    sihlouette_score = silhouette_score(X_train_scaled, labels)
    print(f"{sampler_string} sihlouette score is {sihlouette_score} and profit score is {score}.")

## trigger the search mechanism, per SMOTE method

loop through each oversampled dataset, then run the classifier search on that set, outlining each set, before hand

for each set, run the classifier search

In [20]:

source = 'binary_binned_pipeline.csv'
resultSet = []
resultSet.append(getBestClassifier('naive', build_naive()))  #many rows
resultSet.append(getBestClassifier('smote', build_smote()))
resultSet.append(getBestClassifier('adasyn', build_adasyn()))
resultSet.append(getBestClassifier('borderline', build_borderline()))
# resultSet.append(getBestClassifier('smotenc', build_smotenc()))
resultSet.append(getBestClassifier('svmsmote', build_svmsmote()))
# resultSet.append(getBestClassifier('kmsmote', build_kmsmote()))
resultSet.append(getBestClassifier('smoteenn', build_smoteenn()))
resultSet.append(getBestClassifier('smotetomek', build_smotetomek()))

optimals = pd.DataFrame(resultSet)

ROS [(0, 5716), (1, 5716)]


ValueError: Found input variables with inconsistent numbers of samples: [8754, 4660]

In [22]:
optimals

NameError: name 'optimals' is not defined

In [23]:
print_dict = lambda x: print(x)

# Apply the lambda function to each row of the data frame
optimals.applymap(print_dict)

NameError: name 'optimals' is not defined

[pitfalls of oversampling](https://imbalanced-learn.org/stable/common_pitfalls.html#data-leakage)

## sampling based ensemble methods

score each, can you combine into a voter?

[validation curve model selection](https://imbalanced-learn.org/stable/auto_examples/model_selection/plot_validation_curve.html#plotting-validation-curves)

In [24]:
# balanced bagger

bbc = BalancedBaggingClassifier(base_estimator=LogisticRegression(),
                                sampling_strategy='auto',
                                replacement=False,
                                random_state=42)
bbc.fit(X_train_scaled, y_train)
y_pred = bbc.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

ValueError: Found input variables with inconsistent numbers of samples: [8754, 4660]

In [54]:
#balanced tree estimator
brf = BalancedRandomForestClassifier(
    n_estimators=100, random_state=42, sampling_strategy="all", replacement=True
)
brf.fit(X_train_scaled, y_train)
y_pred = brf.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.5864746945898779

In [60]:
rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
                              random_state=42)
rusboost.fit(X_train_scaled, y_train)
y_pred = rusboost.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.5

In [59]:

X = m2_pipeline[keepable].values  #.drop(columns=['label']).values #per https://stackoverflow.com/a/73095562/12001832
y = m2_pipeline['label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#normalize all numeric columns
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
vc = VotingClassifier(estimators=[('bbc',bbc),('brf',brf),('rusboost',rusboost)], voting='soft')#  , weights=weights) 
# #fit all, voting classifier scoring
for clf, label in zip([bbc,brf,rusboost,vc], ['BalancedBaggingClassifier','BalancedRandomForestClassifier','RUSBoostClassifier','Voting']):
    scores = cross_val_score(clf, X_train_scaled, y_train, scoring='accuracy', cv=25)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was 

Accuracy: 0.82 (+/- 0.05) [BalancedBaggingClassifier]
Accuracy: 0.77 (+/- 0.03) [BalancedRandomForestClassifier]
Accuracy: 0.88 (+/- 0.06) [RUSBoostClassifier]


`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.
`base_estimator` was 

Accuracy: 0.82 (+/- 0.03) [Voting]
