# TPS Feb 2022 Multi-Classifier Stacked Ensemble

#### Random Forest Classifier + KNeighbors Classifier + DecisionTree Classifier

# Multi-Classifier Stacked Ensemble

Credits: 
- [@lucamassaron](https://www.kaggle.com/lucamassaron/basic-eda-and-model-to-start)  Notebook.
- [@pjofrelora](https://www.kaggle.com/pjofrelora/hybrid-classifier-solution-11th-place) Notebook.


In [None]:
!pip install --upgrade scikit-learn
!pip install mlxtend 

### Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
import optuna
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
# from sklearn.model_selection import cross_val_score

import re
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
import warnings
warnings.filterwarnings('ignore')

### Data

In [None]:
# Load the competition's training set, test set and sample-submission file
# from the Kaggle input directory.
train = pd.read_csv("../input/tabular-playground-series-feb-2022/train.csv")
test = pd.read_csv("../input/tabular-playground-series-feb-2022/test.csv")
submission = pd.read_csv("../input/tabular-playground-series-feb-2022/sample_submission.csv")

### Features

In [None]:
# credit to Luca Massaron https://www.kaggle.com/lucamassaron/basic-eda-and-model-to-start
# Feature columns: every column of `train` except the first and the last.
# NOTE(review): presumably those two are a row id and the 'target' label -- confirm.
features = train.columns[1:-1]

def split_feature(st):
    """Parse the integer counts embedded in a feature name.

    The name is split on the letters A, T, G and C. The piece in front of
    the first letter is dropped and each remaining piece is converted with
    ``int``.

    Returns:
        list[int]: the parsed numbers, in the order they appear in ``st``.
    """
    pieces = re.split('A|T|G|C', st)
    return [int(piece) for piece in pieces[1:]]

# Map every feature name to the list of counts parsed from it
# (unpacked below in A, T, G, C order).
feat2counts = {name: split_feature(name) for name in features}

# Histograms over the features: a[k] is the number of features whose
# A-count equals k (likewise t, g and c). Slots 0..10 are allocated.
a = [0] * 11
t = [0] * 11
g = [0] * 11
c = [0] * 11

for feat in features:
    xa, xt, xg, xc = feat2counts[feat]
    a[xa] += 1
    t[xt] += 1
    # Index with the G count; this previously used xt, so the G histogram
    # was just a second copy of the T histogram.
    g[xg] += 1
    c[xc] += 1

In [None]:
features.shape

In [None]:
def scaling(data):
    """Standardize ``data`` with a freshly fitted ``StandardScaler``.

    The previous version called ``fit_transform`` but threw the result away
    and handed back the untouched input, so nothing was ever scaled.

    Args:
        data: 2-D numeric array-like or DataFrame.

    Returns:
        The scaled values. A DataFrame input comes back as a DataFrame with
        the same index and columns; any other input comes back as whatever
        ``fit_transform`` returns.

    Note:
        The scaler is fitted on the data passed in, so calling this
        separately on two datasets scales each with its own statistics.
    """
    scaler = StandardScaler()
    scaled = scaler.fit_transform(data)
    if isinstance(data, pd.DataFrame):
        # Keep the labels so callers can still select columns by name.
        return pd.DataFrame(scaled, index=data.index, columns=data.columns)
    return scaled

### Splits

In [None]:
# Encode the class labels as integers, then hold out 25% of the rows for
# validation. The split is seeded so it is reproducible.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(train['target'])
X = train[features]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# scaler_f = StandardScaler()
# X_train_scaled = scaler_f.fit_transform(X_train)
# X_f_test_scaled = scaler_f.transform(X_test)

# SGDClassifier

In [None]:
# Baseline: SGDClassifier with every hyper-parameter left at its default,
# scored by accuracy on the hold-out split.
model = SGDClassifier().fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'The  SGDClassifier Score with Default Settings is: {score}')

# The  SGDClassifier Score with Default Settings is: 0.53248

In [None]:
# SGDClassifier settings taken from the best Optuna trial recorded in this
# notebook. Per the parameter notes in the search cell, l1_ratio is only
# used with the 'elasticnet' penalty and eta0 is not used by the 'optimal'
# learning-rate schedule.
params = {
    'penalty': 'l1',
    'alpha': 1.0021057779463507e-06,
    'l1_ratio': 0.24745913938509045,
    'fit_intercept': False,
    'max_iter': 1118,
    'tol': 0.001972030065242609,
    'epsilon': 0.017820792086488825,
    'learning_rate': 'optimal',
    'eta0': 0.009335633911125081,
    'power_t': 0.5706596138303384,
}

model = SGDClassifier(**params).fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'The  SGDClassifier Score with Optimized Settings is: {score}')

# The  SGDClassifier Score with Optimized Settings is: 0.81652

In [None]:
def objective(trial):
    """Optuna objective: hold-out accuracy of an SGDClassifier for one trial.

    Samples a hyper-parameter set from ``trial``, fits on the module-level
    ``X_train``/``y_train`` and returns the accuracy on ``X_test``/``y_test``.
    """
    sgd_params = {
        # 'log' gives logistic regression, a probabilistic classifier.
        # Alternatives: 'hinge' (default), 'modified_huber' (smooth, tolerant
        # to outliers, also gives probability estimates), 'squared_hinge'
        # (quadratically penalised hinge), 'perceptron', and the regression
        # losses 'squared_error', 'huber', 'epsilon_insensitive',
        # 'squared_epsilon_insensitive'.
        # NOTE(review): newer scikit-learn releases spell this loss
        # 'log_loss' -- confirm against the installed version.
        'loss': 'log',
        # 'l2' is the standard regulariser for linear SVM models; 'l1' and
        # 'elasticnet' may bring sparsity (feature selection).
        'penalty': trial.suggest_categorical('penalty', ['l2', 'l1', 'elasticnet']),  # default = 'l2'
        # Multiplier of the regularisation term (higher = stronger); also
        # used to compute the learning rate of the 'optimal' schedule.
        'alpha': trial.suggest_float('alpha', low=.000001, high=.00001),  # default = .0001
        # Elastic-net mix, 0 <= l1_ratio <= 1 (0 -> L2, 1 -> L1). Only used
        # when penalty is 'elasticnet'.
        'l1_ratio': trial.suggest_float('l1_ratio', low=.01, high=.3),  # default = 0.15
        'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False]),  # default = True
        'max_iter': trial.suggest_int('max_iter', low=750, high=1250),  # default = 1000
        # Stopping criterion: training stops when loss > best_loss - tol for
        # n_iter_no_change consecutive epochs.
        'tol': trial.suggest_float('tol', low=1e-5, high=1e-2),  # default = 1e-3
        'shuffle': False,  # default = True
        'random_state': 21,
        # Threshold of the epsilon-insensitive losses; only used when loss is
        # 'huber', 'epsilon_insensitive' or 'squared_epsilon_insensitive'.
        'epsilon': trial.suggest_float('epsilon', .0001, .02),  # default = 0.1
        'n_jobs': -1,  # default = None
        # Schedules: 'constant' eta = eta0; 'optimal' eta = 1.0 / (alpha * (t + t0));
        # 'invscaling' eta = eta0 / pow(t, power_t); 'adaptive' keeps eta0 while
        # training keeps improving and divides the rate by 5 otherwise.
        'learning_rate': trial.suggest_categorical(
            'learning_rate', ['optimal', 'constant', 'invscaling', 'adaptive']
        ),  # default = 'optimal'
        # Initial learning rate for 'constant', 'invscaling' and 'adaptive';
        # not used by 'optimal'.
        'eta0': trial.suggest_float('eta0', 0.0, .01),  # default = 0.0
        # Exponent of the inverse-scaling learning rate.
        'power_t': trial.suggest_float('power_t', low=.3, high=.6),  # default = 0.5
        'early_stopping': False,  # default = False
        'validation_fraction': .1,
        'n_iter_no_change': 5,
        'class_weight': None,  # default = None
        'warm_start': False,  # default = False
        # True stores the averaged SGD weights in coef_; an int > 1 starts
        # averaging once that many samples have been seen.
        'average': False,  # default = False
    }

    clf = SGDClassifier(**sgd_params)
    clf.fit(X_train, y_train)
    return accuracy_score(y_test, clf.predict(X_test))


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)

#Trial 290 finished with value: 0.81474 and parameters: {'penalty': 'l1', 'alpha': 1.0021057779463507e-06, 
# 'l1_ratio': 0.24745913938509045, 'fit_intercept': False, 'max_iter': 1118, 'tol': 0.001972030065242609, 'epsilon': 0.017820792086488825,
# 'learning_rate': 'optimal', 'eta0': 0.009335633911125081, 'power_t': 0.5706596138303384}. Best is trial 290 with value: 0.81474.

# Trial 94 finished with value: 0.81424 and parameters: {'penalty': 'l1', 'alpha':
# 1.0006350245834825e-06, 'l1_ratio': 0.057477672736118965, 'fit_intercept': False, 'max_iter': 868, 'tol': 0.0031220764708228548, 
# 'epsilon': 0.007338482082787139, 'learning_rate': 'optimal', 'eta0': 0.0007703062792938397, 'power_t': 0.43945672471052566}.
# Best is trial 94 with value: 0.81424.

# Trial 123 finished with value: 0.81384 and parameters: {'penalty': 'l1', 'alpha': 1.0023399588644886e-06,
# 'l1_ratio': 0.18197411568391297, 'fit_intercept': False, 'max_iter': 785, 'tol': 0.009833159597166106, 'epsilon': 0.005376745902229565,
# 'learning_rate': 'optimal', 'eta0': 0.007795334297890297, 'power_t': 0.3451170716923369}. Best is trial 123 with value: 0.81384.

# Trial 265 finished with value: 0.81748 and parameters: {'penalty': 'l1', 'alpha': 
# 1.0014951755556242e-06, 'l1_ratio': 0.14740252540229948, 'fit_intercept': False, 'max_iter': 783, 'tol': 0.0001231328444699676, 
# 'shuffle': True, 'epsilon': 0.015069998046005633, 'learning_rate': 'optimal', 'eta0': 0.008821276696654192, 'ccp_alpha': 
# 0.595934263631081}. Best is trial 265 with value: 0.81748.

# Trial 105 finished with value: 0.8137 and parameters: {'penalty': 'l1', 'alpha': 1.0349576851162957e-05,
# 'l1_ratio': 0.15237129057146087, 'fit_intercept': False, 'max_iter': 813, 'tol': 0.004471825647149898, 'shuffle': True, 'epsilon':
# 0.02652709040960106, 'learning_rate': 'optimal', 'eta0': 0.003962948802914954, 'ccp_alpha': 0.4071901065880417}. Best is trial 105 with value: 0.8137.

# Logistic Regression Classifier

In [None]:
# Baseline LogisticRegression with default settings.
# Errors = problems with the sklearn LogisticRegression module.
model = LogisticRegression().fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'The Logistic Regression Score with Default Settings and "solver" = "" is: {score}')

# The Logistic Regression Score with Default Settings and "solver" = "saga" is: 0.64502
# The Logistic Regression Score with Default Settings and "solver" = "liblinear" is: 0.63484
# The Logistic Regression Score with Default Settings and "solver" = "sag" is: 0.64556
# The Logistic Regression Score with Default Settings and "solver" = "lbfgs" is: error
# The Logistic Regression Score with Default Settings and "solver" = "newton-cg" is: 0.64504

In [None]:
# Explicit LogisticRegression settings. The dict previously ended with a
# stray '' entry (a syntax error) and was never handed to the model.
# Both values match the defaults noted in the Optuna cell of this notebook,
# so passing them keeps the fitted model the same as LogisticRegression().
params = {
    'solver': 'lbfgs',
    'penalty': 'l2',
}

model = LogisticRegression(**params)
model.fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'The Logistic Regression Score is: {score}')

### Optuna Optimization for Logistic Regression Classifier

In [None]:
def objective(trial):
    """Optuna objective: hold-out accuracy of a LogisticRegression for one trial.

    Only ``C``, ``fit_intercept`` and ``max_iter`` are searched; every other
    setting is pinned. Fits on the module-level training split and returns
    the accuracy on the hold-out split.
    """
    lr_params = {
        # 'liblinear' is a good choice for small datasets, 'sag'/'saga' are
        # faster for large ones (fast convergence only on features with
        # roughly the same scale). Penalties supported per solver:
        #   newton-cg, lbfgs, sag -> 'l2' / 'none'
        #   liblinear             -> 'l1' / 'l2' (one-versus-rest only)
        #   saga                  -> 'elasticnet' / 'l1' / 'l2' / 'none'
        'solver': 'lbfgs',  # default = 'lbfgs'
        # 'ovr' fits one binary problem per label; 'multinomial' minimises
        # the multinomial loss across the whole probability distribution and
        # is unavailable with 'liblinear'; 'auto' picks between the two.
        'multi_class': 'multinomial',  # default = 'auto'
        'penalty': 'l2',  # default = 'l2'
        'dual': False,  # default = False
        'tol': 1e-4,  # default = 1e-4
        'C': trial.suggest_float('C', low=.95, high=1.05),  # default = 1.0
        'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False]),  # default = True
        # Left at their defaults: intercept_scaling (only useful with
        # 'liblinear' and fit_intercept=True), class_weight, and l1_ratio
        # (only used when penalty='elasticnet').
        'random_state': 21,
        'verbose': 0,
        'warm_start': False,
        'n_jobs': -1,
        'max_iter': trial.suggest_int('max_iter', 95, 150),  # default = 100
    }

    clf = LogisticRegression(**lr_params)
    clf.fit(X_train, y_train)
    return accuracy_score(y_test, clf.predict(X_test))


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)

# Random Forest Classifier

In [None]:
# Baseline: RandomForestClassifier with default settings, scored by
# accuracy on the hold-out split.
model = RandomForestClassifier().fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'Random Forrest Classifier Score with Default Settings is {score}')

# Random Forrest Classifier Score with Default Settings is 0.99242

In [None]:
# Tuned params for RandomForestClassifier.
# 'bootstrap' and 'oob_score' are real booleans now: they used to be the
# string 'True', which only worked because a non-empty string is truthy and
# is rejected by scikit-learn versions that validate parameter types.
params = {
    'n_estimators': 300,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 9,
    'min_weight_fraction_leaf': 0.0006226172051287282,
    'max_leaf_nodes': 574,
    'min_impurity_decrease': 0,
    'bootstrap': True,
    'oob_score': True,
    'n_jobs': -1,
    'verbose': 0,
    'warm_start': False,
    'class_weight': 'balanced',
    'ccp_alpha': 0,
    'max_samples': None,
}

model = RandomForestClassifier(**params)
model.fit(X_train, y_train)
preds = model.predict(X_test)

score = accuracy_score(y_test, preds)
print(f'Random Forrest Classifier Score is {score}')


### Optuna Optimization for Random Forest Classifier

In [None]:
def objective(trial):
    """Optuna objective: hold-out accuracy of a RandomForestClassifier.

    Only ``n_estimators`` is searched; all other settings are pinned. The
    model is fitted on the module-level training split, and its accuracy on
    the hold-out split is printed and returned.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', low=90, high=110),  # default = 100
        'criterion': 'gini',  # default = gini
        'max_depth': None,
        'min_samples_split': 2,
        'min_samples_leaf': 1,
        'min_weight_fraction_leaf': 0.0,
        'max_leaf_nodes': None,  # default = None
        'min_impurity_decrease': 0.0,
        'bootstrap': False,
        'n_jobs': -1,
        'random_state': 21,
        'verbose': 0,
        'warm_start': False,
        'class_weight': 'balanced_subsample',
        'ccp_alpha': 0,
        'max_samples': None,
        # Search ranges tried earlier and since pinned:
        #   criterion in ['gini', 'entropy'];  max_depth 1..50;
        #   min_samples_split 0..1.0;  min_samples_leaf 1..25;
        #   min_weight_fraction_leaf 0...01;  max_leaf_nodes 200..600;
        #   min_impurity_decrease 0..100;  bootstrap in [True, False];
        #   class_weight in ['balanced', 'balanced_subsample', None];
        #   ccp_alpha 0.0..2.0;  max_samples 1..500 step 10.
    }

    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)
    # Score once and reuse it: the hold-out set used to be predicted twice
    # per trial, once for the print and once for the return value.
    score = model.score(X_test, y_test)
    print(score)

    return score


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)

# [I 2022-02-11 00:50:20,713] Trial 15 finished with value: 0.99348 and parameters: {'n_estimators': 107}. Best is trial 15 with value: 0.99348.
# [I 2022-02-11 00:21:36,903] Trial 11 finished with value: 0.99346 and parameters: {'n_estimators': 106, 'bootstrap': 
# False, 'balanced': 'balanced_subsample'}. Best is trial 11 with value: 0.99346.

# [I 2022-02-11 00:06:10,228] Trial 10 finished with value: 0.99332 and parameters: {'n_estimators': 97, 'bootstrap': False}.
# Best is trial 10 with value: 0.99332.

# MultinomialNB Classifier

In [None]:
# MULTINOMIAL NB WITH DEFAULT PARAMETERS
model = MultinomialNB()
model.fit(X_train, y_train)
preds = model.predict(X_test)

score = accuracy_score(y_test, preds)
# The message used to say "KNeighbors", mislabelling this model's result.
print(f'MultinomialNB Classifier Score is {score}')

In [None]:
# MULTINOMIAL NB WITH TUNED PARAMS
# No tuned values have been recorded yet, so the dict is empty and the model
# still runs with defaults. It is now passed to the constructor, so filling
# it in takes effect.
params = {}

model = MultinomialNB(**params)
model.fit(X_train, y_train)
preds = model.predict(X_test)

score = accuracy_score(y_test, preds)
# The message used to say "KNeighbors", mislabelling this model's result.
print(f'MultinomialNB Classifier Score is {score}')

## Optuna Optimized MultiNomial NB

In [None]:
def objective(trial):
    """Optuna objective: hold-out accuracy of a MultinomialNB for one trial.

    Searches ``alpha`` and ``fit_prior``, fits on the module-level training
    split and returns the accuracy on the hold-out split.

    Fixes over the previous version:
      * ``trial.suggest_bool`` does not exist in Optuna; a boolean choice is
        expressed with ``suggest_categorical``.
      * the sampled ``params`` are now actually passed to the model.
      * the score is returned, so the study has a value to maximize.
    """
    params = {
        'alpha': trial.suggest_float('alpha', .8, 1.2),  # default = 1.0
        'fit_prior': trial.suggest_categorical('fit_prior', [True, False]),  # default = True
        # 'class_prior' is left at its default.
    }

    model = MultinomialNB(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    score = accuracy_score(y_test, preds)
    return score


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)

## KNeighbors Classifier

In [None]:
# Baseline: KNeighborsClassifier with no parameters supplied, scored by
# accuracy on the hold-out split.
model = KNeighborsClassifier().fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'KNeighbors Classifier Score is {score}')

# KNeighbors Classifier Score is 0.95542 with default settings

In [None]:
# KNeighborsClassifier with explicit parameters; these values match the
# best Optuna trial recorded further down in this notebook.
params = {
    'n_neighbors': 7,
    'weights': 'distance',
    'algorithm': 'brute',  # alternatives: 'auto', 'ball_tree', 'kd_tree'
    'leaf_size': 34,
    'p': 2,
    'metric': 'minkowski',
    'n_jobs': -1,
}

model = KNeighborsClassifier(**params).fit(X_train, y_train)
preds = model.predict(X_test)

score = accuracy_score(y_test, preds)
print(f'KNeighbors Classifier Score is {score}')


### Optuna Optimization for KNeighbors Classifier

In [None]:
# Optuna optimization function for KNeighborsClassifier
def objective(trial):
    """Optuna objective: hold-out accuracy of a KNeighborsClassifier.

    Searches n_neighbors, weights, algorithm, leaf_size and p; the metric is
    fixed to 'minkowski'. Fits on the module-level training split and returns
    the accuracy on the hold-out split.
    """
    knn_params = {
        'n_neighbors': trial.suggest_int('n_neighbors', low=3, high=8),
        'weights': trial.suggest_categorical('weights', ['uniform', 'distance']),
        'algorithm': trial.suggest_categorical(
            'algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute']
        ),
        'leaf_size': trial.suggest_int('leaf_size', 20, 40),
        'p': trial.suggest_int('p', low=1, high=3),
        'metric': 'minkowski',
        'n_jobs': -1,
    }

    clf = KNeighborsClassifier(**knn_params)
    clf.fit(X_train, y_train)
    return accuracy_score(y_test, clf.predict(X_test))


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)


# Trial 0 finished with value: 0.97026 and parameters: {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 
# 'auto', 'leaf_size': 25, 'p': 2}. Best is trial 0 with value: 0.97026.
# [I 2022-02-09 13:46:47,630] Trial 1 finished with value: 0.98774 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute',
# 'leaf_size': 34, 'p': 2}. Best is trial 1 with value: 0.98774.

# Decision Tree Classifier

In [None]:
# Baseline: DecisionTreeClassifier with default settings, scored by
# accuracy on the hold-out split.
model = DecisionTreeClassifier().fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'Decision Tree Classifier Score with Default Settings is {score}')

# Decision Tree Classifier Score with Default Settings is 0.9712

In [None]:
# DecisionTreeClassifier with explicit parameters.
params = {
    'splitter': 'best',  # try 'random'
    'criterion': 'gini',  # try 'entropy'
    'max_depth': None,  # try values 1 to 50
    # 'min_samples_split': 0.0,  # try values 0 to 1.0
    'min_samples_leaf': 1,  # try values 1 to 5
    # 'min_weight_fraction_leaf': None,  # try values 0 to 5
    # 'sqrt' is the same setting 'auto' selected for classifiers; 'auto' was
    # deprecated and later removed from scikit-learn, 'sqrt' works everywhere.
    'max_features': 'sqrt',
    'random_state': 21,
    'max_leaf_nodes': None,
    'min_impurity_decrease': 0,  # try values 0 to 100
    'class_weight': 'balanced',
    'ccp_alpha': 0,  # try values 0 to 2
}

model = DecisionTreeClassifier(**params)
model.fit(X_train, y_train)
preds = model.predict(X_test)
score = accuracy_score(y_test, preds)
print(f'Decision Tree Classifier Score is {score}')

# Decision Tree Classifier Score is 0.97048 - none
# Decision Tree Classifier Score is 0.9691 'optimized'

In [None]:
def objective(trial):
    """Optuna objective: hold-out accuracy of a DecisionTreeClassifier.

    Samples a hyper-parameter set from ``trial``, fits on the module-level
    training split and returns the accuracy on the hold-out split.

    Fixes over the previous version:
      * ``'max_leaf_nodes':,`` had no value (a syntax error); it is now
        ``None``, the value the other tree cells in this notebook use.
      * ``min_samples_split`` as a fraction must be strictly positive, so
        the lower bound of its range is no longer 0.
      * ``max_features='auto'`` is replaced by the equivalent ``'sqrt'``;
        'auto' was deprecated and later removed from scikit-learn.
    """
    params = {
        'splitter': trial.suggest_categorical('splitter', ['best', 'random']),
        'criterion': 'gini',
        'max_depth': trial.suggest_int('max_depth', low=1, high=10),
        # Fraction of samples required to split a node, in (0, 1.0].
        'min_samples_split': trial.suggest_float('min_samples_split', low=1e-6, high=1.0),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', low=1, high=5),
        'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', low=0, high=.5),
        'max_features': 'sqrt',
        'random_state': 21,
        'max_leaf_nodes': None,
        'min_impurity_decrease': trial.suggest_int('min_impurity_decrease', low=0, high=1),
        'class_weight': 'balanced',
        'ccp_alpha': trial.suggest_float('ccp_alpha', low=0.0, high=.001),
    }

    model = DecisionTreeClassifier(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    score = accuracy_score(y_test, preds)
    return score


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)

# SVC Classifier

In [None]:
# svc_pipe = Pipeline(steps=[('standardscaler', StandardScaler()),('svc', SVC(gamma='auto'))])

In [None]:
# SVC CLASSIFIER WITH TUNED PARAMS
# params = {
    
# }


# svc_classifier = make_pipeline(svc_pipe)
# svc_classifier.fit(X_train,y_train)
# preds = svc_classifier.predict(X_test)
# score = accuracy_score(y_test,preds)
# print(f'The SVC Score with Parameter Tuning is: {score}')

In [None]:
# svc_classifier with default parameters
# svc_classifier = make_pipeline(svc_pipe)
# svc_classifier.fit(X_train,y_train)
# preds = svc_classifier.predict(X_test)
# score = accuracy_score(y_test,preds)
# print(f'The SVC Score with Default Parameters is: {score}')

# The SVC Score with Default Parameters is: 0.94672

## Optuna Optimized SVC Classifier

In [None]:
# svc_pipe = Pipeline(steps=[('standardscaler', StandardScaler()),('svc', SVC())])   
# svc_classifier = make_pipeline(svc_pipe)

# def objective(trial):
    
#     params = {'C':trial.suggest_float('C',.1,2.0),                                                           # default = 1.0
#                    # Regularization parameter. The strength of the regularization is inversely 
#                    # proportional to C. Must be strictly positive. The penalty is a squared l2 penalty.
#               'kernel':trial.suggest_categorical('kernal',['linear','poly','rbf','sigmoid','precomputed']),      # default = rbf
#                    # Specifies the kernel type to be used in the algorithm. If none is given,
#                    # ‘rbf’ will be used. If a callable is given it is used to pre-compute 
#                    # the kernel matrix from data matrices; that matrix should be an array 
#                    # of shape (n_samples, n_samples).
#               'degree':trial.suggest_int('degree',2,5),                                                    # default = 3
#                    # Degree of the polynomial kernel function (‘poly’).\
#                    # Ignored by all other kernels. 
#               'gamma':trial.suggest_categorical('gamma',['scale','auto']),                                         # default = scale
#                    # Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.
#                    # if gamma='scale' (default) is passed then it uses
#                    # 1 / (n_features * X.var()) as value of gamma,
#                    # if ‘auto’, uses 1 / n_features. 
#               'random_state':21,                                                                            # default = None
#               'coef0':trial.suggest_float('coef0',0,1),                                                     # default = 0.0                          
#                    # It is only significant in ‘poly’ and ‘sigmoid’.
#               'shrinking':trial.suggest_categorical('shrinking',[True,False]),                                               # default = True
#               'tol':trial.suggest_float('tol',1e-4,1e-2),                                            # default = 1e-3
#               'class_weight':'balanced',                         # default = None
#                    # Set the parameter C of class i to class_weight[i]*C for SVC. 
#                    # If not given, all classes are supposed to have weight one. 
#                    # The “balanced” mode uses the values of y to automatically adjust 
#                    # weights inversely proportional to class frequencies in the input 
#                    # data as n_samples / (n_classes * np.bincount(y)).
              
#               'max_iter':-1,                                                                         # default = -1
#               'decision_function_shape':trial.suggest_categorical('decision_function_shape',['ovo','ovr']),                    # default = ovr
#                    # decision_function_shape{‘ovo’, ‘ovr’}, default=’ovr’
#                    # Whether to return a one-vs-rest (‘ovr’) decision function of
#                    # shape (n_samples, n_classes) as all 
#                    # other classifiers, or the original one-vs-one (‘ovo’) decision 
#                    # function of libsvm which has shape
#                    # (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one
#                    # (‘ovo’) is always used as multi-class strategy.
#                    # The parameter is ignored for binary classification.
             
#              }

    
#     svc_classifier.fit(X_train,y_train)
#     preds = svc_classifier.predict(X_test)
#     score = accuracy_score(y_test,preds)
    
#     print(f'The SVC Score with Tuned Parameters is: {score}')
    
#     return score
    

# study = optuna.create_study(direction='maximize')
# study.optimize(objective, n_trials=500)
    

# ExtraTreesClassifier

In [None]:
# EXTRA TREES CLASSIFIER WITH DEFAULT SETTINGS
# model = ExtraTreesClassifier()
# model.fit(X_train, y_train)
# preds = model.predict(X_test)
# score = accuracy_score(y_test,preds)
# print(f'Extra Trees Classifier Score is {score}')

# Extra Trees Classifier Score is 0.99324 with default settings

In [None]:
# EXTRA TREES CLASSIFIER WITH TUNED PARAMS
# Placeholder: no tuned values have been recorded yet.
params = dict()

In [None]:
#OPTUNA TUNING FOR EXTRA TREES CLASSIFIER
def objective(trial):
    """Optuna objective: hold-out accuracy of an ExtraTreesClassifier.

    Samples ``n_estimators``, ``min_weight_fraction_leaf`` and ``bootstrap``
    from ``trial``, fits the classifier on the notebook-level
    ``X_train``/``y_train`` and scores its predictions for ``X_test``
    against ``y_test``. Those four names must already be defined when the
    study runs.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Accuracy of the fitted model on the hold-out split.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', low=95, high=105),  # default = 100
        'criterion': 'gini',  # fixed; alternative: trial.suggest_categorical('criterion', ['gini', 'entropy'])
        # Left at their library defaults (earlier tuning ideas kept for reference):
        # 'max_depth':          trial.suggest_float('max_depth', 0.000001, 2)
        # 'min_samples_split':  trial.suggest_float('min_samples_split', low=0.0, high=1)
        # 'min_samples_leaf':   trial.suggest_int('min_samples_leaf', 1, 3)
        'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', low=0, high=.001),
        # 'auto' meant sqrt(n_features) for classifiers and is rejected by
        # scikit-learn >= 1.3 (the notebook upgrades scikit-learn on start),
        # so spell out the equivalent 'sqrt'.
        'max_features': 'sqrt',  # alternative: trial.suggest_categorical('max_features', ['sqrt', 'log2'])
        'random_state': 21,
        'n_jobs': -1,
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
    }

    model = ExtraTreesClassifier(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    return accuracy_score(y_test, preds)

# Run 500 Optuna trials, keeping the configuration with the highest objective value.
study = optuna.create_study(direction="maximize")
study.optimize(func=objective, n_trials=500)

# Trial 0 finished with value: 0.99292 and parameters: {'n_estimators': 95, 'bootstrap': False}. Best is trial 0 with value: 0.99292.
# Trial 10 finished with value: 0.9929 and parameters: {'n_estimators': 105, 'min_weight_fraction_leaf':
# 6.534368055766463e-06, 'bootstrap': True}. Best is trial 10 with value: 0.9929.

# StackedClassifier Optimization and Parameters

In [None]:




# PARAMETERS AND OTHER SETTINGS FOR STACKEDCLASSIFIER **************** KEEP *******************
# estimators = [('str', estimator), ...]
    # Base estimators which will be stacked together. Each element of the list is defined as 
    # a tuple of string (i.e. name) and an estimator instance. An estimator can be set to ‘drop’ using set_params.

# final_estimator = estimator  -  default=None
    # A classifier which will be used to combine the base estimators. The default classifier is a LogisticRegression.

# cv = int, cross-validation generator or an iterable  -   default=None
     # Determines the cross-validation splitting strategy used in cross_val_predict
     # to train final_estimator. Possible inputs for cv are:
            # None, to use the default 5-fold cross validation,
            # integer, to specify the number of folds in a (Stratified) KFold,
            # An object to be used as a cross-validation generator,
            # An iterable yielding train, test splits.
            # For integer/None inputs, if the estimator is a classifier and y is either binary or multiclass,
            # StratifiedKFold is used. In all other cases, KFold is used.
            # These splitters are instantiated with shuffle=False so the splits will be the same across calls.


# stack_method = {‘auto’, ‘predict_proba’, ‘decision_function’, ‘predict’}  -  default=’auto’
      # Methods called for each base estimator. It can be:
            # if ‘auto’, it will try to invoke, for each estimator, 'predict_proba', 'decision_function' or 'predict' in that order.
            # otherwise, one of 'predict_proba', 'decision_function' or 'predict'. If the method is not implemented by the estimator, it will raise an error.

# n_jobs, default=None

# passthrough, default=False
     # When False, only the predictions of estimators will be used as training data for final_estimator.
     # When True, the final_estimator is trained on the predictions as well as the original training data.

# verbose, default=0

## Optuna Tuned Extra Trees Classifier

## Classifiers

#### Random Forest Classifier, KNeighbors Classifier, and Decision Tree Classifier

In [None]:
# Encode the target column as integer class ids; the fitted encoder is kept
# so predictions can be mapped back to the original labels later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(train['target'])
X = train[features]

# Hold out 25% of the rows, stratified on the encoded target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

## Stacked Ensemble

In [None]:
# SVC preceded by feature standardisation, packaged as a single estimator.
svc_pipe = Pipeline(
    steps=[
        ('standard_scaler', StandardScaler()),
        ('svc', SVC(gamma='auto')),
    ]
)

In [None]:
# Base learners for the stack as (name, estimator) pairs.
# The decision tree is labelled 'DTC'; it was previously tagged 'ETC', which
# reads as Extra Trees and mislabels it in named_estimators_ / parameter names.
estimators = [
    ('RFC', RandomForestClassifier()),
    ('KNC', KNeighborsClassifier()),
    ('DTC', DecisionTreeClassifier()),
    ('SVC', svc_pipe),
]


In [None]:
%%time
# Combine the base estimators' outputs with a logistic-regression final estimator.
stacked_classifier = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(solver='lbfgs'),
)

In [None]:
%%time
# Fit the whole stack on the training split.
stacked_classifier.fit(X=X_train, y=y_train)

# Submission

In [None]:
# Predict encoded class ids for the competition test set.
preds = stacked_classifier.predict(test[features])

# Decode back to the original labels. Bracket assignment always writes the
# DataFrame column; attribute assignment would only set a plain Python
# attribute if the 'target' column were ever missing from the template.
submission['target'] = label_encoder.inverse_transform(preds)
submission.to_csv("submission.csv", index=False)