In [13]:
import numpy as np
from sklearn.metrics import f1_score, classification_report

from sklearn.feature_selection import VarianceThreshold, RFECV
from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC, OneClassSVM
from sklearn.ensemble import RandomForestClassifier, IsolationForest, GradientBoostingClassifier

from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV

from imblearn.over_sampling import RandomOverSampler, SMOTE, KMeansSMOTE, ADASYN, BorderlineSMOTE, \
                                    SMOTENC, SVMSMOTE
from imblearn.under_sampling import RandomUnderSampler, CondensedNearestNeighbour, TomekLinks, \
                                    EditedNearestNeighbours, NeighbourhoodCleaningRule, OneSidedSelection
from imblearn.metrics import classification_report_imbalanced
from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.pipeline import Pipeline

import xgboost as xgb



In [17]:
rfecv = RFECV(estimator=LogisticRegression(class_weight="balanced"), step=1, scoring="neg_mean_squared_error")


In [32]:
decompositions = { 'pca1': {'decomposition': PCA(),
                            'decomposition__n_components': sorted(set(np.logspace(0.4, 4, 100, dtype='int', endpoint=False))),
                            'decomposition__whiten': [False, True],
                            'decomposition__svd_solver': ['auto', 'full', 'arpack', 'randomized']},
                   'pca2': {'decomposition': PCA(),
                            'decomposition__n_components': np.logspace(0,1,10, endpoint=False)/10,
                            'decomposition__whiten': [False, True],
                            'decomposition__svd_solver': ['auto', 'full', 'arpack', 'randomized']},
                   'kpca': {'decomposition': KernelPCA(),
                            'decomposition__kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'],
                            'decomposition__gamma': np.linspace(0.03, 0.05, 5)},
                 }

scalers = { 'StandardScaler': {'scaler': StandardScaler()},
            'Normalizer': {'scaler': Normalizer()},
            'MinMaxScaler': {'scaler': MinMaxScaler()}
          }

oversamplings = { 'RandomOverSampler': {'sampler': RandomOverSampler()},
              'SMOTE': {'sampler': SMOTE(),
                        'sampler__k_neighbors': [2,3,4,5,10,15,20],
                        'sampler__m_neighbors': [2,3,4,5,10,15,20]},
              'SVMSMOTE': {'sampler': SVMSMOTE(),
                           'sampler__k_neighbors': [2,3,4,5,10,15,20],
                           'sampler__m_neighbors': [2,3,4,5,10,15,20]},
              'BorderlineSMOTE': {'sampler': BorderlineSMOTE(),
                                  'sampler__k_neighbors': [2,3,4,5,10,15,20],
                                  'sampler__m_neighbors': [2,3,4,5,10,15,20],
                                  'sampler__kind': ['borderline-1', 'borderline-2']},
              'KMeansSMOTE': {'sampler': KMeansSMOTE(),
                              'sampler__k_neighbors': [2,3,4,5,10,15,20]},
                 'ADASYN':  {'sampler': ADASYN(),
                              'sampler__n_neighbors': [2,3,4,5,10,15,20]}
                                  
            }

undersamplings = { 'RandomUnderSampler' : {'sampler': RandomUnderSampler()},
                   'CondensedNearestNeighbour' : {'sampler': CondensedNearestNeighbour()},
                   'TomekLinks' : {'sampler': TomekLinks()},
                   'EditedNearestNeighbours' : {'sampler': EditedNearestNeighbours(),
                                                'sampler__n_neighbors': [2,3,4,5,10,15,20]},
                   'NeighbourhoodCleaningRule': {'sampler': NeighbourhoodCleaningRule(),
                                                 'sampler__n_neighbors': [2,3,4,5,10,15,20]},
                   'OneSidedSelection': {'sampler': OneSidedSelection(),
                                         'sampler__n_neighbors': [2,3,4,5,10,15,20]} 
                 }

combosamplings = {'SMOTEENN': {'sampler': SMOTEENN()},
                  'SMOTETomek': {'sampler': SMOTETomek()}
                 }
samplings = {**oversamplings, **undersamplings, **combosamplings}

estimators = {'LogisticRegression': {'estimator': LogisticRegression(),
                                     'estimator__penalty':['l1', 'l2', 'elasticnet', 'none'],
                                     'estimator__class_weight': ['balanced'],
                                     'estimator__C': np.logspace(0, 4, 10000),
                                     'estimator__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']},
              'LinearDiscriminantAnalysis': {'estimator': LinearDiscriminantAnalysis(),
                                             'estimator__solver': ['svd', 'lsqr', 'eigen']},
              'QuadraticDiscriminantAnalysis' :{'estimator': QuadraticDiscriminantAnalysis()},
              'GaussianNB': {'estimator': GaussianNB()},
              'BernoulliNB': {'estimator': BernoulliNB(),
                              'estimator__alpha': np.logspace(0,1,10, endpoint=False)/10},
              'DecisionTreeClassifier': {'estimator': DecisionTreeClassifier(),
                                         'estimator__criterion': ['gini', 'entropy'],
                                         'estimator__class_weight': ['balanced'],
                                         'estimator__splitter': ['best', 'random'],
                                         'estimator__max_depth': [2,5,10,None]},
              'SVC': {'estimator': SVC(),
                      'estimator__C': np.logspace(0, 4, 10000),
                      'estimator__class_weight': ['balanced'],
                      'estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
                      'estimator__gamma': ['scale', 'auto']},
              'LinearSVC': {'estimator': LinearSVC(),
                            'estimator__penalty': ['l1','l2'],
                            'estimator__loss': ['hinge', 'squared_hinge'],
                            'estimator__class_weight': ['balanced'],
                            'estimator__C': np.logspace(0, 4, 10000)},
              'NuSVC': {'estimator': NuSVC(),
                        'estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
                        'estimator__gamma': ['scale', 'auto'],
                        'estimator__class_weight': ['balanced']},
              'OneClassSVM': {'estimator': OneClassSVM(),
                              'estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
                              'estimator__gamma': ['scale', 'auto'],
                              'estimator__class_weight': ['balanced']},
              'RandomForestClassifier': {'estimator': RandomForestClassifier(),
                                         'estimator__n_estimators': [10,20,50,100,200,500,1000],
                                         'estimator__criterion': ['gini', 'entropy'],
                                         'estimator__max_depth': [2,5,10,None],
                                         'estimator__max_features': ['auto', 'sqrt', 'log2'],
                                         'estimator__class_weight': ['balanced', 'balanced_subsample']},
              'IsolationForest': {'estimator': IsolationForest(),
                                  'estimator__n_estimators': [10,20,50,100,200,500,1000]},
              'GradientBoostingClassifier': {'estimator': GradientBoostingClassifier(),
                                             'estimator__n_estimators': [10,20,50,100,200,500,1000],
                                             'estimator__learning_rate': np.logspace(0,1,10, endpoint=False)/10,
                                             'estimator__criterion': ['friedman_mse', 'mse', 'mae'],
                                             'estimator__max_features': ['auto', 'sqrt', 'log2']}
             }

In [None]:
pipe = Pipeline(steps=[('prep', StandardScaler()), ('prep2', PCA()), ('classifier', SVC())])

# Add a dict of classifier and classifier related parameters in this list
params_grid = [{
                'prep': StandardScaler(),
                'prep2': PCA(),
                'prep2__n_estimators': [np.logspace()]
                'classifier':[SVC()],
                'classifier__C': [1, 10, 100, 1000],
                'classifier__gamma': [0.001, 0.0001],
                },
                {
                'classifier': [DecisionTreeClassifier()],
                'classifier__max_depth': [1,2,3,4,5],
                'classifier__max_features': [None, "auto", "sqrt", "log2"],
                },
               # {'classifier':[Any_other_classifier_you_want],
               #  'classifier__valid_param_of_your_classifier':[valid_values]

              ]

grid = GridSearchCV(pipe, params_grid)

In [21]:
estimators['LogisticRegression']

{'estimator': LogisticRegression(),
 'estimator__penalty': ['l1', 'l2', 'elasticnet', 'none'],
 'estimator__class_weight': ['balanced'],
 'estimator_C': array([1.00000000e+00, 1.00092155e+00, 1.00184395e+00, ...,
        9.98159444e+03, 9.99079298e+03, 1.00000000e+04]),
 'estimator__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}

In [22]:
dict4 = {**scalers['StandardScaler'], **decompositions['kpca'], **estimators['LogisticRegression']}

In [23]:
dict4

{'scaler': StandardScaler(),
 'decomposition': KernelPCA(),
 'decomposition__kernel': ['linear',
  'poly',
  'rbf',
  'sigmoid',
  'cosine',
  'precomputed'],
 'decomposition__gamma': array([0.03 , 0.035, 0.04 , 0.045, 0.05 ]),
 'estimator': LogisticRegression(),
 'estimator__penalty': ['l1', 'l2', 'elasticnet', 'none'],
 'estimator__class_weight': ['balanced'],
 'estimator_C': array([1.00000000e+00, 1.00092155e+00, 1.00184395e+00, ...,
        9.98159444e+03, 9.99079298e+03, 1.00000000e+04]),
 'estimator__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}

In [26]:
dict2 = {**oversamplings, **undersamplings, **combosamplings}

In [27]:
dict2

{'RandomOverSampler': {'sampler': RandomOverSampler()},
 'SMOTE': {'sampler': SMOTE(),
  'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20],
  'sampler__m_neighbors': [2, 3, 4, 5, 10, 15, 20]},
 'SVMSMOTE': {'sampler': SVMSMOTE(),
  'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20],
  'sampler__m_neighbors': [2, 3, 4, 5, 10, 15, 20]},
 'BorderlineSMOTE': {'sampler': BorderlineSMOTE(),
  'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20],
  'sampler__m_neighbors': [2, 3, 4, 5, 10, 15, 20],
  'sampler__kind': ['borderline-1', 'borderline-2']},
 'KMeansSMOTE': {'sampler': KMeansSMOTE(),
  'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20]},
 'ADASYN': {'sampler': ADASYN(),
  'sampler__n_neighbors': [2, 3, 4, 5, 10, 15, 20]},
 'RandomUnderSampler': {'sampler': RandomUnderSampler()},
 'CondensedNearestNeighbour': {'sampler': CondensedNearestNeighbour()},
 'TomekLinks': {'sampler': TomekLinks()},
 'EditedNearestNeighbours': {'sampler': EditedNearestNeighbours(),
  'sampler__n_neighbors': [2, 3, 4, 

In [28]:
dict3  = {**dict2['SMOTETomek'],**scalers['StandardScaler'], **decompositions['kpca'], **estimators['SVC']}
dict3

{'sampler': SMOTETomek(),
 'scaler': StandardScaler(),
 'decomposition': KernelPCA(),
 'decomposition__kernel': ['linear',
  'poly',
  'rbf',
  'sigmoid',
  'cosine',
  'precomputed'],
 'decomposition__gamma': array([0.03 , 0.035, 0.04 , 0.045, 0.05 ]),
 'estimator': SVC(),
 'estimator__C': array([1.00000000e+00, 1.00092155e+00, 1.00184395e+00, ...,
        9.98159444e+03, 9.99079298e+03, 1.00000000e+04]),
 'estimator__class_weight': ['balanced'],
 'estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
 'estimator__gamma': ['scale', 'auto']}

In [31]:
for k,v in dict2.items():
    print(v)

{'sampler': RandomOverSampler()}
{'sampler': SMOTE(), 'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20], 'sampler__m_neighbors': [2, 3, 4, 5, 10, 15, 20]}
{'sampler': SVMSMOTE(), 'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20], 'sampler__m_neighbors': [2, 3, 4, 5, 10, 15, 20]}
{'sampler': BorderlineSMOTE(), 'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20], 'sampler__m_neighbors': [2, 3, 4, 5, 10, 15, 20], 'sampler__kind': ['borderline-1', 'borderline-2']}
{'sampler': KMeansSMOTE(), 'sampler__k_neighbors': [2, 3, 4, 5, 10, 15, 20]}
{'sampler': ADASYN(), 'sampler__n_neighbors': [2, 3, 4, 5, 10, 15, 20]}
{'sampler': RandomUnderSampler()}
{'sampler': CondensedNearestNeighbour()}
{'sampler': TomekLinks()}
{'sampler': EditedNearestNeighbours(), 'sampler__n_neighbors': [2, 3, 4, 5, 10, 15, 20]}
{'sampler': NeighbourhoodCleaningRule(), 'sampler__n_neighbors': [2, 3, 4, 5, 10, 15, 20]}
{'sampler': OneSidedSelection(), 'sampler__n_neighbors': [2, 3, 4, 5, 10, 15, 20]}
{'sampler': SMOTEENN()}
{'s

In [33]:
def create_params_grid():
    params_grid_without_samplings=[]
    
    for _,i in scalers.items():
        for _,j in decompositions.items():
            for _,k in estimators.items():
                params_grid_without_samplings.append({**i,**j,**k})
    return params_grid_without_samplings

In [34]:
params_grid = create_params_grid()

In [37]:
params_grid

[{'scaler': StandardScaler(),
  'decomposition': PCA(),
  'decomposition__n_components': [2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   18,
   19,
   21,
   23,
   25,
   27,
   30,
   32,
   35,
   38,
   42,
   45,
   49,
   53,
   58,
   63,
   69,
   75,
   81,
   88,
   96,
   104,
   113,
   123,
   134,
   145,
   158,
   172,
   187,
   203,
   220,
   239,
   260,
   283,
   307,
   334,
   363,
   394,
   428,
   465,
   505,
   549,
   597,
   648,
   704,
   765,
   831,
   903,
   981,
   1066,
   1158,
   1258,
   1367,
   1485,
   1614,
   1753,
   1905,
   2070,
   2249,
   2443,
   2654,
   2884,
   3133,
   3404,
   3698,
   4017,
   4365,
   4742,
   5152,
   5597,
   6081,
   6606,
   7177,
   7798,
   8472,
   9204],
  'decomposition__whiten': [False, True],
  'decomposition__svd_solver': ['auto', 'full', 'arpack', 'randomized'],
  'estimator': LogisticRegression(),
  'estimator__penalty': ['l1', 'l2', 'elasticnet'