In [1]:
import import_ipynb
import logging
import numpy as np
from optparse import OptionParser
import sys
from time import time
import matplotlib.pyplot as plt
import scipy
import pandas as pd
from scipy.sparse import csr_matrix

from sklearn.utils.estimator_checks import check_estimator
from sklearn.model_selection import cross_validate
from sklearn.calibration import CalibratedClassifierCV
from mlxtend.classifier import StackingClassifier
from sklearn.decomposition import TruncatedSVD, NMF, KernelPCA, LatentDirichletAllocation, PCA
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectKBest, chi2, mutual_info_classif
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.decomposition import FastICA
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
import xgboost as xgb
from catboost import CatBoostClassifier, Pool, cv
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
import random  

from random import sample 
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import RidgeClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils.extmath import density
from sklearn import metrics
from sklearn.metrics import balanced_accuracy_score, recall_score, precision_score, confusion_matrix, make_scorer

from torch import nn
from collections import OrderedDict
from skorch import NeuralNetClassifier

# Configure the root logger to emit INFO-level (and above) records with a timestamp.
# NOTE: basicConfig's default StreamHandler writes to stderr, not stdout.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

In [2]:
# Notebook-wide configuration constants.
RANDOM_STATE, KBEST_FEATURES = 0, 2500

# Scorers passed to every GridSearchCV below; the searches refit on
# 'balanced_accuracy_score'. precision/recall are wrapped with their default arguments.
CV_SCORERS = dict ([
    ('precision_score',         make_scorer (precision_score)),
    ('recall_score',            make_scorer (recall_score)),
    ('balanced_accuracy_score', make_scorer (balanced_accuracy_score)),
])

# Custom Transformers

In [3]:
class DenseTransformer (BaseEstimator, TransformerMixin):
    """Pipeline step that turns SciPy sparse input into a dense NumPy array.

    Anything that is not a SciPy sparse matrix is passed through untouched.
    The step is stateless and exposes no hyper-parameters.
    """

    def fit (self, X, y=None, **fit_params):
        """Nothing is learned; returns ``self`` so the step can sit in a Pipeline."""
        return self

    def transform (self, X, y=None, **fit_params):
        """Return a dense copy of ``X`` when it is sparse, otherwise ``X`` itself."""
        # issparse() recognises every SciPy sparse format (CSR, CSC, COO, ...) instead of
        # only CSR, and toarray() yields a plain ndarray rather than the legacy np.matrix
        # that todense() produces.
        if scipy.sparse.issparse (X):
            return X.toarray ()
        return X

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return an empty dict: the transformer has no constructor parameters."""
        params = {}
        return params

In [4]:
# check_estimator (DenseTransformer ())

In [5]:
class IdentityTransformer (BaseEstimator, TransformerMixin):
    """Debugging pass-through: prints the type and shape of the data, returns it unchanged."""

    def fit (self, X, y=None):
        """Print the type/shape of ``X``; nothing is learned. Returns ``self``."""
        kind, dims = type (X), X.shape
        print ('IdentityTransformer: type(X), X.shape =', kind, dims)
        return self

    def transform (self, X, y=None):
        """Print the type/shape of ``X`` and hand back the very same object."""
        kind, dims = type (X), X.shape
        print ('IdentityTransformer: type(X), X.shape =', kind, dims)
        return X

    def set_params (self, **parameters):
        """Store every keyword as an instance attribute and return ``self``."""
        for name in parameters:
            setattr (self, name, parameters[name])
        return self

    def get_params (self, deep=True):
        """The transformer has no parameters, so this is always an empty dict."""
        return dict ()

In [6]:
class Get_Train_From_TrainTest_Data (BaseEstimator, TransformerMixin):
    """Cut a stacked train+test data set back down to its leading labelled rows.

    ``fit`` counts the non-null entries of ``y`` and remembers that count as
    ``train_len``; ``transform`` keeps the first ``train_len`` rows of ``X`` and ``y``.
    NOTE(review): this presumes the labelled (training) rows come first and the
    unlabelled (test) rows follow — confirm against the caller.
    """

    def fit (self, X, y):
        """Record how many labels in ``y`` are non-null.

        Parameters
        ----------
        X : array-like or sparse matrix; only its type and shape are printed.
        y : array-like of labels, with nulls marking unlabelled rows.

        Returns
        -------
        self
        """
        print ('Get_Train_From_TrainTest_Data.fit (): type(X), X.shape =', type (X), X.shape)
        # Only the count is needed here; the previous version also sliced X and y into
        # locals that were never used.
        self.train_len = int (np.count_nonzero (pd.notnull (y)))
        return self

    def transform (self, X, y):
        """Return ``(X[:train_len], y[:train_len])`` using the count learned in ``fit``.

        Note that, unlike a regular scikit-learn transformer, ``y`` is required and a
        2-tuple is returned.
        """
        train_len = self.train_len
        X, y = X[:train_len], y[:train_len]
        print ('Get_Train_From_TrainTest_Data.transform (): type(X), X.shape =', type (X), X.shape)
        return X, y

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return an empty dict: there are no constructor parameters."""
        params = {}
        return params

In [7]:
class FitOnce_Transformer (BaseEstimator, TransformerMixin):
    """Wrapper that forwards fit/transform to an inner transformer, skipping fit when ``isFit`` is set.

    NOTE: ``fit`` does not flip ``isFit`` itself (that line is still commented out), so
    unless the caller constructs the wrapper with ``isFit=True`` the inner transformer is
    fitted again on every ``fit`` call.
    """

    def __init__(self, transformer, isFit=False):
        self.transformer = transformer
        self.isFit = isFit

    def fit (self, X, y=None, **fit_params):
        """Fit the wrapped transformer unless it is flagged as already fitted."""
        if self.isFit:
            return self
        self.transformer.fit (X, y, **fit_params)
        # self.isFit = True                           # TODO: uncomment this !!!!!!!!!!
        return self

    def transform (self, X, y=None, **fit_params):
        """Delegate to the wrapped transformer; extra keywords are forwarded to it."""
        return self.transformer.transform (X, **fit_params)

    def set_params (self, **parameters):
        """Store every keyword as an instance attribute and return ``self``."""
        for name in parameters:
            setattr (self, name, parameters[name])
        return self

    def get_params (self, deep=True):
        """Return the two constructor arguments (``deep`` is not used)."""
        return dict (transformer=self.transformer, isFit=self.isFit)

# Find optimal PCA dims and the same no. of NMF features
The PCA does an unsupervised dimensionality reduction, while a linear SVM (`LinearSVC`) does the prediction.
We use a GridSearchCV to set the dimensionality of the PCA (together with the SVM's regularization strength `C`).

In [8]:
class PCA_NMF_FeatureTransformer (BaseEstimator, TransformerMixin):
    """PCA features, optionally concatenated with NMF features, with a CV-selected PCA size.

    ``fit`` grid-searches a ``PCA -> LinearSVC`` pipeline over ``n_components`` and ``C``
    (5-fold CV, refit on balanced accuracy) and then fits a fresh PCA with the winning
    ``n_components`` on all of ``X``. When ``isNMF`` is true, an NMF with as many
    components as that PCA outputs is fitted on the same data.

    Parameters
    ----------
    n_components : list
        Candidate values for ``PCA(n_components=...)``.
    C : list
        Candidate values for the LinearSVC ``C`` used during the search.
    whiten : bool
        Forwarded to ``PCA(whiten=...)``.
    isNMF : bool
        Whether NMF features are fitted and appended to the PCA features.
    isSparseOut : bool
        Whether an ndarray result of ``transform`` is wrapped in a ``csr_matrix``.
    """

    def __init__(self, n_components=[0.90, 0.95, 0.99], C=[0.1, 1, 10], whiten=True, isNMF=True, isSparseOut=False):

        self.pca = None
        self.nmf = None
        self.n_components = n_components
        self.C   = C
        self.whiten = whiten
        self.isNMF = isNMF
        self.isSparseOut = isSparseOut
        return

    def fit (self, X, Y, **fit_params):
        """Select the PCA size by cross-validation, then fit the PCA (and NMF) on ``X``.

        Sparse ``X`` is converted to a dense array first because PCA needs dense input.
        Returns ``self``.
        """
        if self.isNMF:
            print ('Find optimal PCA dims and the same no. of NMF features for X.shape =', X.shape)
        else:
            print ('Find optimal PCA dims for X.shape =', X.shape)
        self.pca = PCA (whiten=self.whiten, random_state=0)
        classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
        pipeline = Pipeline (steps=[('pca', self.pca), ('classifier', classifier)])
        param_grid = {
            'pca__n_components' : self.n_components,
            'classifier__C'     : self.C
        }
        # issparse() covers every SciPy sparse format, not only CSR, and toarray()
        # returns a plain ndarray instead of the legacy np.matrix from todense().
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        # NOTE(review): `iid` was removed from GridSearchCV in scikit-learn 0.24; drop the
        # argument when the notebook is moved to a newer release.
        gridSearchCV = GridSearchCV (pipeline, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                     refit='balanced_accuracy_score', n_jobs=-1)
        gridSearchCV.fit (X, Y)                   # works on clones: self.pca itself is not fitted here
        print ("Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
        print (gridSearchCV.best_params_)

        # Now use the optimal params to fit the PCA on the complete data
        self.pca = PCA (n_components=gridSearchCV.best_params_['pca__n_components'], whiten=self.whiten, random_state=0)
        self.pca.fit (X)
        # Transform two rows just to read off how many output columns the PCA produces.
        pcaDim = self.pca.transform (X[:2,:]).shape[1]
        print("PCA dimensionality, explainedVarRatio = ", pcaDim, self.pca.n_components)

        if self.isNMF :
            self.nmf = NMF (n_components=pcaDim, random_state=1, alpha=.1, l1_ratio=.5)
            self.nmf.fit (X)
        return self


    def transform (self, X, y=None, **fit_params):
        """Project ``X`` with the fitted PCA and, if enabled, append the NMF features.

        Returns the PCA features (column-stacked with the NMF features when ``isNMF``),
        wrapped in a ``csr_matrix`` when ``isSparseOut`` is set and the result is an ndarray.
        """
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        X_pca = self.pca.transform (X)
        if self.isNMF :
            X_nmf = self.nmf.transform (X)
            if X.ndim==1 :
                X = np.concatenate ([X_pca, X_nmf])
            else:
                X = np.hstack ([X_pca, X_nmf])
        else:
            X = X_pca
        if self.isSparseOut and type (X) is np.ndarray:
            X = csr_matrix (X)
        return X

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        params = {'n_components': self.n_components, 'C': self.C, 'whiten': self.whiten,
                  'isNMF': self.isNMF, 'isSparseOut': self.isSparseOut}
        return params

In [9]:
class PCA_NMF_TrainTest_FeatureTransformer (BaseEstimator, TransformerMixin):
    """PCA (+ optional NMF) features fitted on stacked train+test rows, sized using the labelled rows.

    For each candidate ``n_components`` a PCA is fitted on all rows of ``X``; a LinearSVC
    grid search over ``C`` (5-fold CV, refit on balanced accuracy) is then run on the
    first ``train_len`` projected rows, where ``train_len`` is the number of non-null
    labels in ``Y``. The candidate with the best CV score is refitted on all of ``X``.
    NOTE(review): this presumes the labelled rows come first in ``X``/``Y`` — confirm
    against the caller.

    Parameters
    ----------
    n_components : list
        Candidate values for ``PCA(n_components=...)``.
    C : list
        Candidate values for the LinearSVC ``C`` used during the search.
    isNMF : bool
        Whether NMF features are fitted and appended to the PCA features.
    whiten : bool
        Forwarded to ``PCA(whiten=...)``.
    isSparseOut : bool
        Whether an ndarray result of ``transform`` is wrapped in a ``csr_matrix``.
    """

    def __init__(self, n_components=[0.90, 0.95, 0.99], C=[0.1, 1, 10], isNMF=True, whiten=True, isSparseOut=False):

        self.pca = None
        self.nmf = None
        self.n_components = n_components
        self.C   = C
        self.whiten = whiten
        self.isNMF = isNMF
        self.isSparseOut = isSparseOut
        return

    def fit (self, X, Y, **fit_params):
        """Pick the best PCA size on the labelled rows, then fit PCA (and NMF) on all rows.

        Returns ``self``.
        """
        if self.isNMF:
            print ('Find optimal PCA dims and the same no. of NMF features for X.shape =', X.shape)
        else:
            print ('Find optimal PCA dims for X.shape =', X.shape)
        # issparse() covers every SciPy sparse format, not only CSR, and toarray()
        # returns a plain ndarray instead of the legacy np.matrix from todense().
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        train_len = int (np.count_nonzero (pd.notnull (Y)))
        candidate_scores = []
        for nc in self.n_components:

            pca = PCA (n_components=nc, whiten=self.whiten, random_state=0)
            X_pca = pca.fit_transform (X)
            classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
            param_grid = {'C' : self.C}
            # NOTE(review): `iid` was removed from GridSearchCV in scikit-learn 0.24; drop
            # the argument when the notebook is moved to a newer release.
            gridSearchCV = GridSearchCV (classifier, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                         refit='balanced_accuracy_score', n_jobs=-1)
            gridSearchCV.fit (X_pca[:train_len], Y[:train_len])
            candidate_scores.append (((nc, gridSearchCV.best_params_['C']), gridSearchCV.best_score_))
        # Each entry is ((n_components, C), cv_score); keep the one with the highest score.
        best_params_score = max (candidate_scores, key = lambda i : i[1])
        print ("--------------------- Best:  parameters =", best_params_score[0], ", CV =", best_params_score[1])

        # Now use the optimal params to fit the PCA
        self.pca = PCA (n_components=best_params_score[0][0], whiten=self.whiten, random_state=0)
        self.pca.fit (X)
        # Transform two rows just to read off how many output columns the PCA produces.
        pcaDim = self.pca.transform (X[:2,:]).shape[1]
        print("PCA dimensionality, explainedVarRatio = ", pcaDim, self.pca.n_components)

        if self.isNMF :
            self.nmf = NMF (n_components=pcaDim, random_state=1, alpha=.1, l1_ratio=.5)
            self.nmf.fit (X)
        return self


    def transform (self, X, y=None, **fit_params):
        """Project ``X`` with the fitted PCA and, if enabled, append the NMF features.

        Returns the PCA features (column-stacked with the NMF features when ``isNMF``),
        wrapped in a ``csr_matrix`` when ``isSparseOut`` is set and the result is an ndarray.
        """
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        X_pca = self.pca.transform (X)
        if self.isNMF :
            X_nmf = self.nmf.transform (X)
            if X.ndim==1 :
                X = np.concatenate ([X_pca, X_nmf])
            else:
                X = np.hstack ([X_pca, X_nmf])
        else:
            X = X_pca
        if self.isSparseOut and type (X) is np.ndarray:
            X = csr_matrix (X)
        return X

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        params = {'n_components': self.n_components, 'C': self.C, 'whiten': self.whiten,
                  'isNMF': self.isNMF, 'isSparseOut': self.isSparseOut}
        return params

# PCA requires dense input. For sparse matrices use the TruncatedSVD-based transformers below (note: scikit-learn's SparsePCA yields sparse components; it does not accept sparse input)

In [10]:
class SVD_NMF_FeatureTransformer (BaseEstimator, TransformerMixin):
    """TruncatedSVD features, optionally concatenated with NMF features, with a CV-selected size.

    ``fit`` turns the fractions in ``n_components`` into component counts relative to the
    number of columns of ``X``, grid-searches a ``TruncatedSVD -> LinearSVC`` pipeline
    over those counts and ``C`` (5-fold CV, refit on balanced accuracy), and then fits a
    fresh TruncatedSVD with the winning count on all of ``X``. When ``isNMF`` is true, an
    NMF with the same number of components is fitted as well.

    Parameters
    ----------
    n_components : list of float
        Fractions of the feature count to try as the SVD dimensionality.
    C : list
        Candidate values for the LinearSVC ``C`` used during the search.
    isNMF : bool
        Whether NMF features are fitted and appended to the SVD features.
    whiten : bool
        Kept only for interface compatibility. TruncatedSVD has no ``whiten`` option, so
        the value is stored and reported by ``get_params`` but otherwise ignored.
    isSparseOut : bool
        Whether an ndarray result of ``transform`` is wrapped in a ``csr_matrix``.
    """

    def __init__(self, n_components=[0.001, 0.01, 0.1, 0.3], C=[0.1, 1, 10], isNMF=True, whiten=True, isSparseOut=False):

        self.svd = None
        self.nmf = None
        self.n_components = n_components
        self.C   = C
        self.isNMF = isNMF
        self.whiten = whiten
        self.isSparseOut = isSparseOut
        return

    def fit (self, X, Y, **fit_params):
        """Select the SVD size by cross-validation, then fit the SVD (and NMF) on ``X``.

        Returns ``self``.
        """
        if self.isNMF:
            print ('Find optimal SVD dims and the same no. of NMF features for X.shape =', X.shape)
        else:
            print ('Find optimal SVD dims for X.shape =', X.shape)
        # TruncatedSVD does not accept a `whiten` keyword; passing it raised a TypeError
        # before any fitting could happen.
        self.svd = TruncatedSVD (random_state=0)
        n_features = X.shape[1]
        n_components = [int (i * n_features) for i in self.n_components if int (i * n_features) > 0]
        # With very few features every fraction can round down to zero; fall back to a
        # small valid size instead of failing on an empty candidate list.
        if not n_components:
            n_components = [max (1, min (2, n_features - 1))]
        # if too many components then limit upto 3000 due to memory constraints
        # (max() so the cap does not depend on the fractions being listed in ascending order)
        if max (n_components) > 3000:
            n_components = [100, 800, 2000, 3000]
        classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
        pipeline   = Pipeline (steps=[('svd', self.svd), ('classifier', classifier)])
        param_grid = {
            'svd__n_components' : n_components,
            'classifier__C'     : self.C
        }
        # NOTE(review): `iid` was removed from GridSearchCV in scikit-learn 0.24; drop the
        # argument when the notebook is moved to a newer release.
        gridSearchCV = GridSearchCV (pipeline, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                     refit='balanced_accuracy_score', n_jobs=-1)
        gridSearchCV.fit (X, Y)                # works on clones: self.svd itself is not fitted here
        print ("Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
        print (gridSearchCV.best_params_)

        # Now use the optimal params to fit the SVD
        self.svd = TruncatedSVD (n_components=gridSearchCV.best_params_['svd__n_components'], random_state=0)
        self.svd.fit (X)
        # Transform two rows just to read off how many output columns the SVD produces.
        svdDim = self.svd.transform (X[:2,:]).shape[1]
        print("SVD dimensionality, explainedVarRatio = ", svdDim, self.svd.explained_variance_ratio_.sum())

        if self.isNMF :

            self.nmf = NMF (n_components=svdDim, random_state=1, alpha=.1, l1_ratio=.5)
            self.nmf.fit (X)
        return self


    def transform (self, X, y=None, **fit_params):
        """Project ``X`` with the fitted SVD and, if enabled, append the NMF features.

        Returns the SVD features (column-stacked with the NMF features when ``isNMF``),
        wrapped in a ``csr_matrix`` when ``isSparseOut`` is set and the result is an ndarray.
        """
        X_svd = self.svd.transform (X)
        if self.isNMF:
            X_nmf = self.nmf.transform (X)
            if X.ndim==1 :
                X = np.concatenate ([X_svd, X_nmf])
            else:
                X = np.hstack ([X_svd, X_nmf])
        else:
            X = X_svd
        if self.isSparseOut and type (X) is np.ndarray:
            X = csr_matrix (X)
        return X

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        params = {'n_components': self.n_components, 'C': self.C, 'whiten': self.whiten,
                  'isNMF': self.isNMF, 'isSparseOut': self.isSparseOut}
        return params

In [11]:
class SVD_NMF_TrainTest_FeatureTransformer (BaseEstimator, TransformerMixin):
    """TruncatedSVD (+ optional NMF) features fitted on stacked train+test rows, sized using the labelled rows.

    The fractions in ``n_components`` are turned into component counts relative to the
    number of columns of ``X``. For each count a TruncatedSVD is fitted on all rows of
    ``X``; a LinearSVC grid search over ``C`` (5-fold CV, refit on balanced accuracy) is
    then run on the first ``train_len`` projected rows, where ``train_len`` is the number
    of non-null labels in ``Y``. The count with the best CV score is refitted on all of ``X``.
    NOTE(review): this presumes the labelled rows come first in ``X``/``Y`` — confirm
    against the caller.

    Parameters
    ----------
    n_components : list of float
        Fractions of the feature count to try as the SVD dimensionality.
    C : list
        Candidate values for the LinearSVC ``C`` used during the search.
    isNMF : bool
        Whether NMF features are fitted and appended to the SVD features.
    whiten : bool
        Kept only for interface compatibility. TruncatedSVD has no ``whiten`` option, so
        the value is stored and reported by ``get_params`` but otherwise ignored.
    isSparseOut : bool
        Whether an ndarray result of ``transform`` is wrapped in a ``csr_matrix``.
    """

    def __init__(self, n_components=[0.001, 0.01, 0.1, 0.3], C=[0.1, 1, 10], isNMF=True, whiten=True, isSparseOut=False):

        self.svd = None
        self.nmf = None
        self.n_components = n_components
        self.C   = C
        self.isNMF = isNMF
        self.whiten = whiten
        self.isSparseOut = isSparseOut
        return

    def fit (self, X, Y, **fit_params):
        """Pick the best SVD size on the labelled rows, then fit SVD (and NMF) on all rows.

        Returns ``self``.
        """
        if self.isNMF:
            print ('Find optimal SVD dims and the same no. of NMF features for X.shape =', X.shape)
        else:
            print ('Find optimal SVD dims for X.shape =', X.shape)
        n_features = X.shape[1]
        n_components = [int (i * n_features) for i in self.n_components if int (i * n_features) > 0]
        # With very few features every fraction can round down to zero; fall back to a
        # small valid size instead of failing on an empty candidate list.
        if not n_components:
            n_components = [max (1, min (2, n_features - 1))]
        # if too many components then limit upto 3000 due to memory constraints
        # (max() so the cap does not depend on the fractions being listed in ascending order)
        if max (n_components) > 3000:
            n_components = [100, 800, 2000, 3000]
        train_len = int (np.count_nonzero (pd.notnull (Y)))
        candidate_scores = []
        for nc in n_components:

            # TruncatedSVD does not accept a `whiten` keyword; passing it raised a
            # TypeError before any fitting could happen.
            svd = TruncatedSVD (n_components=nc, random_state=0)
            X_svd = svd.fit_transform (X)
            classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
            param_grid = {'C' : self.C}
            # NOTE(review): `iid` was removed from GridSearchCV in scikit-learn 0.24; drop
            # the argument when the notebook is moved to a newer release.
            gridSearchCV = GridSearchCV (classifier, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                         refit='balanced_accuracy_score', n_jobs=-1)
            gridSearchCV.fit (X_svd[:train_len], Y[:train_len])
            candidate_scores.append (((nc, gridSearchCV.best_params_['C']), gridSearchCV.best_score_))
        # Each entry is ((n_components, C), cv_score); keep the one with the highest score.
        best_params_score = max (candidate_scores, key = lambda i : i[1])
        print ("--------------------- Best:  parameters =", best_params_score[0], ", CV =", best_params_score[1])

        # Now use the optimal params to fit the SVD
        self.svd = TruncatedSVD (n_components=best_params_score[0][0], random_state=0)
        self.svd.fit (X)
        # Transform two rows just to read off how many output columns the SVD produces.
        svdDim = self.svd.transform (X[:2,:]).shape[1]
        print("SVD dimensionality, explainedVarRatio = ", svdDim, self.svd.explained_variance_ratio_.sum())

        if self.isNMF :

            self.nmf = NMF (n_components=svdDim, random_state=1, alpha=.1, l1_ratio=.5)
            self.nmf.fit (X)
        return self


    def transform (self, X, y=None, **fit_params):
        """Project ``X`` with the fitted SVD and, if enabled, append the NMF features.

        Returns the SVD features (column-stacked with the NMF features when ``isNMF``),
        wrapped in a ``csr_matrix`` when ``isSparseOut`` is set and the result is an ndarray.
        """
        X_svd = self.svd.transform (X)
        if self.isNMF:
            X_nmf = self.nmf.transform (X)
            if X.ndim==1 :
                X = np.concatenate ([X_svd, X_nmf])
            else:
                X = np.hstack ([X_svd, X_nmf])
        else:
            X = X_svd
        if self.isSparseOut and type (X) is np.ndarray:
            X = csr_matrix (X)
        return X

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        params = {'n_components': self.n_components, 'C': self.C, 'whiten': self.whiten,
                  'isNMF': self.isNMF, 'isSparseOut': self.isSparseOut}
        return params

In [12]:
class ICA_FeatureTransformer (BaseEstimator, TransformerMixin):
    """FastICA features whose number of components is chosen by cross-validated grid search.

    ``fit`` turns the fractions in ``n_components`` into component counts relative to the
    number of columns of ``X`` (falling back to 2 when every count rounds to zero),
    grid-searches a ``FastICA -> LinearSVC`` pipeline over those counts and ``C``
    (5-fold CV, refit on balanced accuracy), and then fits a fresh FastICA with the
    winning count on all of ``X``.

    Parameters
    ----------
    n_components : list of float
        Fractions of the feature count to try as the ICA dimensionality.
    C : list
        Candidate values for the LinearSVC ``C`` used during the search.
    whiten : bool
        Forwarded to ``FastICA(whiten=...)``.
    isSparseOut : bool
        Whether an ndarray result of ``transform`` is wrapped in a ``csr_matrix``.
    """

    def __init__(self, n_components=[0.1, 0.8], C=[0.1, 1, 10], whiten=True, isSparseOut=False):

        self.ica = None
        self.n_components = n_components
        self.C = C
        self.whiten = whiten
        self.isSparseOut = isSparseOut
        return

    def fit (self, X, Y, **fit_params):
        """Select the ICA size by cross-validation, then fit the ICA on ``X``.

        Sparse ``X`` is converted to a dense array before fitting. Returns ``self``.
        """
        print ('ICA_FeatureTransformer: type(X), X.shape =', type(X), X.shape)
        self.ica = FastICA (random_state=0, whiten=self.whiten)
        classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
        pipeline = Pipeline (steps=[('ica', self.ica), ('classifier', classifier)])
        n_components = [int (i* X.shape[1]) for i in self.n_components if int (i* X.shape[1]) > 0]
        if not n_components:
            n_components = [2]
        param_grid = {
            'ica__n_components' : n_components,
            'classifier__C'     : self.C
        }
        # issparse() covers every SciPy sparse format, not only CSR, and toarray()
        # returns a plain ndarray instead of the legacy np.matrix from todense().
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        # NOTE(review): `iid` was removed from GridSearchCV in scikit-learn 0.24; drop the
        # argument when the notebook is moved to a newer release.
        gridSearchCV = GridSearchCV (pipeline, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                     refit='balanced_accuracy_score', n_jobs=-1)
        gridSearchCV.fit (X, Y)  # works on clones: self.ica itself is not fitted here
        print ("Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
        print (gridSearchCV.best_params_)

        # Now use the optimal params to fit the ICA
        self.ica = FastICA (n_components=gridSearchCV.best_params_['ica__n_components'], whiten=self.whiten, random_state=0)
        self.ica.fit (X)
        # Transform two rows just to read off how many output columns the ICA produces.
        icaDim = self.ica.transform (X[:2,:]).shape[1]
        print ("ICA dimensionality, explainedVarRaio = ", icaDim, self.ica.n_components)
        return self


    def transform (self, X, y=None, **fit_params):
        """Project ``X`` with the fitted ICA.

        Sparse input is made dense first; an ndarray result is wrapped in a
        ``csr_matrix`` when ``isSparseOut`` is set.
        """
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        X = self.ica.transform (X)
        if self.isSparseOut and type (X) is np.ndarray:
            X = csr_matrix (X)
        return X

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        params = {'n_components': self.n_components, 'C': self.C, 'whiten': self.whiten, 'isSparseOut': self.isSparseOut}
        return params

In [13]:
class ICA_TrainTest_FeatureTransformer (BaseEstimator, TransformerMixin):
    """FastICA features fitted on stacked train+test rows, sized using the labelled rows.

    The fractions in ``n_components`` are turned into component counts relative to the
    number of columns of ``X`` (falling back to 2 when every count rounds to zero). For
    each count a FastICA is fitted on all rows of ``X``; a LinearSVC grid search over
    ``C`` (5-fold CV, refit on balanced accuracy) is then run on the first ``train_len``
    projected rows, where ``train_len`` is the number of non-null labels in ``Y``. The
    count with the best CV score is refitted on all of ``X``.
    NOTE(review): this presumes the labelled rows come first in ``X``/``Y`` — confirm
    against the caller.

    Parameters
    ----------
    n_components : list of float
        Fractions of the feature count to try as the ICA dimensionality.
    C : list
        Candidate values for the LinearSVC ``C`` used during the search.
    whiten : bool
        Forwarded to ``FastICA(whiten=...)``.
    isSparseOut : bool
        Whether an ndarray result of ``transform`` is wrapped in a ``csr_matrix``.
    """

    def __init__(self, n_components=[0.8, 0.9], C=[0.1, 1, 10], whiten=True, isSparseOut=False):

        self.ica = None
        self.n_components = n_components
        self.C = C
        self.whiten = whiten
        self.isSparseOut = isSparseOut
        return

    def fit (self, X, Y, **fit_params):
        """Pick the best ICA size on the labelled rows, then fit the ICA on all rows.

        Sparse ``X`` is converted to a dense array before fitting. Returns ``self``.
        """
        print ('ICA_FeatureTransformer: type(X), X.shape =', type(X), X.shape)
        # issparse() covers every SciPy sparse format, not only CSR, and toarray()
        # returns a plain ndarray instead of the legacy np.matrix from todense().
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        n_components = [int (i* X.shape[1]) for i in self.n_components if int (i* X.shape[1]) > 0]
        if not n_components:
            n_components = [2]
        train_len = int (np.count_nonzero (pd.notnull (Y)))
        candidate_scores = []
        for nc in n_components:

            ica = FastICA (n_components=nc, random_state=0, whiten=self.whiten)
            X_ica = ica.fit_transform (X)
            classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
            param_grid = {'C' : self.C}
            # NOTE(review): `iid` was removed from GridSearchCV in scikit-learn 0.24; drop
            # the argument when the notebook is moved to a newer release.
            gridSearchCV = GridSearchCV (classifier, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                         refit='balanced_accuracy_score', n_jobs=-1)
            gridSearchCV.fit (X_ica[:train_len], Y[:train_len])
            candidate_scores.append (((nc, gridSearchCV.best_params_['C']), gridSearchCV.best_score_))
        # Each entry is ((n_components, C), cv_score); keep the one with the highest score.
        best_params_score = max (candidate_scores, key = lambda i : i[1])
        print ("--------------------- Best:  parameters =", best_params_score[0], ", CV =", best_params_score[1])

        # Now use the optimal params to fit the ICA
        self.ica = FastICA (n_components=best_params_score[0][0], whiten=self.whiten, random_state=0)
        self.ica.fit (X)
        # Transform two rows just to read off how many output columns the ICA produces.
        icaDim = self.ica.transform (X[:2,:]).shape[1]
        print ("ICA dimensionality, explainedVarRaio = ", icaDim, self.ica.n_components)
        return self


    def transform (self, X, y=None, **fit_params):
        """Project ``X`` with the fitted ICA.

        Sparse input is made dense first; an ndarray result is wrapped in a
        ``csr_matrix`` when ``isSparseOut`` is set.
        """
        if scipy.sparse.issparse (X):
            X = X.toarray ()
        X = self.ica.transform (X)
        if self.isSparseOut and type (X) is np.ndarray:
            X = csr_matrix (X)
        return X

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        params = {'n_components': self.n_components, 'C': self.C, 'whiten': self.whiten, 'isSparseOut': self.isSparseOut}
        return params

# Kernel PCA features
The KPCA does an unsupervised dimensionality reduction, while a linear SVM (`LinearSVC`) does the prediction. We use a GridSearchCV to choose the KPCA kernel (together with the SVM's regularization strength `C`).

In [14]:
class KPCA_FeatureTransformer (BaseEstimator, TransformerMixin):
    """Kernel-PCA features whose kernel is picked by a cross-validated grid search.

    ``fit`` searches a ``KernelPCA -> LinearSVC`` pipeline over the candidate kernels and
    ``C`` values (5-fold CV, refit on balanced accuracy) and then fits a fresh KernelPCA
    with the winning kernel on all of ``X``. ``isSparseOut`` makes ``transform`` wrap an
    ndarray result in a ``csr_matrix``.
    """

    def __init__(self, kernel=['linear', 'poly', 'rbf', 'cosine'], C=[0.1, 1, 10], isSparseOut=False):
        self.kernel = kernel
        self.C = C
        self.isSparseOut = isSparseOut
        self.kpca = None

    def fit (self, X, Y, **fit_params):
        """Choose the kernel by grid search and fit the final KernelPCA on ``X``; returns ``self``."""
        print ('Kernel PCA features for X.shape =', X.shape)
        self.kpca = KernelPCA (remove_zero_eig=True, random_state=0)
        search = GridSearchCV (
            Pipeline (steps=[
                ('kpca', self.kpca),
                ('classifier', LinearSVC (penalty="l2", class_weight='balanced', random_state=0)),
            ]),
            {'kpca__kernel': self.kernel, 'classifier__C': self.C},
            iid=False, cv=5, scoring=CV_SCORERS,
            refit='balanced_accuracy_score', n_jobs=-1)
        # The search operates on clones, so self.kpca is still unfitted afterwards.
        search.fit (X, Y)
        print ("Best parameter (CV score=%0.3f):" % search.best_score_)
        print (search.best_params_)

        # Refit on the full data with the kernel that won the search.
        best_kernel = search.best_params_['kpca__kernel']
        self.kpca = KernelPCA (kernel=best_kernel, remove_zero_eig=True, random_state=0)
        self.kpca.fit (X)
        # Two rows are enough to read off the output dimensionality.
        print ('KPCA dim =', self.kpca.transform (X[:2,:]).shape[1])
        return self

    def transform (self, X, y=None, **fit_params):
        """
        Project X (a sparse matrix, per the original note) with the fitted KernelPCA.
        """
        projected = self.kpca.transform (X)
        wants_sparse = self.isSparseOut and type (projected) is np.ndarray
        return csr_matrix (projected) if wants_sparse else projected

    def set_params (self, **parameters):
        """Store every keyword as an instance attribute and return ``self``."""
        for name in parameters:
            setattr (self, name, parameters[name])
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        return dict (kernel=self.kernel, C=self.C, isSparseOut=self.isSparseOut)

In [15]:
class KPCA_TrainTest_FeatureTransformer (BaseEstimator, TransformerMixin):
    """Kernel-PCA features fitted on stacked train+test rows, with the kernel chosen on the labelled rows.

    For every candidate kernel a KernelPCA is fitted on all rows of ``X``; a LinearSVC
    grid search over ``C`` (5-fold CV, refit on balanced accuracy) then scores the first
    ``n_labelled`` projected rows, ``n_labelled`` being the number of non-null labels in
    ``Y``. The best-scoring kernel is refitted on all of ``X``.
    """

    def __init__(self, kernel=['linear', 'poly', 'rbf', 'cosine'], C=[0.1, 1, 10], isSparseOut=False):
        self.kernel = kernel
        self.C = C
        self.isSparseOut = isSparseOut
        self.kpca = None

    def fit (self, X, Y, **fit_params):
        """Score each kernel on the labelled rows, then fit the winner on ``X``; returns ``self``."""
        print ('Kernel PCA features for X.shape =', X.shape)
        n_labelled = len (Y[pd.notnull (Y)])
        trials = []
        for candidate in self.kernel:
            embedded = KernelPCA (kernel=candidate, random_state=0).fit_transform (X)
            search = GridSearchCV (
                LinearSVC (penalty="l2", class_weight='balanced', random_state=0),
                {'C': self.C},
                iid=False, cv=5, scoring=CV_SCORERS,
                refit='balanced_accuracy_score', n_jobs=-1)
            search.fit (embedded[:n_labelled], Y[:n_labelled])
            trials.append (((candidate, search.best_params_['C']), search.best_score_))
        # Each trial is ((kernel, C), cv_score); the highest score wins.
        best_params, best_score = max (trials, key=lambda trial: trial[1])
        print ("--------------------- Best:  parameters =", best_params, ", CV =", best_score)

        # Refit on the full data with the winning kernel.
        self.kpca = KernelPCA (kernel=best_params[0], random_state=0)
        self.kpca.fit (X)
        # Two rows are enough to read off the output dimensionality.
        print ('KPCA dim =', self.kpca.transform (X[:2,:]).shape[1])
        return self

    def transform (self, X, y=None, **fit_params):
        """
        Project X (a sparse matrix, per the original note) with the fitted KernelPCA.
        """
        projected = self.kpca.transform (X)
        wants_sparse = self.isSparseOut and type (projected) is np.ndarray
        return csr_matrix (projected) if wants_sparse else projected

    def set_params (self, **parameters):
        """Store every keyword as an instance attribute and return ``self``."""
        for name in parameters:
            setattr (self, name, parameters[name])
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        return dict (kernel=self.kernel, C=self.C, isSparseOut=self.isSparseOut)

# Latent Dirichlet Allocation Features
The LDA does an unsupervised dimensionality reduction, while a linear SVM (`LinearSVC`) does the prediction. We use a GridSearchCV to set the dimensionality (number of topics) of the LDA.

In [16]:
class LDA_FeatureTransformer (BaseEstimator, TransformerMixin):
    """Latent Dirichlet Allocation features with a cross-validated number of components.

    ``fit`` turns the fractions in ``n_components`` into component counts relative to
    ``min(n_rows, n_columns)`` of ``X`` (falling back to 2 when every count rounds to
    zero), grid-searches an ``LDA -> LinearSVC`` pipeline over those counts and ``C``
    (5-fold CV, refit on balanced accuracy), and then fits a fresh LDA with the winning
    count on all of ``X``.

    Parameters
    ----------
    n_components : list of float
        Fractions used to derive the candidate numbers of LDA components.
    C : list
        Candidate values for the LinearSVC ``C`` used during the search.
    """

    def __init__(self, n_components=[0.05, 0.1], C=[0.1, 1, 10]):

        self.lda = None
        self.C = C
        self.n_components = n_components
        return

    def fit (self, X, Y, **fit_params):
        """Select the LDA size by cross-validation, then fit the LDA on ``X``; returns ``self``."""
        print ('LDA Features for X.shape =', X.shape)
        # One shared configuration for the searched and the final LDA. The final refit
        # previously used max_iter=5 while the search evaluated max_iter=6, so the
        # reported CV score did not describe the model that was actually kept.
        lda_settings = dict (max_iter=6, learning_offset=50.0, random_state=0)
        self.lda = LatentDirichletAllocation (**lda_settings)
        # convert fractions to int feature count
        n_components = ((np.array(self.n_components) * min (X.shape[0],X.shape[1])).astype (int)).tolist ()
        n_components = [nc for nc in n_components if nc>0]
        if not n_components:
            n_components = [2]
        classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
        pipeline = Pipeline (steps=[('LDA', self.lda), ('classifier', classifier)])
        param_grid = {'LDA__n_components' : n_components, 'classifier__C': self.C}
        # NOTE(review): `iid` was removed from GridSearchCV in scikit-learn 0.24; drop the
        # argument when the notebook is moved to a newer release.
        gridSearchCV = GridSearchCV (pipeline, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                     refit='balanced_accuracy_score', n_jobs=-1)
        gridSearchCV.fit (X, Y)              # works on clones: self.lda itself is not fitted here
        print ("Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
        print (gridSearchCV.best_params_)

        # Now use the optimal params to fit the LDA
        n_components = gridSearchCV.best_params_['LDA__n_components']
        self.lda     = LatentDirichletAllocation (n_components=n_components, **lda_settings)
        self.lda.fit (X)
        return self


    def transform (self, X, y=None, **fit_params):
        """Return the fitted LDA's transform of ``X``."""
        return self.lda.transform (X)

    def set_params (self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is not used)."""
        params = {'n_components': self.n_components, 'C': self.C}
        return params

In [17]:
class LDA_TrainTest_FeatureTransformer (BaseEstimator, TransformerMixin):
    """LDA topic-feature transformer whose topic count is tuned on the labelled rows only.

    ``fit`` receives labelled and unlabelled rows together. Every row of X is used to fit the
    LDA models, while only the first ``train_len`` rows (``train_len`` = number of non-null
    entries of Y) are used to cross-validate a LinearSVC on the resulting topic features.

    Parameters
    ----------
    n_components : list of float
        Candidate topic counts, given as fractions of ``min (n_rows, n_columns)`` of X.
    C : list of float
        Candidate ``C`` values for the LinearSVC used while scoring a topic count.
    """

    def __init__(self, n_components=[0.05, 0.2], C=[0.1, 1, 10]):

        self.lda          = None    # fitted LatentDirichletAllocation, set by fit ()
        self.n_components = n_components
        self.C = C
        return

    def fit (self, X, Y, **fit_params):
        """Search the topic count, then fit the final LDA on all rows of X.

        For every candidate topic count an LDA is fitted on X, a LinearSVC is grid-searched
        over ``self.C`` on the first ``train_len`` transformed rows, and the best CV score is
        recorded. The topic count with the highest score is used for the final LDA.
        ``fit_params`` is accepted but not used. Returns ``self``.
        """
        print ('LDA Features for X.shape =', X.shape)
        # convert fractions to int feature count
        n_components = ((np.array(self.n_components) * min (X.shape[0],X.shape[1])).astype (int)).tolist ()
        n_components = [nc for nc in n_components if nc>0]
        if not n_components:
            n_components = [2]
        # Labelled rows are taken as the leading rows: the slices below use [:train_len].
        train_len = len (Y[pd.isnull(Y)==False])
        best_params_score = []
        for nc in n_components:

            # The candidate topic count must be handed to the LDA; without it every iteration
            # fits the same default-sized model and the search over n_components has no effect.
            lda = LatentDirichletAllocation (n_components=nc, max_iter=6, learning_offset=50.0, random_state=0)
            X_lda = lda.fit_transform (X)
            classifier = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
            param_grid = {'C' : self.C}
            gridSearchCV = GridSearchCV (classifier, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                         refit='balanced_accuracy_score', n_jobs=-1)
            gridSearchCV.fit (X_lda[:train_len], Y[:train_len])
            best_params_score.append (((nc, gridSearchCV.best_params_['C']), gridSearchCV.best_score_))
        # Entries are ((topic_count, C), score); keep the one with the highest score.
        best_params_score = max (best_params_score, key = lambda i : i[1])
        print ("--------------------- Best:  parameters =", best_params_score[0], ", CV =", best_params_score[1])

        # Now use the optimal params to fit the LDA
        # NOTE(review): the search uses max_iter=6 but this refit uses max_iter=5 - confirm intent.
        n_components = best_params_score[0][0]
        self.lda     = LatentDirichletAllocation (n_components=n_components, max_iter=5, learning_offset=50., random_state=0)
        self.lda.fit (X)
        return self


    def transform (self, X, y=None, **fit_params):
        """Return the topic features of X from the LDA fitted in ``fit``."""
        return self.lda.transform (X)

    def set_params (self, **parameters):
        """Store every keyword argument as an attribute of the same name and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor settings as a dict; ``deep`` is ignored."""
        params = {'n_components': self.n_components, 'C': self.C}
        return params

# Find K-best features based on Mutual-Info / F-Score / Chi^2
Use K-best feature selection with a linear SVM classifier (LinearSVC) in GridSearchCV to find the optimal value of K

In [19]:
class SelectKBest_feature_selector (BaseEstimator, TransformerMixin):
    """Select the K best features, with K and the score function chosen by grid search.

    Parameters
    ----------
    n_components : list of float
        Candidate values of K, given as fractions of the number of columns of X.
    score_func : list of callables
        Candidate scoring functions for SelectKBest.
    C : list of float
        Candidate ``C`` values for the LinearSVC that scores each selection.
    """

    def __init__(self, n_components=[0.75, 0.80, 0.90, 0.95, 1.0], score_func=[chi2, mutual_info_classif], C=[0.001, 0.005, 0.01, 0.1, 1, 10]):

        self.score_func   = score_func
        self.n_components = n_components
        self.selectKBest  = None    # fitted SelectKBest, set by fit ()
        self.C = C
        return

    def fit (self, X, Y, **fit_params):
        """Grid-search K / score function / C on the labelled rows and fit the final selector.

        Only the first ``train_len`` rows are used, where ``train_len`` is the number of
        non-null entries of Y. ``fit_params`` is accepted but not used. Returns ``self``.
        """
        train_len = len (Y[pd.isnull(Y)==False])
        X, Y = X[:train_len], Y[:train_len]
        print ('Find K-best features based on Mutual-Info / Chi^2 for X.shape =', X.shape)
        self.selectKBest  = SelectKBest ()
        # this works only when X >= 0, hence shift by a constant so that it is >=0
        if X.min () < 0:

            shift_k = np.abs (X.min ())
            # Slicing a dense array yields a view, so shift a private copy: writing in place
            # would alter the caller's labelled rows while leaving the remaining rows untouched.
            X = X.copy ()
            X[X.nonzero ()] = X[X.nonzero ()] + shift_k
        # convert fractions to int feature count
        n_components = ((np.array(self.n_components) * X.shape[1]).astype (int)).tolist ()
        n_components = [nc for nc in n_components if nc>0]
        if not n_components:
            n_components = [2]
        lsvc = LinearSVC (penalty="l2", class_weight='balanced', random_state=0)
        pipeline = Pipeline (steps=[('selectKBest', self.selectKBest), ('classifier', lsvc)])
        param_grid = {
            'selectKBest__k'          : n_components,
            'selectKBest__score_func' : self.score_func,
            'classifier__C'           : self.C
        }
        gridSearchCV = GridSearchCV (pipeline, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                     refit='balanced_accuracy_score', n_jobs=-1)
        # The search works on clones; self.selectKBest is rebuilt from the best params below.
        gridSearchCV.fit (X, Y)
        print ("--------------------- Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
        print (gridSearchCV.best_params_)

        # Now use the optimal params to fit the model
        k          = gridSearchCV.best_params_['selectKBest__k']
        score_func = gridSearchCV.best_params_['selectKBest__score_func']
        # Keyword arguments: newer scikit-learn releases no longer accept ``k`` positionally.
        self.selectKBest = SelectKBest (score_func=score_func, k=k)
        self.selectKBest.fit (X, Y)
        return self

    def transform (self, X, y=None, **fit_params):
        """Return the columns of X kept by the selector fitted in ``fit``."""
        return self.selectKBest.transform (X)

    def set_params (self, **parameters):
        """Store every keyword argument as an attribute of the same name and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor settings as a dict; ``deep`` is ignored."""
        params = {'n_components': self.n_components, 'score_func': self.score_func, 'C': self.C}
        return params

# Through L1-regularized SVM similar to Lasso, identify the useful sparse features

In [20]:
class SparseSVM_feature_selector (BaseEstimator, TransformerMixin):
    """Feature selection through an L1-penalised LinearSVC wrapped in SelectFromModel.

    Parameters
    ----------
    tol : list of float
        Candidate tolerances for the L1-penalised LinearSVC that does the selection.
    C : list of float
        Candidate ``C`` values for the L2 LinearSVC that scores each selection.
    """

    def __init__(self, tol=[1e-3, 1e-4, 1e-5, 1e-6], C=[0.001, 0.005, 0.01, 0.1, 1, 10]):

        self.tol = tol
        self.C   = C
        self.sfm = None    # fitted SelectFromModel, set by fit ()
        return

    def fit (self, X, Y, **fit_params):
        """Grid-search ``tol`` (and the scorer's ``C``) on the labelled rows, then fit the selector.

        Only the first ``train_len`` rows are used, where ``train_len`` is the number of
        non-null entries of Y. ``fit_params`` is accepted but not used. Returns ``self``.
        """
        train_len = len (Y[pd.isnull(Y)==False])
        X, Y = X[:train_len], Y[:train_len]
        print ('LinearSVC with L1-based feature selection for X.shape =', X.shape)
        # The smaller C, the stronger the regularization.
        # The more regularization, the more sparsity.
        pipeline = Pipeline ([
          ('feature_selection', SelectFromModel (LinearSVC (penalty="l1", dual=False, class_weight='balanced', random_state=0))),
          ('classifier', LinearSVC (penalty="l2", class_weight='balanced', random_state=0))  ])
        param_grid = {'feature_selection__estimator__tol' : self.tol, 'classifier__C': self.C}
        gridSearchCV = GridSearchCV (pipeline, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                     refit='balanced_accuracy_score', n_jobs=-1)
        # The search works on clones; the selector is rebuilt from the best params below.
        gridSearchCV.fit (X, Y)
        print ("--------------------- Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
        print (gridSearchCV.best_params_)

        # Now use the optimal params to fit the model. The estimator is configured exactly like
        # the one that was cross-validated (including class_weight='balanced'), so the selected
        # features are those the CV score refers to.
        tol = gridSearchCV.best_params_['feature_selection__estimator__tol']
        svc = LinearSVC (penalty="l1", dual=False, tol=tol, class_weight='balanced', random_state=0)
        self.sfm = SelectFromModel (svc)
        self.sfm.fit (X, Y)
        print ('New #features = ', self.sfm.transform (X[:2,:]).shape[1] )
        return self

    def transform (self, X, y=None, **fit_params):
        """Return the columns of X kept by the selector fitted in ``fit``."""
        return self.sfm.transform (X)

    def set_params (self, **parameters):
        """Store every keyword argument as an attribute of the same name and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return every constructor setting; ``deep`` is ignored.

        ``C`` is included so that rebuilding the object from these params (as sklearn's
        ``clone`` does) keeps a non-default ``C`` grid.
        """
        params = {'tol': self.tol, 'C': self.C}
        return params

# Feature Selection based on clf.feature_importances_ with CV on threshold
clf = RandomForest / Xgboost / CatBoost
we use CV to determine threshold for SelectFromModel( rf_clf, threshold=? )

@param: clf: an already fitted(X,Y) clf.

In [21]:
class CLF_importance_feature_selector (BaseEstimator, TransformerMixin):
    """Feature selection by random-forest importances with a cross-validated threshold.

    A RandomForestClassifier is created inside ``fit``; the SelectFromModel threshold is
    chosen from ``threshold`` by grid search with a LinearSVC as the scoring classifier.

    Parameters
    ----------
    threshold : list of float
        Candidate importance thresholds for SelectFromModel.
    """

    def __init__(self, threshold=[1e-9, 1e-8, 1e-7]):

        self.clf             = None    # RandomForestClassifier, created by fit ()
        self.selectFromModel = None    # fitted SelectFromModel, set by fit ()
        self.threshold       = threshold
        return

    def fit (self, X, Y, **fit_params):
        """Grid-search the threshold on the labelled rows, then fit the final selector.

        Only the first ``train_len`` rows are used, where ``train_len`` is the number of
        non-null entries of Y. ``fit_params`` is accepted but not used. Returns ``self``.
        """
        train_len = len (Y[pd.isnull(Y)==False])
        X, Y = X[:train_len], Y[:train_len]
        print ('Feature Selection based on rf.feature_importances_ for X.shape =', X.shape)
        # Seeded like the other estimators in this notebook so the selection is repeatable.
        self.clf = RandomForestClassifier (random_state=0)
        pipeline = Pipeline ([
          ('feature_selection', SelectFromModel (self.clf)),
          ('classification', LinearSVC (penalty="l2", class_weight='balanced', random_state=0))  ])
        param_grid = {'feature_selection__threshold' : self.threshold}
        gridSearchCV = GridSearchCV (pipeline, param_grid, iid=False, cv=5, scoring=CV_SCORERS,
                                     refit='balanced_accuracy_score', n_jobs=-1)
        gridSearchCV.fit (X, Y)
        print ("--------------------- Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
        print (gridSearchCV.best_params_)

        # Keyword argument: newer scikit-learn releases no longer accept ``threshold`` positionally.
        best_threshold = gridSearchCV.best_params_['feature_selection__threshold']
        self.selectFromModel = SelectFromModel (self.clf, threshold=best_threshold)
        self.selectFromModel.fit (X, Y)
        # Transform two rows only, just to report how many columns survive.
        clfDim = self.selectFromModel.transform (X[:2,:]).shape[1]
        print ('clfDim = ', clfDim)
        return self

    def transform (self, X, y=None, **fit_params):
        """Return the columns of X kept by the selector fitted in ``fit``."""
        return self.selectFromModel.transform (X)

    def set_params (self, **parameters):
        """Store every keyword argument as an attribute of the same name and return ``self``."""
        for parameter, value in parameters.items ():
            setattr (self, parameter, value)
        return self

    def get_params (self, deep=True):
        """Return the constructor settings as a dict; ``deep`` is ignored."""
        params = {'threshold': self.threshold}
        return params

# Ada-Boosted Classifiers from a base clf after CV over boosting params

# ab_params     = { 'n_estimators'  :  50 }
# ab_param_grid = { 'learning_rate' : [0.5, 0.75, 1.0] }

def get_clf_adaBoosted_cv (X, Y, clf, params=None, param_grid=None):
    """Wrap ``clf`` in an AdaBoostClassifier, optionally tuned by 5-fold grid search.

    ``params`` (if truthy) is applied to the booster with ``set_params``. When ``param_grid``
    is truthy a GridSearchCV is fitted on (X, Y), its best estimator is benchmarked into the
    module-level ``RESULTS`` list and returned; otherwise the unfitted booster is returned.
    """
    global RESULTS

    boosted = AdaBoostClassifier (base_estimator=clf)
    if params:
        boosted.set_params (**params)

    # Nothing to tune: hand back the configured, still unfitted booster.
    if not param_grid:
        return boosted

    search = GridSearchCV (boosted, param_grid, iid=False, cv=5)
    search.fit (X, Y)
    print ("get_clf_adaBoosted_cv(): Best parameter (CV score=%0.3f):" % search.best_score_)
    print (search.best_params_)
    boosted = search.best_estimator_
    RESULTS.append (benchmark (search.best_estimator_))
    # TODO: Plot scores for each split, and get their variance
    return boosted

# Create a fully connected FCNN clf

In [22]:
class MyModule (nn.Module):
    """Fully connected network: Linear -> BatchNorm -> nonlin -> Dropout per hidden layer.

    Parameters
    ----------
    inputCount : int
        Number of input features.
    outputCount : int
        Number of output units. One unit gets a Sigmoid, two or more get a Softmax over the
        last dimension so that each output row is a probability distribution.
    hiddenLayerCounts : list of int
        Width of every hidden layer, in order; at least one entry is required.
    drop_prob : float
        Dropout probability applied after every hidden layer.
    nonlin : nn.Module
        Activation applied after every hidden batch-norm.
    """

    def __init__(self, inputCount=1024, outputCount=2, hiddenLayerCounts=[128], drop_prob=0.2, nonlin=nn.ReLU ()):

        super(MyModule, self).__init__()

        self.nonlin  = nonlin
        self.dropout = nn.Dropout (drop_prob)

        # The first hidden layer keeps its original attribute names.
        self.dense1     = nn.Linear (inputCount, hiddenLayerCounts[0])
        self.batchnorm1 = nn.BatchNorm1d (hiddenLayerCounts[0])
        # Remaining hidden layers, one (Linear, BatchNorm1d) pair per extra entry. Both lists
        # are empty for a single hidden layer, so that configuration is built exactly as before.
        self.extraDense = nn.ModuleList ()
        self.extraNorm  = nn.ModuleList ()
        for width_in, width_out in zip (hiddenLayerCounts[:-1], hiddenLayerCounts[1:]):
            self.extraDense.append (nn.Linear (width_in, width_out))
            self.extraNorm.append (nn.BatchNorm1d (width_out))
        # The output layer is fed by the last hidden layer, which now always exists.
        self.outDense   = nn.Linear (hiddenLayerCounts[-1], outputCount)

        # Independent sigmoids on two units do not sum to one, and a loss that only looks at
        # the true-class probability can then be minimised by saturating every unit. Softmax
        # is therefore used whenever there is more than one output unit.
        if outputCount == 1:
            self.outActivtn = nn.Sigmoid ()
        else:
            self.outActivtn = nn.Softmax (dim=-1)
        return

    def forward (self, X, **kwargs):
        """Run X through every hidden layer and the output activation; ``kwargs`` is unused."""
        X = self.dropout (self.nonlin (self.batchnorm1 (self.dense1 (X))))
        for dense, norm in zip (self.extraDense, self.extraNorm):
            X = self.dropout (self.nonlin (norm (dense (X))))
        X = self.outActivtn (self.outDense (X))
        return X

# Get optimal Classifier after CV

In [23]:
class Best_clf_cv_transformer (BaseEstimator, TransformerMixin):
    """Wrap one named classifier and optionally tune / score it with cross-validation.

    ``myparams`` is a dict read with these keys:
      'name'       : required; one of 'Logit', 'DT', 'RidgClf', 'Prcpt', 'PssAggClf', 'Knn',
                     'RF', 'NearCent', 'MultNB', 'BernNB', 'Svc', 'LSvc', 'Xgb', 'Catb', 'FCNN'.
      'cv'         : optional ``cv`` argument for the cross-validation calls (default 5).
      'isCV'       : optional; when false, fit () only fits the classifier (default True).
      'params'     : optional dict applied to the classifier with ``set_params``.
      'param_grid' : optional grid; when present (and isCV) fit () runs a GridSearchCV.
    Extra keyword arguments to the constructor are also applied with ``set_params``.

    For 'FCNN' the classifier is created inside fit (), because its input size depends on X;
    any 'params' / extra keyword arguments are kept and applied to it there.
    """

    def __init__(self, myparams={'C':1}, **other_params):

        self._configure (myparams, other_params)
        return

    @staticmethod
    def _new_classifier (name):
        """Return a fresh, unfitted classifier for ``name`` (None for 'FCNN').

        Raises ValueError for an unknown name.
        """
        if name == 'FCNN':
            # a fully connected neural net clf will be created at time of fitting
            return None
        builders = {
            'Logit'     : lambda: LogisticRegression (random_state=0),
            'DT'        : lambda: DecisionTreeClassifier (random_state=0),
            'RidgClf'   : lambda: RidgeClassifier (random_state=0),
            'Prcpt'     : lambda: Perceptron (random_state=0),
            'PssAggClf' : lambda: PassiveAggressiveClassifier (random_state=0),
            'RF'        : lambda: RandomForestClassifier (random_state=0),
            # These four estimators have no random_state parameter; passing one raises TypeError.
            'Knn'       : lambda: KNeighborsClassifier (),
            'NearCent'  : lambda: NearestCentroid (),
            'MultNB'    : lambda: MultinomialNB (),
            'BernNB'    : lambda: BernoulliNB (),
            'Svc'       : lambda: SVC (probability=True, random_state=0),
            'LSvc'      : lambda: LinearSVC (random_state=0),
            'Xgb'       : lambda: xgb.XGBClassifier (random_state=0), # XGBRFClassifier()
            'Catb'      : lambda: CatBoostClassifier (verbose=False, random_state=0), # issues with CV
        }
        if name not in builders:
            raise ValueError ('ERROR Best_clf_cv_transformer: invalid @param name: %r' % (name,))
        return builders[name] ()

    def _configure (self, myparams, other_params=None):
        """Derive every attribute from ``myparams``; shared by __init__ and set_params."""
        name = myparams['name']
        clf  = self._new_classifier (name)

        # 'params' first, then the extra keyword arguments, so the latter win on conflicts.
        clf_params = dict (myparams['params']) if 'params' in myparams else {}
        if other_params:
            clf_params.update (other_params)
        if clf is not None:
            clf.set_params (**clf_params)
        # Kept for 'FCNN', whose classifier does not exist yet (see fit).
        self._pending_clf_params = clf_params

        self.cv         = myparams['cv'] if 'cv' in myparams else 5
        self.isCV       = myparams['isCV'] if 'isCV' in myparams else True
        self.param_grid = myparams['param_grid'] if 'param_grid' in myparams else None
        self.myparams   = myparams    # stored untouched so get_params () returns the same object
        self.clf        = clf
        self.cv_score   = 0
        self.name       = name
        self._estimator_type = 'classifier'


    def fit (self, X, Y):
        """Fit the wrapped classifier on the labelled rows of (X, Y) and return ``self``.

        Only the first ``train_len`` rows are used, where ``train_len`` is the number of
        non-null entries of Y. Depending on the configuration this runs a grid search, a
        plain cross-validation followed by a fit, or just a fit. 'LSvc' is wrapped in a
        CalibratedClassifierCV; the best CV score is stored in ``self.cv_score``.
        """
        train_len = len (Y[pd.isnull(Y)==False])
        X, Y = X[:train_len], Y[:train_len]
        print ('training', self.name, 'for X.shape =', X.shape)
        n_jobs = -1
        if self.name == 'FCNN':
            self.clf = NeuralNetClassifier (
                            module=MyModule,
                            module__inputCount=X.shape[1],
                            module__outputCount=2,
                            module__hiddenLayerCounts=[int (X.shape[1]/1)],
                            max_epochs=20,
                            lr=0.05,
                            # Shuffle training data on each epoch
                            iterator_train__shuffle=True,
            )
            # Apply the user-supplied settings that could not be applied at configuration time.
            pending = getattr (self, '_pending_clf_params', None)
            if pending:
                self.clf.set_params (**pending)
            X = X.astype (np.float32)
            Y = Y.astype (np.int64)
            n_jobs = None    # the net is cross-validated in-process

        if self.isCV:
            if self.param_grid:

                gridSearchCV = GridSearchCV (
                    self.clf, self.param_grid, iid=False, cv=self.cv, scoring=CV_SCORERS,
                    refit='balanced_accuracy_score', n_jobs=n_jobs
                )
                gridSearchCV.fit (X, Y)
                print (self.name, ": Best_clf_cv_transformer: Best parameter (CV score=%0.3f):" % gridSearchCV.best_score_)
                print (gridSearchCV.best_params_)
                self.clf = gridSearchCV.best_estimator_
                self.cv_score = gridSearchCV.best_score_
                if self.name == 'LSvc':

                    self.clf = CalibratedClassifierCV (self.clf)
                    self.clf.fit (X, Y)
                # TODO: Plot scores for each split, and get their variance
            else:

                if self.name == 'LSvc':

                    self.clf = CalibratedClassifierCV (self.clf)
                print (self.name, ': Best_clf_cv_transformer: starting CV =', self.cv)
                if self.name not in {'RF', 'Catb', 'FCNN'}:

                    cv_estimator = self.clf
                else:

                    # These three are cross-validated through a one-member VotingClassifier.
                    cv_estimator = VotingClassifier (estimators=[(self.name, self.clf)])
                # ``cv`` must be passed by keyword (the 4th positional argument of
                # cross_validate is ``groups``), and the result is a dict of per-fold arrays,
                # so the score is the mean of its 'test_score' entry.
                cv_results = cross_validate (cv_estimator, X, Y, cv=self.cv)
                self.cv_score = np.mean (cv_results['test_score'])
                self.clf.fit (X, Y)
                print (self.name, ": cv_score:   %0.3f" % self.cv_score)
        else:
            if self.name == 'LSvc':
                self.clf = CalibratedClassifierCV (self.clf)
            self.clf.fit (X, Y)
        print ("Done Fitting", self.name)
        return self

    def get_cv_score (self):
        """Return the CV score recorded by the last fit () (0 before fitting)."""
        return self.cv_score

    def transform (self, X, Y=None, **fit_params):
        """Delegate to the wrapped classifier's ``transform``; casts inputs for 'FCNN'."""
        if self.name == 'FCNN':
            X = X.astype (np.float32)
            if not Y is None:
                Y = Y.astype (np.int64)
        return self.clf.transform(X, Y)

    def predict (self, X, **fit_params):
        """Delegate to the wrapped classifier's ``predict``; casts X to float32 for 'FCNN'."""
        if self.name == 'FCNN':
            X = X.astype (np.float32)
        return self.clf.predict(X)

    def predict_proba (self, X):
        """Delegate to ``predict_proba``; casts X to float32 for 'FCNN'."""
        if self.name == 'FCNN':
            X = X.astype (np.float32)
        return self.clf.predict_proba (X)

    def predict_log_proba (self, X):
        """Delegate to ``predict_log_proba``; casts X to float32 for 'FCNN'."""
        if self.name == 'FCNN':
            X = X.astype (np.float32)
        return self.clf.predict_log_proba (X)

    def score (self, X, Y, **fit_params):
        """Delegate to the wrapped classifier's ``score``; casts X to float32 for 'FCNN'."""
        if self.name == 'FCNN':
            X = X.astype (np.float32)
        return self.clf.score (X, Y, **fit_params)

    def decision_function (self, X, **fit_params):
        """Delegate to ``decision_function``; casts X to float32 for 'FCNN'."""
        if self.name == 'FCNN':
            X = X.astype (np.float32)
        return self.clf.decision_function (X)

    def set_params (self, **params):
        """Reconfigure the wrapper from ``params['myparams']`` and return ``self``.

        This rebuilds the wrapped classifier, so any previous fit is discarded. Calling it
        without arguments is a no-op.
        """
        if not params:
            return self
        self._configure (params['myparams'])
        return self

    def get_params (self, deep=True):
        """Return ``{'myparams': ...}`` holding the dict given at construction; ``deep`` is ignored."""
        params = {'myparams': self.myparams}
        return params

    def apply (self, X):
        """Delegate to the wrapped classifier's ``apply``."""
        return self.clf.apply(X)

    def decision_path (self, X):
        """Delegate to the wrapped classifier's ``decision_path``."""
        return self.clf.decision_path (X)

    def staged_decision_function (self, X):
        """Delegate to the wrapped classifier's ``staged_decision_function``."""
        return self.clf.staged_decision_function (X)

    def staged_predict (self, X):
        """Delegate to the wrapped classifier's ``staged_predict``."""
        return self.clf.staged_predict (X)

    def staged_predict_proba (self, X):
        """Delegate to the wrapped classifier's ``staged_predict_proba``."""
        return self.clf.staged_predict_proba (X)

    def staged_score (self, X, Y=None):
        """Delegate to the wrapped classifier's ``staged_score``.

        ``Y`` holds the true labels and is forwarded with X; it is optional only to keep the
        earlier one-argument signature callable.
        """
        return self.clf.staged_score (X, Y)

# Get Bagging Classifier from a base clf, after CV on bagging params

# params     = { 'max_samples'  : 1.0,  'n_estimators' : 10 }
# param_grid = { 'max_features' : [0.7, 0.8, 0.9, 1.0] }

def get_bagging_clf_cv (X, Y, clf, params=None, param_grid=None): 
    """Wrap ``clf`` in a BaggingClassifier, optionally tuned by 5-fold grid search.

    ``params`` (if truthy) is applied to the bagger with ``set_params``. When ``param_grid``
    is truthy a GridSearchCV is fitted on (X, Y), its best estimator is benchmarked into the
    module-level ``RESULTS`` list and returned; otherwise the unfitted bagger is returned.
    """
    global RESULTS

    bagger = BaggingClassifier (base_estimator=clf)
    if params:
        bagger.set_params (**params)

    # Nothing to tune: hand back the configured, still unfitted bagger.
    if not param_grid:
        return bagger

    search = GridSearchCV (bagger, param_grid, iid=False, cv=5)
    search.fit (X, Y)
    print ("get_bagging_clf_cv(): Best parameter (CV score=%0.3f):" % search.best_score_)
    print (search.best_params_)
    bagger = search.best_estimator_
    RESULTS.append (benchmark (search.best_estimator_))
    # TODO: Plot scores for each split, and get their variance
    return bagger

def getBaggedXGB_RESULTS(Xtrain, Xtest, Ytrain, num_class=9, num_round=100, bgs=2):
    """Average the class probabilities of several XGBoost models trained on resampled rows.

    Each of the ``bgs`` rounds builds a list of training row indices made of one full
    permutation of the rows plus seven times as many rows drawn with replacement, trains a
    booster on those rows with seed ``round + 1`` and adds its test prediction to a sum.

    Parameters
    ----------
    Xtrain, Ytrain : training features and labels; both are indexed with a list of row indices.
    Xtest : test features.
    num_class : int, number of classes passed to XGBoost and used for the output width.
    num_round : int, number of boosting rounds per model.
    bgs : int, number of bagged models; must be at least 1.

    Returns
    -------
    Array of shape ``(Xtest.shape[0], num_class)``: the mean of the per-model predictions.

    Raises
    ------
    ValueError if ``bgs`` is smaller than 1.
    """
    if bgs < 1:
        raise ValueError ('bgs must be at least 1, got %r' % (bgs,))

    # NOTE(review): 'logloss' is configured together with a multi-class objective; the watch
    # list is empty so it is presumably never evaluated - confirm against the xgboost version.
    param = {}
    param['booster'] = 'gbtree'
    param['objective'] = 'multi:softprob'
    param['num_class'] = num_class
    param['eval_metric'] = 'logloss'
    param['scale_pos_weight'] = 1.0
    param['bst:eta'] = 0.3
    param['bst:max_depth'] = 6
    param['bst:colsample_bytree'] = 0.5
    param['silent'] = 1
    param['nthread'] = 16
    watchlist = []
    extra_draw_factor = 7.0    # rows drawn with replacement, as a multiple of the training size

    time0 = time()
    idxTrain = range(len(Ytrain))
    Ytestxg  = np.zeros((Xtest.shape[0], num_class))

    for bg in range(bgs):
        param['seed'] = bg + 1
        plst = list(param.items())

        # One full permutation of the rows ...
        newidxTrain = random.sample(idxTrain, int(len(idxTrain) * 1.0))
        # ... followed by extra rows drawn one at a time with replacement.
        for _ in range(int(len(idxTrain) * extra_draw_factor)):
            newidxTrain.append(random.choice(idxTrain))

        Xdatatrain = xgb.DMatrix(data = Xtrain[newidxTrain], label = Ytrain[newidxTrain])
        Xdatatest = xgb.DMatrix(data = Xtest)

        bst = xgb.train(plst, Xdatatrain, num_round, watchlist)

        curpred = bst.predict(Xdatatest).reshape((Xtest.shape[0], num_class))
        Ytestxg += curpred

        # round index and elapsed minutes
        print (bg, (time() - time0) / 60.)

    Ytestxg /= bgs
    return Ytestxg

# Run the bagged XGBoost ensemble on X_TRAIN / X_TEST / Y_TRAIN (defined outside this view)
# and leave the averaged predictions as the last expression.
Ytestxg = getBaggedXGB_RESULTS(X_TRAIN, X_TEST, Y_TRAIN)
Ytestxg

from sklearn.datasets import make_classification

# Smoke test for the 'FCNN' wrapper on a synthetic 1000 x 20 classification problem.
X, y = make_classification(1000, 20, n_informative=10, random_state=0)
# Same dtypes that Best_clf_cv_transformer.fit casts to for 'FCNN'.
X = X.astype(np.float32)
y = y.astype(np.int64)

# The string literal below is a disabled, direct NeuralNetClassifier construction kept for reference.
"""
clf = NeuralNetClassifier (
                module=MyModule,
                module__inputCount=X.shape[1], 
                module__outputCount=2, 
                module__hiddenLayerCounts=[int (X.shape[1]/4), int (X.shape[1]/8)],
                max_epochs=20,
                lr=0.1,
                # Shuffle training data on each epoch
                iterator_train__shuffle=True,
) """

# Build the wrapper by name, fit it, and print the predicted probabilities for the training rows.
clf = Best_clf_cv_transformer ({ 'name': 'FCNN'})
clf.fit (X, y)
y_proba = clf.predict_proba (X)
print (y_proba)
# cv_score = cross_validate (clf, X, y, 2)

# Very useful grid search CV strategy:

In [24]:
def grid_search_wrapper(X_train, y_train, refit_score='precision_score'):
    """
    fits a GridSearchCV classifier using refit_score for optimization
    prints classifier performance metrics

    Besides its arguments the function reads these module-level names: ``clf`` (estimator),
    ``param_grid``, ``scorers`` (scoring dict that must contain ``refit_score``) and the
    held-out ``X_test`` / ``y_test`` used for the confusion matrix.

    X_train and y_train are accessed through ``.values``. Returns the fitted GridSearchCV.
    """

    skf = StratifiedKFold (n_splits=10)
    grid_search = GridSearchCV (clf, param_grid, scoring=scorers, refit=refit_score,
                                cv=skf, return_train_score=True, n_jobs=-1)
    grid_search.fit (X_train.values, y_train.values)

    # make the predictions
    y_pred = grid_search.predict (X_test.values)

    print('Best params for {}'.format (refit_score))
    print(grid_search.best_params_)

    # confusion matrix on the test data.
    print ('\nConfusion matrix of Random Forest optimized for {} on the test data:'.format(refit_score))
    print (pd.DataFrame(confusion_matrix(y_test, y_pred), columns=['pred_neg', 'pred_pos'], index=['neg', 'pos']))

    # See all the scores for the param grid, best first. The table is ordered by the metric
    # that was actually optimised (``refit_score``) rather than always by precision, so the
    # top row matches ``best_params_``.
    results = pd.DataFrame (grid_search.cv_results_)
    results = results.sort_values (by='mean_test_{}'.format (refit_score), ascending=False)
    print (results)
    return grid_search