In [1]:
from sklearn.base import BaseEstimator  # Base class for all estimators in scikit-learn.
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np

class BestClassifier(BaseEstimator):

    def __init__(self,X,y, models=["LogisticRegression", "LinearSVC","SGDClassifier",
                             "KNeighborsClassifier","GaussianNB","RandomForestClassifier"]):
        """
        A Custome BaseEstimator that chooses the best model and the best hyperparameters for a dataset
        """
        self.models = models
        self.parameters = {}
        self.parameters['LogisticRegression']={"penalty":["l1","l2"],"C":np.logspace(-4,2,10)}
        self.parameters['LinearSVC']={"C":np.logspace(-4,2,10), "penalty":["l1", "l2"]}
        self.parameters['SGDClassifier']={"penalty":["elasticnet"],"alpha":np.logspace(-4,2,10), "l1_ratio":np.linspace(0,1,10)} 
        self.parameters['KNeighborsClassifier']={"n_neighbors":[1,2,4,5,10.15,20],
                                                 "metric":["euclidean","minkowski","manhattan","chebyshev"]}
        self.parameters['GaussianNB']={'var_smoothing': np.logspace(0,-9, num=20)}
        self.parameters['RandomForestClassifier']={'n_estimators': [5,10,25],
                                                   'max_features': ['auto', 'sqrt', 'log2'],
                                                   'min_samples_split':[2,5,10,20,30],
                                                   'max_depth' : [None,5,10,25,50,100,250,500],
                                                   'criterion' :['gini', 'entropy']}
        self.decide(X,y)
        
        


    def grid(self,  X, y,classifier_type: str = 'LogisticRegression'):
        if classifier_type == 'LogisticRegression':
            self.classifier_ = LogisticRegression(max_iter=5000)
            search=GridSearchCV(LogisticRegression(max_iter=5000) , self.parameters['LogisticRegression'],
                                n_jobs=-1, cv=5,verbose=0)
            search.fit(X, y)
            self.classifier_ = LogisticRegression(max_iter=5000,**search.best_params_)

            
            
            
        elif classifier_type == 'LinearSVC':
            self.classifier_ = LinearSVC(max_iter=5000)
            search=GridSearchCV(self.classifier_ , self.parameters['LinearSVC'], n_jobs=-1, cv=5,verbose=0)
            search.fit(X, y)
            self.classifier_ = LinearSVC(max_iter=5000,**search.best_params_)

            
            
        elif classifier_type == 'SGDClassifier':
            self.classifier_ = SGDClassifier(max_iter=5000)
            search=GridSearchCV(self.classifier_ , self.parameters['SGDClassifier'], n_jobs=-1, cv=5,verbose=0)
            search.fit(X, y)
            self.classifier_ = SGDClassifier(max_iter=5000,**search.best_params_)

            
            
        elif classifier_type == 'KNeighborsClassifier':
            self.classifier_ = KNeighborsClassifier()
            search=GridSearchCV(self.classifier_ , self.parameters['KNeighborsClassifier'], n_jobs=-1, cv=5,verbose=0)
            search.fit(X, y)
            self.classifier_ = KNeighborsClassifier(**search.best_params_)

            
            
        elif classifier_type == 'GaussianNB':
            self.classifier_ = GaussianNB()
            search=GridSearchCV(self.classifier_ , self.parameters['GaussianNB'], n_jobs=-1, cv=5,verbose=0)
            search.fit(X, y)
            self.classifier_ = GaussianNB(**search.best_params_)

            
            
        elif classifier_type == 'RandomForestClassifier':
            self.classifier_ = RandomForestClassifier()
            search=GridSearchCV(self.classifier_ , self.parameters['RandomForestClassifier'], n_jobs=-1, cv=5,verbose=0)
            search.fit(X, y)
            self.classifier_ = RandomForestClassifier(**search.best_params_)
            
            
        else:
            raise ValueError('Unkown classifier type.')

        
        return self.classifier_.fit(X, y),search.best_score_
    
    
    
    
    def decide(self,X,y):
        
        score=0
        for model in self.models:
            clf,score_t=self.grid(X, y,model)
            if score < score_t:
                score=score_t
                print("New one:"+model)
                self.bestclassifier_=clf
        
        
    
    def fit(self, X, y=None):
        return self.bestclassifier_.fit(X, y)
    
    def predict(self, X, y=None):
        return self.bestclassifier_.predict(X)
    
    def predict_proba(self, X):
        return self.bestclassifier_.predict_proba(X)


    def score(self, X, y):
        return self.bestclassifier_.score(X, y)

In [2]:
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)

In [4]:
clf=BestClassifier(X,y)

        nan 0.86              nan 0.93333333        nan 0.96
        nan 0.97333333        nan 0.97333333        nan 0.98
        nan 0.98      ]
        nan 0.73333333        nan 0.94              nan 0.95333333
        nan 0.96666667        nan 0.96666667        nan 0.97333333
        nan 0.96666667]


New one:LogisticRegression




New one:SGDClassifier


 0.96       0.94666667 0.97333333 0.97333333        nan 0.96
 0.96       0.93333333 0.95333333 0.96              nan 0.97333333
 0.96       0.96       0.97333333 0.98666667        nan 0.92666667]


In [5]:
clf.bestclassifier_

SGDClassifier(alpha=0.01, l1_ratio=0.8888888888888888, max_iter=5000,
              penalty='elasticnet')

In [6]:
clf.score(X,y)

0.9733333333333334