### Objective 

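- Test the `BaseModel` class that will be loaded into a `CrossValidator` object: it fills the `clf` step of a pipeline, and the grid search substitutes a `LogisticRegression` for that step.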

In [8]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from src.config import MLCONFIG
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# basemodel class from model.py 
from src.model import BaseModel

In [9]:
# Sample data: the iris dataset restricted to its first two feature columns
# (named "a" and "b"), with the dataset's `target` array as y.
iris = datasets.load_iris()
y = iris.target
X = pd.DataFrame(data=iris.data[:, :2], columns=["a", "b"])

In [10]:
# Hyperparameter grid for the grid search. The "clf" entry supplies the
# estimator placed in the pipeline's "clf" step; the "clf__*" entries are
# parameters set on that estimator.
log_reg_param_grid = dict(
    clf=[LogisticRegression(max_iter=1000, solver="liblinear")],
    # inverse regularization strength: higher C = less regularization
    clf__C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    # norm used for the regularization penalty
    clf__penalty=["l1", "l2"],
)

In [11]:
# Assemble the two-step pipeline: min-max scaling to the (0, 1) range,
# followed by the model step named "clf" (a BaseModel instance here).
mdl = BaseModel()
mm_scale = MinMaxScaler(feature_range=(0, 1))

log_reg_pipe = Pipeline(
    steps=[
        ("mm", mm_scale),
        ("clf", mdl),
    ]
)

In [12]:
# Grid search over the pipeline and hyperparameter grid. The scorers and the
# CV split come from the project config (MLCONFIG); because refit is given a
# scorer name, the best candidate is chosen and refitted by "F_score".
log_reg_cv = GridSearchCV(
    log_reg_pipe,
    log_reg_param_grid,
    scoring=MLCONFIG.BASE_SCORER,
    cv=MLCONFIG.CV_SPLIT,
    refit="F_score",
    return_train_score=True,
    verbose=10,  # log the start/end and scores of every individual fit
    n_jobs=-1,  # use all available cores
)

# Run the search on the sample data; as the last expression, the fitted
# search object is also displayed.
log_reg_cv.fit(X, y)

Fitting 5 folds for each of 14 candidates, totalling 70 fits
[CV 1/5; 1/14] START clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=0.001, clf__penalty=l1
[CV 2/5; 1/14] START clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=0.001, clf__penalty=l1
[CV 3/5; 1/14] START clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=0.001, clf__penalty=l1
[CV 1/5; 1/14] END clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=0.001, clf__penalty=l1; AUC: (train=0.500, test=0.500) F_score: (train=0.167, test=0.167) total time=   0.0s
[CV 4/5; 1/14] START clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=0.001, clf__penalty=l1
[CV 5/5; 1/14] START clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=0.001, clf__penalty=l1
[CV 1/5; 2/14] START clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=0.001, clf__penalty=l2
[CV 2/5; 1/14] END clf=LogisticRegression(max_iter=1000, solver='liblinear'), clf__C=

In [13]:
# Report the refitted best pipeline, then the winning parameter combination,
# each on its own line.
print(log_reg_cv.best_estimator_, log_reg_cv.best_params_, sep="\n")

Pipeline(steps=[('mm', MinMaxScaler()),
                ('clf',
                 LogisticRegression(C=10, max_iter=1000, solver='liblinear'))])
{'clf': LogisticRegression(C=10, max_iter=1000, solver='liblinear'), 'clf__C': 10, 'clf__penalty': 'l2'}
