 # Import Modules

In [None]:
import logging
import warnings

import pandas as pd
from sklearn import svm
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from {{cookiecutter.repo_name}} import pipelines
from {{cookiecutter.repo_name}}.utils import mlflow

# Keep notebook output quiet: suppress every Python warning and configure
# the root logger so that only WARNING-and-above records are emitted.
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

 # Load Data

In [None]:
# Build the dataset, then split it into the feature matrix X (every column
# except 'species') and the target vector y (the 'species' column).
df = pipelines.make_dataset()
y = df['species']
X = df.drop(columns='species')
df.head()  # last expression of the cell: preview the first rows

 # Select Model

 ## Set Validation Code

In [None]:
def validate(X, y, model: BaseEstimator, cv=5):
    """Cross-validate ``model`` on ``(X, y)`` and return its test scores.

    Args:
        X: Feature data, passed through to ``cross_validate``.
        y: Target values, passed through to ``cross_validate``.
        model: Estimator to evaluate.
        cv: Cross-validation setting forwarded to ``cross_validate``.
            Defaults to 5, the value that used to be hard-coded here.

    Returns:
        A dict keyed by ``'test_accuracy'``, ``'test_precision_macro'``,
        ``'test_recall_macro'`` and ``'test_f1_macro'``; each value is the
        matching entry of the ``cross_validate`` result.
    """
    score_metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    # Map each scorer name to itself so the result keys are 'test_<name>'.
    scoring = {metric: metric for metric in score_metrics}
    scores = cross_validate(model, X, y, cv=cv, scoring=scoring)
    # Keep only the test scores; timing entries are dropped.
    return {
        'test_' + metric: scores['test_' + metric]
        for metric in score_metrics
    }

 ## Set Logging Code

In [None]:
def log(model: BaseEstimator,
        X,
        y,
        model_name,
        parameters: "dict | None" = None,
        draw=False):
    """Cross-validate ``model`` and record the outcome in an MLflow run.

    Inside a run named ``model_name`` this logs every entry of
    ``parameters`` as a param, the model name under the ``'model'`` param,
    the mean of each metric returned by ``validate`` as a metric, and the
    estimator itself under the artifact path ``"model"``.

    Args:
        model: Estimator to evaluate and log.
        X: Feature data handed to ``validate``.
        y: Target values handed to ``validate``.
        model_name: Used as the run name and as the ``'model'`` param.
        parameters: Hyper-parameters to log; ``None`` (the default) means
            no extra parameters.
        draw: When true, draw a box plot of the validation scores after
            the run has finished.
    """
    # A ``None`` sentinel replaces the former ``{}`` default so that no dict
    # object is shared between calls.
    if parameters is None:
        parameters = {}

    with mlflow.start_run(run_name=model_name):
        results = validate(X, y, model)
        for param, value in parameters.items():
            mlflow.log_param(param, value)
        mlflow.log_param('model', model_name)
        for metric, values in results.items():
            # One number per metric: the mean across validation scores.
            mlflow.log_metric(metric, values.mean())
        # NOTE(review): cross_validate fits clones of the estimator, so
        # ``model`` itself appears to be unfitted when it is logged here --
        # confirm whether a final fit on the full data is intended.
        mlflow.sklearn.log_model(model, "model")
    if draw:
        pd.DataFrame(results).plot(figsize=(10, 5), kind='box')

 ## Test Models

 ### Logistic Regression

In [None]:
# max_iter must be an integer: scikit-learn documents it as an int and
# recent releases reject a float such as 1e4 during parameter validation.
parameters = dict(max_iter=10000)
pipe = make_pipeline(StandardScaler(), LogisticRegression(**parameters))
# Name the run after the final estimator of the pipeline.
model_name = pipe[-1].__class__.__name__
log(pipe, X, y, model_name, parameters)

 ### Linear Support Vector Machine

In [None]:
# max_iter must be an integer: scikit-learn documents it as an int and
# recent releases reject a float such as 1e4 during parameter validation.
parameters = dict(dual=True, max_iter=10000)
pipe = make_pipeline(StandardScaler(), svm.LinearSVC(**parameters))
# Name the run after the final estimator of the pipeline.
model_name = pipe[-1].__class__.__name__
log(pipe, X, y, model_name, parameters)