Steps:
1. Load and preprocess data (bronze --> gold).
2. Use TPOT to find the optimal pipeline with cross-validation (try simple_pipeline = True to avoid using StackingEstimator and CombineDFs).
3. Get the best pipeline from step 2 as a variable.

For every iteration that refits the best pipeline on the whole training data:
- Set up an experiment and keep its ID with `EXPERIMENT_ID = mlflow.create_experiment(EXPERIMENT_NAME)` (create it only once; the call fails if an experiment with that name already exists)
- Start an MLflow run with `with mlflow.start_run(experiment_id=EXPERIMENT_ID) as run:`
- Get the predictions on the test set
- Evaluate metrics on the test set
- Log these metrics with `mlflow.log_metric`
- Log params with `mlflow.log_param`
- Log the model with `mlflow.sklearn.log_model(<the fitted classifier>, "classifier")`

Resources:
- How to get all TPOT pipelines info and arrange them: https://github.com/EpistasisLab/tpot/issues/703


In [47]:
import mlflow
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from tpot import TPOTClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import itertools

In [2]:
# Load the iris dataset and hold out 20% of the samples as a test set.
# The split is seeded so that repeated runs use the same partition.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [3]:
# Custom TPOT search space: three classifiers, each mapped to the
# hyperparameter values TPOT is allowed to choose from.
tpot_config = {
    # k-nearest neighbours: k from 1 to 20, both weighting schemes, p in {1, 2}.
    'sklearn.neighbors.KNeighborsClassifier': dict(
        n_neighbors=range(1, 21),
        weights=["uniform", "distance"],
        p=[1, 2],
    ),
    # Linear support vector classifier.
    'sklearn.svm.LinearSVC': dict(
        penalty=["l1", "l2"],
        loss=["hinge", "squared_hinge"],
        dual=[True, False],
        tol=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        C=[1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
    ),
    # Logistic regression over the same grid of C values as LinearSVC.
    'sklearn.linear_model.LogisticRegression': dict(
        penalty=["l1", "l2"],
        C=[1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
        dual=[True, False],
    ),
}

In [104]:
EXPERIMENT_NAME = "tpot-mlflow-iris-003"
# mlflow.create_experiment raises if an experiment with this name already
# exists, so re-running this cell would fail. Look the experiment up by name
# first and only create it when it is missing.
_existing_experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if _existing_experiment is not None:
    EXPERIMENT_ID = _existing_experiment.experiment_id
else:
    EXPERIMENT_ID = mlflow.create_experiment(EXPERIMENT_NAME)
# use this to search for experiments with their names, id, runs...
mlflow.search_experiments()

[<Experiment: artifact_location='file:///c:/Users/JOSHUALE/Documents/Github/mlflow_tutorial/mlruns/461405648496883018', creation_time=1696917546571, experiment_id='461405648496883018', last_update_time=1696917546571, lifecycle_stage='active', name='tpot-mlflow-iris-003', tags={}>,
 <Experiment: artifact_location='file:///c:/Users/JOSHUALE/Documents/Github/mlflow_tutorial/mlruns/230552761396043462', creation_time=1696905495271, experiment_id='230552761396043462', last_update_time=1696905495271, lifecycle_stage='active', name='tpot-mlflow-iris-002', tags={}>,
 <Experiment: artifact_location='file:///c:/Users/JOSHUALE/Documents/Github/mlflow_tutorial/mlruns/0', creation_time=1696905185290, experiment_id='0', last_update_time=1696905185290, lifecycle_stage='active', name='Default', tags={}>]

In [102]:
# Settings shared by every TPOT search in this notebook.
GENERATIONS = [4, 6, 8, 10]  # generation budgets to try, one search each
POPULATION_SIZE = 100
RANDOM_STATE = 99

# Metric passed to TPOT as `scoring`. Scoring metrics:
#   accuracy, adjusted_rand_score, average_precision, balanced_accuracy,
#   f1, f1_macro, f1_micro, f1_samples, f1_weighted,
#   precision, precision_macro, precision_micro, precision_samples,
#   precision_weighted, recall, recall_macro, recall_micro,
#   recall_samples, recall_weighted, roc_auc
SCORING = 'accuracy'

# The TPOT pipeline optimizer itself is initialized inside the run loop further down.


In [89]:
# pipeline_params = pipeline_optimizer.__dict__
# print(pipeline_params)
# config_dict = pipeline_params.pop('config_dict', None)
# print(config_dict)
# print(len(str(pipeline_params)))
# print(len(str(config_dict)))

In [103]:
# Run one TPOT search per generation budget and record each search as its own
# MLflow run: search parameters, best pipeline, test accuracy and fitted model.
for GENERATION in GENERATIONS:
    pipeline_optimizer = TPOTClassifier(
        generations=GENERATION,
        population_size=POPULATION_SIZE,
        verbosity=2,
        config_dict=tpot_config,
        random_state=RANDOM_STATE,
        scoring=SCORING,
    )
    # Log to the experiment referenced by EXPERIMENT_ID (set in the experiment
    # setup cell) instead of overwriting it with a hard-coded ID, which only
    # exists in one machine's local mlruns store.
    with mlflow.start_run(experiment_id=EXPERIMENT_ID) as run:
        # Retrieve run id
        RUN_ID = run.info.run_id

        # Fit the optimizer with the current config and take the best pipeline.
        pipeline_optimizer.fit(features=X_train, target=y_train)
        fitted_pipeline = pipeline_optimizer.fitted_pipeline_

        # Evaluate the best pipeline on the held-out test set.
        y_pred = fitted_pipeline.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Track pipeline parameters. Work on a shallow copy of the optimizer's
        # attribute dict: popping from __dict__ directly would delete the
        # config_dict attribute from the optimizer itself.
        pipeline_params = dict(vars(pipeline_optimizer))
        config_dict = pipeline_params.pop('config_dict', None)
        # String lengths, printed to compare against the 500-character limit
        # mentioned in the notes below.
        print(len(str(pipeline_params)))
        print(len(str(config_dict)))
        # mlflow.log_param('tpot_pipeline_params', pipeline_params) # below 500 chars
        mlflow.set_tag('tpot_config_dict', config_dict) # above 500 chars
        mlflow.log_param("tpot_generation", GENERATION)
        mlflow.log_param("tpot_population_size", POPULATION_SIZE)
        mlflow.log_param("tpot_scoring", SCORING)
        mlflow.log_param("tpot_random_state", RANDOM_STATE)
        # Track best pipeline params.
        mlflow.log_param("best_pipeline", fitted_pipeline.steps)

        # Track metrics
        mlflow.log_metric("accuracy", accuracy)

        # Track model
        mlflow.sklearn.log_model(fitted_pipeline, "classifier")

                                                                              
Generation 1 - Current best internal CV score: 0.975
                                                                              
Generation 2 - Current best internal CV score: 0.975
                                                                              
Generation 3 - Current best internal CV score: 0.975
                                                                              
Generation 4 - Current best internal CV score: 0.975
                                                                              
Best pipeline: LogisticRegression(input_matrix, C=25.0, dual=False, penalty=l2)
186405
512




                                                                              
Generation 1 - Current best internal CV score: 0.975
                                                                              
Generation 2 - Current best internal CV score: 0.975
                                                                              
Generation 3 - Current best internal CV score: 0.975
                                                                              
Generation 4 - Current best internal CV score: 0.975
                                                                              
Generation 5 - Current best internal CV score: 0.975
                                                                              
Generation 6 - Current best internal CV score: 0.975
                                                                              
Best pipeline: LogisticRegression(input_matrix, C=25.0, dual=False, penalty=l2)
244560
512




                                                                              
Generation 1 - Current best internal CV score: 0.975
                                                                              
Generation 2 - Current best internal CV score: 0.975
                                                                              
Generation 3 - Current best internal CV score: 0.975
                                                                              
Generation 4 - Current best internal CV score: 0.975
                                                                              
Generation 5 - Current best internal CV score: 0.975
                                                                              
Generation 6 - Current best internal CV score: 0.975
                                                                              
Generation 7 - Current best internal CV score: 0.975
                                                                            



                                                                               
Generation 1 - Current best internal CV score: 0.975
                                                                               
Generation 2 - Current best internal CV score: 0.975
                                                                               
Generation 3 - Current best internal CV score: 0.975
                                                                               
Generation 4 - Current best internal CV score: 0.975
                                                                               
Generation 5 - Current best internal CV score: 0.975
                                                                               
Generation 6 - Current best internal CV score: 0.975
                                                                               
Generation 7 - Current best internal CV score: 0.975
                                                                     

