# Comparing ML models

In [1]:
from mlflow_for_ml_dev.utils.utils import get_root_project

import mlflow 
from mlflow.models.signature import infer_signature

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

import pandas as pd
import os 


In [2]:
tracking_server = get_root_project() / 'traditional_ml_evaluation' / 'mlruns'

In [3]:
tracking_server

WindowsPath('C:/Users/manue/projects/mlflow_for_ml_dev/traditional_ml_evaluation/mlruns')

In [4]:
os.makedirs(tracking_server, exist_ok=True)
mlflow.set_tracking_uri(tracking_server.as_uri())

In [5]:
experiment_name = "comparing_models"
try:
    mlflow.create_experiment(name = experiment_name)
except Exception as e:
    print(e)

mlflow.set_experiment(experiment_name=experiment_name)
    

<Experiment: artifact_location='file:///C:/Users/manue/projects/mlflow_for_ml_dev/traditional_ml_evaluation/mlruns/850915442498252700', creation_time=1724018384487, experiment_id='850915442498252700', last_update_time=1724018384487, lifecycle_stage='active', name='comparing_models', tags={}>

## Data Generation

In [98]:
x, y = make_classification(n_samples=1000, n_features=5, n_classes=2, random_state=42)

x_df = pd.DataFrame(x, columns=[f"feature_{i+1}" for i in range(x.shape[1])])
y_df = pd.DataFrame(y, columns=["target"])
feature_names = x_df.columns
x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=42)

model_signature = infer_signature(x_train, y_train)

print(x_train.head())
print(x_test.head())

     feature_1  feature_2  feature_3  feature_4  feature_5
29   -1.358062   0.388926  -2.219300   0.629189   0.502890
535  -1.692785   0.161613  -0.451659   0.630933   1.416180
695   3.291478  -0.557601  -0.580053  -1.342261  -2.159247
557  -2.076136   1.416809  -0.181224   1.351993  -1.239513
836  -1.348164   0.336725   0.038238   0.601181   0.619803
     feature_1  feature_2  feature_3  feature_4  feature_5
521   1.250932  -1.064291  -2.238231  -0.914547   1.261286
737  -0.196283   0.190820  -0.243384   0.154804  -0.256094
740   2.659138  -0.265773   1.072978  -0.996758  -2.195564
660   0.087778  -0.021011  -0.667780  -0.038709  -0.042586
411  -0.662457   0.741043  -0.358340   0.568499  -1.101298




## Training Baseling model

In [99]:
baseline_model = DecisionTreeClassifier(random_state=42)
baseline_model.fit(x_train, y_train)

baseline_predictions = baseline_model.predict(x_test)

# log baseline model
with mlflow.start_run(run_name="baseline") as baseline_run:
    
    mlflow.log_params(baseline_model.get_params())

    artifact_path = "baseline_model"
    mlflow.sklearn.log_model(baseline_model, artifact_path, signature=model_signature)




In [100]:
baseline_uri = f"runs:/{baseline_run.info.run_id}/{artifact_path}"

## Training Candidate Model

In [101]:
candidate = RandomForestClassifier(random_state=42)
candidate.fit(x_train, y_train)

candidate_predictions = candidate.predict(x_test)

# log candidate model
with mlflow.start_run(run_name="candidate") as candidate_run:
    
    mlflow.log_params(candidate.get_params())

    artifact_path = "candidate_model"
    mlflow.sklearn.log_model(candidate, artifact_path, signature=model_signature)

candidate_uri = f"runs:/{candidate_run.info.run_id}/{artifact_path}"

  return fit_method(estimator, *args, **kwargs)


### Wrapping models

In [102]:
def baseline_model_func(model_input):
    return baseline_model.predict(model_input)

def candidate_model_func(model_input):
    return candidate.predict(model_input)

### Comparing models

In [104]:
eval_data_for_model = x_test.copy()
eval_data_for_model["target"] = y_test

print(eval_data_for_model.head())
print(x_test.head())

     feature_1  feature_2  feature_3  feature_4  feature_5  target
521   1.250932  -1.064291  -2.238231  -0.914547   1.261286       1
737  -0.196283   0.190820  -0.243384   0.154804  -0.256094       1
740   2.659138  -0.265773   1.072978  -0.996758  -2.195564       1
660   0.087778  -0.021011  -0.667780  -0.038709  -0.042586       1
411  -0.662457   0.741043  -0.358340   0.568499  -1.101298       0
     feature_1  feature_2  feature_3  feature_4  feature_5
521   1.250932  -1.064291  -2.238231  -0.914547   1.261286
737  -0.196283   0.190820  -0.243384   0.154804  -0.256094
740   2.659138  -0.265773   1.072978  -0.996758  -2.195564
660   0.087778  -0.021011  -0.667780  -0.038709  -0.042586
411  -0.662457   0.741043  -0.358340   0.568499  -1.101298


### Creating Validation thresholds

In [105]:
from mlflow.models import MetricThreshold

f1_score_th = {
    "f1_score": MetricThreshold(
        threshold = 0.8,
        min_absolute_change=0.01,
        min_relative_change=0.01,
        greater_is_better=True,
    )
}

In [106]:
with mlflow.start_run(run_name="comparing models") as run:
    results = mlflow.evaluate(
        model = candidate_model_func,
        baseline_model = baseline_uri,
        data = eval_data_for_model,
        targets="target",
        model_type = "classifier",
        validation_thresholds=f1_score_th

    )

2024/08/18 18:56:46 INFO mlflow.models.evaluation.default_evaluator: Evaluating candidate model:
2024/08/18 18:56:46 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/08/18 18:56:46 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/08/18 18:56:46 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/08/18 18:56:46 INFO mlflow.models.evaluation.default_evaluator: Shap explainer ExactExplainer is used.
2024/08/18 18:56:56 INFO mlflow.models.evaluation.default_evaluator: Evaluating baseline model:
2024/08/18 18:56:56 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/08/18 18:56:56 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/08/18 18:56:56 INFO mlflow.models.evaluation.default_evaluator: Tes

### Adding Custom Metrics

In [107]:

from mlflow.metrics import make_metric
from sklearn.metrics import f1_score

def custom_accuracy(df, __builtin_metrics):
    targets = df["target"]
    predictions = df["prediction"]
    return sum(targets == predictions) / len(targets)

def custom_f1_score(df, __builtin_metrics):    
    targets = df["target"]
    predictions = df["prediction"]
    return f1_score(targets, predictions, average="weighted")

custom_metric_accuracy = make_metric(
    eval_fn = custom_accuracy,
    name = "custom_accuracy",
    greater_is_better=True,
)

custom_metric_f1_score = make_metric(
    eval_fn = custom_f1_score,
    name = "custom_f1_score",
    greater_is_better=True,
)

In [108]:
with mlflow.start_run(run_name="comparing models") as run:
    results = mlflow.evaluate(
        model = candidate_model_func,
        baseline_model = baseline_uri,
        data = eval_data_for_model,
        targets="target",
        model_type = "classifier",
        validation_thresholds=f1_score_th,
        extra_metrics=[custom_metric_accuracy, custom_metric_f1_score]
    )

2024/08/18 18:56:57 INFO mlflow.models.evaluation.default_evaluator: Evaluating candidate model:
2024/08/18 18:56:57 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/08/18 18:56:57 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/08/18 18:56:57 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/08/18 18:56:57 INFO mlflow.models.evaluation.default_evaluator: Shap explainer ExactExplainer is used.
2024/08/18 18:57:06 INFO mlflow.models.evaluation.default_evaluator: Evaluating baseline model:
2024/08/18 18:57:06 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/08/18 18:57:06 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/08/18 18:57:06 INFO mlflow.models.evaluation.default_evaluator: Tes