In [None]:
from datetime import datetime
from os import getenv, putenv
import os

# Tracking server URI and experiment name are read from the environment; the
# literals below are only the fallbacks used when the variables are unset.
MLFLOW_TRACKING_URI = getenv('MLFLOW_TRACKING_URI', 'http://mlflow-server:80')
MLFLOW_EXPERIMENT = getenv('MLFLOW_EXPERIMENT_NAME', 'test')

# S3 credentials / endpoint for the process environment.
# setdefault() keeps any value that is already exported (e.g. injected by the
# platform or a secret mount) and only falls back to the placeholder literals
# when nothing is set, so real credentials are never clobbered.
# NOTE(review): the fallback values look like placeholders - prefer supplying
# real credentials through the environment rather than editing them in here.
os.environ.setdefault('AWS_ACCESS_KEY_ID', 's3-access-key')
os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 's3-secret-key')
os.environ.setdefault('MLFLOW_S3_ENDPOINT_URL', 'http://s3.openshift-storage.svc.cluster.local/')

# Compact yymmddHHMMSS stamp taken from the local clock at cell execution time.
timestamp = datetime.now().strftime('%y%m%d%H%M%S')

In [None]:
import mlflow
import pandas as pd
from sklearn.metrics import brier_score_loss, roc_auc_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

In [None]:
def get_labels():
    """Return the ``target`` array of scikit-learn's breast-cancer dataset.

    The dataset is loaded anew on every call via ``load_breast_cancer()``.
    """
    return load_breast_cancer().target


def get_training_samples():
    """Return the breast-cancer feature matrix together with its column names.

    Returns:
        A ``(data, feature_names)`` tuple taken from the object returned by
        ``load_breast_cancer()``.
    """
    dataset = load_breast_cancer()
    return dataset.data, dataset.feature_names

In [None]:
# Point MLflow at the configured tracking server before any client is built.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = mlflow.tracking.MlflowClient()

# Resolve the experiment by name instead of assuming it has id '1', and create
# it only when it is genuinely missing. Unlike a bare `except:` around
# create_experiment(), connection/auth errors now surface as real exceptions
# rather than being reported as "Experiment exists.".
experiment = client.get_experiment_by_name(MLFLOW_EXPERIMENT)
if experiment is None:
    experiment_id = client.create_experiment(name=MLFLOW_EXPERIMENT)
    experiment = client.get_experiment(experiment_id)
else:
    print('Experiment exists.')
    experiment_id = experiment.experiment_id

# Human-readable run *name* (the variable keeps its original name `run_id`);
# the server-generated id is available as active_run.info.run_id.
run_id = f'vrp-{MLFLOW_EXPERIMENT}-{timestamp}'

# Starts (and leaves open) the run that the following cells log into.
active_run = mlflow.start_run(run_name=run_id,
                              experiment_id=experiment_id)

# Echo where the run landed so a misconfigured server/experiment is obvious.
run_experiment = mlflow.get_experiment(active_run.info.experiment_id)
print(active_run.info.experiment_id)
print(run_experiment.name)
print(run_experiment.artifact_location)
print(active_run.info.lifecycle_stage)
print(active_run.info.status)
print(active_run.info.user_id)
print(active_run.info.run_id)
print(mlflow.tracking.get_tracking_uri())

In [None]:
# Wrap the raw arrays in pandas containers so feature names travel with the data.
training_samples, feature_names = get_training_samples()
X = pd.DataFrame(data=training_samples, columns=feature_names)
y = pd.Series(data=get_labels(), name="Target")

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)
# The full frames are not needed once the split exists.
del X, y

# Standardise the features, then fit an L2-regularised logistic regression.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "estimator",
            LogisticRegression(
                penalty="l2",
                C=10.0,
                random_state=0,
                solver="liblinear",
            ),
        ),
    ]
)

pipe.fit(X_train, y_train)

In [None]:
# Score the held-out split once and reuse the result for both metrics.
# Column 1 of predict_proba is the probability of the class at index 1.
class1_proba = pipe.predict_proba(X_test)[:, 1]
brier = brier_score_loss(y_test, class1_proba)
roc = roc_auc_score(y_test, class1_proba)

# Read hyper-parameters from the fitted estimator so the logged values cannot
# drift from what was actually trained.
estimator = pipe.named_steps["estimator"]

try:
    mlflow.log_param("C", estimator.C)
    mlflow.log_param("Penalty", estimator.penalty)
    mlflow.log_metric("brier", brier)
    mlflow.log_metric("roc", roc)

    # NOTE(review): the registered name says "random-forest-reg" but the
    # pipeline is a logistic-regression classifier. The name is kept unchanged
    # because renaming would target a different registry entry - confirm
    # whether it should be corrected.
    mlflow.sklearn.log_model(
        pipe, "my_model",
        registered_model_name="sk-learn-random-forest-reg-model",
    )
except Exception:
    # Close the run as FAILED so it is not left active in this kernel, then
    # let the original error propagate.
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()