# Install dependencies

# MLflow on ProKube Platform - Quick Start Guide

This notebook demonstrates how to use MLflow in the ProKube platform with Personal Access Token (PAT) authentication.

## Prerequisites

- Generate your Personal Access Token:
  - Open [MLflow UI](/mlflow) in your browser
  - Navigate to the [Permissions](/mlflow/oidc/ui/#) page
  - Click the "Create access key" button
  - Copy the generated token and store it securely
  - Note: You won't be able to see the token again!

- Configure the credentials below with your values

## Authentication

The ProKube MLflow setup uses OIDC authentication with PAT support for programmatic access.


In [None]:
import os

# MLflow reads its tracking server location and login from these environment
# variables. Replace each placeholder with your own value before running, and
# avoid committing the notebook with a real token filled in.
os.environ.update({
    'MLFLOW_TRACKING_URI': 'https://<your-domain>/mlflow',
    'MLFLOW_TRACKING_USERNAME': '<your-email>',
    'MLFLOW_TRACKING_PASSWORD': '<your-token>',
})

In [None]:
# Validate MLflow configuration
#
# Stops the notebook early (by raising) when any required MLflow variable is
# unset, empty, or still holds one of the template placeholder values.
import os

REQUIRED_ENV_VARS = ["MLFLOW_TRACKING_URI", "MLFLOW_TRACKING_USERNAME", "MLFLOW_TRACKING_PASSWORD"]

# Classify every required variable in a single pass over the list.
missing_vars = []
placeholder_vars = []
for env_name in REQUIRED_ENV_VARS:
    env_value = os.environ.get(env_name, "")
    if not env_value:
        # Unset and empty are both treated as "missing".
        missing_vars.append(env_name)
    if env_value.startswith(("https://<", "<your-")):
        # Value still begins with one of the template placeholder prefixes.
        placeholder_vars.append(env_name)

if missing_vars or placeholder_vars:
    warning_msg = """
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║       WARNING: MLflow environment variables not properly configured!         ║
║                                                                              ║
║   Without proper configuration, MLflow will use LOCAL storage instead of     ║
║   the remote MLflow tracking server. Your experiments will NOT be saved      ║
║   to the central MLflow instance!                                            ║
║                                                                              ║
╚══════════════════════════════════════════════════════════════════════════════╝
"""
    # Assemble the whole report first, then emit it with a single print.
    report_lines = [warning_msg]
    if missing_vars:
        report_lines.append(f"Missing environment variables: {', '.join(missing_vars)}")
    if placeholder_vars:
        report_lines.append(f"Variables still set to placeholder values: {', '.join(placeholder_vars)}")
    report_lines.extend([
        "\nPlease update the cell above with your actual MLflow credentials:",
        "  - MLFLOW_TRACKING_URI: Your MLflow server URL (e.g., https://your-domain.com/mlflow)",
        "  - MLFLOW_TRACKING_USERNAME: Your email address",
        "  - MLFLOW_TRACKING_PASSWORD: Your personal access token",
    ])
    print("\n".join(report_lines))

    raise EnvironmentError("MLflow environment variables not properly configured. See message above.")

print("✓ MLflow environment variables configured correctly")

In [None]:
import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Each user gets their own experiment and registered-model name to avoid
# conflicts; the suffix is the part of the tracking username before the '@'.
# Indexing os.environ (rather than os.getenv) makes a missing variable fail
# with a KeyError that names it, instead of an AttributeError on None.
username = os.environ['MLFLOW_TRACKING_USERNAME'].split('@')[0]

# Make the per-user experiment the active one for the run logged below
mlflow.set_experiment(f"MLflow Quickstart {username}")

# Load the Iris dataset as feature matrix X and label vector y
X, y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define the model hyperparameters.
# "multi_class" is intentionally not set: the parameter is deprecated in
# recent scikit-learn releases (passing it emits a FutureWarning) and "auto"
# was its default value, so omitting it selects the same behaviour.
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "random_state": 8888,
}

# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = lr.predict(X_test)

# Calculate the accuracy on the test set
accuracy = accuracy_score(y_test, y_pred)


# Start an MLflow run; everything logged inside the block belongs to this run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the test-set accuracy
    mlflow.log_metric("accuracy", accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature from the training inputs and the model's predictions
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model and register it under a per-user model name.
    # Only a few rows are passed as the input example: it is meant to be a
    # small illustrative sample, not a copy of the entire training set.
    # NOTE(review): newer MLflow releases prefer `name=` over `artifact_path=`
    # — confirm against the MLflow version installed on the platform.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train[:5],
        registered_model_name=f"tracking-quickstart-{username}",
    )

After a successful run, you should see direct links to your experiment and run in the cell output above.