In [1]:
# MLflow Plotter Demo - visualize experiment results
from pybpr.plotter import MLflowPlotter
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Build the plotter, pointing its tracking_uri at the local mlflow.db file
plotter = MLflowPlotter(
    tracking_uri="mlflow.db",
)

In [3]:
# List all experiments known to the plotter. The bare `experiments` on the
# last line makes the notebook render the result as the cell's output.
experiments = plotter.get_experiments()
print("Available experiments:")
experiments

Available experiments:


Unnamed: 0,experiment_id,name,artifact_location
0,2,movielens,/kfs2/projects/zazzle/pybpr/examples/mlruns/2
1,1,movielens_pipeline,/kfs2/projects/zazzle/pybpr/examples/mlruns/1
2,0,Default,/kfs2/projects/zazzle/pybpr/examples/mlruns/0


In [4]:
# Get runs for a specific experiment. `exp_name` and `runs` are reused by
# later cells (summary table, comparison plots, metric history).
exp_name = "movielens"
runs = plotter.get_runs(experiment_name=exp_name)
print(f"Runs in '{exp_name}':")
runs

Runs in 'movielens':


Unnamed: 0,run_id,run_name,status,start_time,model.n_latent,model.use_user_bias,model.use_item_bias,model.use_global_bias,model.dropout,model.activation,...,batch_size,eval_every,eval_user_size,early_stopping_patience,test_auc,test_auc_std,test_loss,train_auc,train_auc_std,train_loss
0,81642b1cf43a44fd9ca9b796ed20c374,ml-100k_indicator,FINISHED,1770668156911,15,True,True,True,0.0,,...,100.0,10.0,,1000.0,0.666797,0.208743,0.680158,0.696439,0.147874,0.671942
1,ac256ff8812b4644bcf73c9f0750aee2,ml-100k_indicator,FINISHED,1770667892912,15,True,True,True,0.0,,...,100.0,10.0,,1000.0,0.577491,0.17897,0.6781,0.576418,0.09619,0.671344
2,19b333aab6b54ab097bc60b81d4eb161,ml-100k_metadata,FINISHED,1770667383603,15,True,True,True,0.0,,...,100.0,10.0,,1000.0,0.600266,0.14291,0.673798,0.598103,0.090908,0.672973
3,005ae4d940f04d7b9493dfb22ca268ee,ml-100k_metadata,FINISHED,1770667038972,64,True,True,True,0.0,,...,500.0,10.0,,1000.0,0.549327,0.156271,0.6899,0.545891,0.105948,0.689677
4,df3edd9ab59f45199c1fecfafaa0128e,ml-100k_metadata,FAILED,1770666925260,64,True,True,True,0.0,,...,,,,,,,,,,


In [5]:
# Build a per-run summary restricted to the requested metrics and params.
# NOTE(review): the recorded output of this cell has no n_latent / lr columns,
# and the runs table above names the column `model.n_latent` — the param keys
# may need their prefix to be picked up; confirm against summary_table().
summary = plotter.summary_table(
    experiment_name=exp_name,
    metrics=["test_auc", "train_loss"],
    params=["n_latent", "lr"]
)
print("Run Summary (sorted by test_auc):")
summary

Run Summary (sorted by test_auc):


Unnamed: 0,run_name,status,test_auc,train_loss
0,ml-100k_indicator,FINISHED,0.666797,0.671942
2,ml-100k_metadata,FINISHED,0.600266,0.672973
1,ml-100k_indicator,FINISHED,0.577491,0.671344
3,ml-100k_metadata,FINISHED,0.549327,0.689677
4,ml-100k_metadata,FAILED,,


In [None]:

# Plot a single run (row 0 of the runs table above) with std bands enabled
fig = plotter.plot_single_run(
    run_id="81642b1cf43a44fd9ca9b796ed20c374",
    show_std=True,
    std_width=2.0,
    figsize=(14, 5),
)
plt.show()

TypeError: MLflowPlotter.plot_single_run() got an unexpected keyword argument 'save_path'

In [None]:
# Compare every run of the experiment on train_loss and test_auc,
# with std bands at std_width=1.0
fig = plotter.plot_runs_comparison(
    experiment_name=exp_name,
    metrics=["train_loss", "test_auc"],
    show_std=True,
    std_width=1.0,
    figsize=(14, 5),
)
plt.show()

In [None]:
# Same comparison as the previous cell, but with std_width=2.0 instead of 1.0
fig = plotter.plot_runs_comparison(
    experiment_name=exp_name,
    metrics=["train_loss", "test_auc"],
    show_std=True,
    std_width=2.0,
    figsize=(14, 5),
)
plt.show()

In [None]:
# Plot the 3 best runs selected by test_auc, with std bands
fig = plotter.plot_best_runs(
    experiment_name=exp_name,
    metric="test_auc",
    n_best=3,
    plot_metrics=["train_loss", "test_auc"],
    show_std=True,
    std_width=2.0,
    figsize=(14, 5),
)
plt.show()

In [None]:
# Pull the metric history of the first listed run for custom analysis
if len(runs):
    run_id = runs.iloc[0]["run_id"]
    histories = plotter.get_run_metrics_history(
        run_id=run_id,
        metric_keys=["train_loss", "test_auc"],
    )
    # (heading, metric key) pairs, printed in this order
    for heading, key in (
        ("Train Loss history:", "train_loss"),
        ("\nTest AUC history:", "test_auc"),
    ):
        print(heading)
        print(histories[key].head())

In [None]:
# Analyze the grid search experiment: run count, summary table, comparison plot
grid_exp_name = "example4_grid_search"

try:
    grid_runs = plotter.get_runs(experiment_name=grid_exp_name)
    print(f"Grid search runs: {len(grid_runs)}")

    # Tabulate the requested metrics and params for each run
    grid_summary = plotter.summary_table(
        experiment_name=grid_exp_name,
        metrics=["test_auc", "train_loss"],
        params=["n_latent", "lr", "loss_function"],
    )
    print("\nGrid search results:")
    display(grid_summary)

    # Compare the grid search runs with std bands enabled
    fig = plotter.plot_runs_comparison(
        experiment_name=grid_exp_name,
        metrics=["train_loss", "test_auc"],
        show_std=True,
        std_width=1.0,
        figsize=(14, 5),
    )
    plt.show()
except ValueError as e:
    # NOTE(review): the try block also covers the summary and plotting calls,
    # so a ValueError from any of them is reported here as "not found".
    print(f"Grid search experiment not found: {e}")
    print("Run Example 4 from simple_pipeline_example.py")

In [None]:
# Create the comparison plot and save it to a file.
# The output filename is defined once so that the savefig target and the
# confirmation message cannot drift apart.
output_path = "experiment_comparison.png"

fig = plotter.plot_runs_comparison(
    experiment_name=exp_name,
    metrics=["train_loss", "test_auc"],
    figsize=(14, 5),
    std_width=1.5,
    show_std=True,
)

# Save at 300 dpi; bbox_inches="tight" crops the saved image to the figure's
# tight bounding box.
fig.savefig(
    output_path,
    dpi=300,
    bbox_inches="tight",
)
print(f"Plot saved to {output_path}")
plt.show()