In [67]:
from pathlib import Path
from omegaconf import OmegaConf
import numpy as np
import pandas as pd
import mlflow
from sklearn.metrics import (
    roc_auc_score,
    roc_curve,
    precision_recall_curve,
    RocCurveDisplay,
    PrecisionRecallDisplay,
    precision_score,
    recall_score,
    accuracy_score
)

filepaths = OmegaConf.load("../configs/filepaths/base.yaml")
mlflow.set_tracking_uri(filepaths.tracking_uri)

In [68]:
top_models = pd.read_csv(Path(filepaths.artifacts) / "250209_top_models.csv", sep=',')
top_models.head()

Unnamed: 0,Start Time,Duration,Run ID,Name,Source Type,Source Name,User,Status,X_d_transform,batch_norm,...,epoch,train_loss,val/accuracy,val/binary_precision,val/binary_recall,val/f1,val/mcc,val/prc,val/roc,val_loss
0,2025-02-07 13:21:04,3.9h,ff90895a6070499b8fdcdc0982526885,dazzling-rat-180,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,spn1560,FINISHED,,True,...,24,0.025103,0.809866,0.963687,0.631624,0.762649,0.653961,0.921851,0.912527,0.762649
1,2025-02-07 13:20:39,3.1h,8a155bc5673b4c6aa8667dd55755fe0b,zealous-eel-839,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,spn1560,FINISHED,,True,...,24,0.002602,0.809679,0.972779,0.624411,0.76016,0.656536,0.959718,0.961152,0.76016
2,2025-02-07 13:20:39,3.1h,5ca6ba7b8aec4b81a366e9138b9c09b0,smiling-colt-432,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,spn1560,FINISHED,,True,...,24,0.004807,0.797277,0.969662,0.600536,0.741214,0.635536,0.947565,0.949833,0.741214
3,2025-02-07 13:20:35,3.4h,2241478505f9497885d25b9a0dee3ffd,fortunate-panda-724,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,spn1560,FINISHED,,True,...,24,0.027877,0.95056,0.979499,0.919235,0.948325,0.902712,0.986876,0.986808,0.948325
4,2025-02-07 13:20:33,3.4h,e638683b53f84d0ba1c8f32794413e0e,burly-goat-264,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,spn1560,FINISHED,,True,...,24,0.044305,0.949125,0.977389,0.918329,0.946844,0.899774,0.984206,0.986065,0.946844


In [69]:
search_cols = [
    "data/neg_multiple",
    "data/split_strategy",
    "training/pos_multiplier",
    "model/name",
    "model/d_h_encoder",
    "model/encoder_depth",
    "model/radius",
    "model/vec_len",
]

top_models.loc[:, search_cols]

Unnamed: 0,data/neg_multiple,data/split_strategy,training/pos_multiplier,model/name,model/d_h_encoder,model/encoder_depth,model/radius,model/vec_len
0,3,rcmcs,3,bom,300,6.0,,
1,3,rcmcs,3,rc_agg,300,4.0,,
2,3,rcmcs,3,rc_cxn,300,6.0,,
3,3,homology,3,rc_cxn,300,6.0,,
4,3,homology,3,rc_agg,300,4.0,,
5,3,rcmcs,3,mfp,300,,2.0,2048.0
6,3,homology,3,mfp,300,,2.0,2048.0
7,3,homology,3,bom,300,6.0,,


In [70]:
runs = []
for i, row in top_models.iterrows():
    conditions = ["params.'data/split_idx' != '-1'"]
    for col in search_cols:
        val = row[col]

        if pd.isna(val):
            continue
        
        if isinstance(val, float) and val % 1 == 0:
            val = int(val)

        conditions.append(f"params.'{col}' = '{val}'")

    filter_string = " AND ".join(conditions)
    runs.append(mlflow.search_runs(filter_string=filter_string))

In [71]:
runs = pd.concat(runs)
runs.head()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.val/binary_recall,metrics.val/binary_precision,metrics.val/roc,metrics.val/f1,...,params.model/model,params.model/pred_head,params.training/n_epochs,params.data/subdir_patt,tags.mlflow.source.type,tags.mlflow.source.name,tags.mlflow.runName,tags.mlflow.user,params.model/radius,params.model/vec_len
0,af1e9fbcef4546e689652a15ef6123ff,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-03 10:48:14.384000+00:00,2025-02-04 08:33:09.719000+00:00,0.773198,0.963145,0.95855,0.857535,...,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,grandiose-robin-923,spn1560,,
1,eb05684d7fd846eaa95654c7814abd40,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-03 10:10:25.056000+00:00,2025-02-04 10:21:46.689000+00:00,0.668354,0.966711,0.946668,0.789959,...,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,gaudy-mule-125,spn1560,,
2,e2c936866af944a1aaa3e73380d2c072,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-03 09:29:31.200000+00:00,2025-02-04 13:07:34.342000+00:00,0.595301,0.957135,0.942648,0.733743,...,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,mysterious-fawn-710,spn1560,,
0,db987cbcf4c6492b90fd2f1adab33b96,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-01 02:06:29.565000+00:00,2025-02-01 13:56:03.526000+00:00,0.690844,0.970544,0.957237,0.806823,...,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,silent-seal-423,spn1560,,
1,04b0957ef18f4ed69ce0f114702c8f00,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-01 01:46:49.096000+00:00,2025-02-02 05:38:13.628000+00:00,0.680673,0.968373,0.948078,0.79907,...,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,valuable-chimp-577,spn1560,,


In [None]:
runs

24

In [None]:
runs = mlflow.search_runs(
        filter_string=filter_string
    )

In [66]:
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.val/binary_recall,metrics.val/binary_precision,metrics.val/roc,metrics.val/f1,...,params.max_lr,params.data/toc,params.model/model,params.model/pred_head,params.training/n_epochs,params.data/subdir_patt,tags.mlflow.source.type,tags.mlflow.source.name,tags.mlflow.runName,tags.mlflow.user
0,af1e9fbcef4546e689652a15ef6123ff,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-03 10:48:14.384000+00:00,2025-02-04 08:33:09.719000+00:00,0.773198,0.963145,0.95855,0.857535,...,0.001,v3_folded_pt_ns,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,grandiose-robin-923,spn1560
1,eb05684d7fd846eaa95654c7814abd40,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-03 10:10:25.056000+00:00,2025-02-04 10:21:46.689000+00:00,0.668354,0.966711,0.946668,0.789959,...,0.001,v3_folded_pt_ns,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,gaudy-mule-125,spn1560
2,e2c936866af944a1aaa3e73380d2c072,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-02-03 09:29:31.200000+00:00,2025-02-04 13:07:34.342000+00:00,0.595301,0.957135,0.942648,0.733743,...,0.001,v3_folded_pt_ns,mpnn_dim_red,DotSig,25,sprhea_v3_folded_pt_ns/rcmcs/3fold,LOCAL,/home/spn1560/.conda/envs/hiec/lib/python3.11/...,mysterious-fawn-710,spn1560


In [31]:
mlflow.search_runs(
    filter_string="params.'model/encoder_depth' = 6.0"
)

MlflowException: Expected a quoted string value for parameter (e.g. 'my-value'). Got value 6.0