In [1]:
import mlflow
import pandas as pd
from pathlib import Path
from hydra import compose, initialize

In [2]:
# Compose the "base" Hydra config found under ../configs/filepaths.
# `fps.tracking_uri` is read further down in this notebook.
with initialize(config_path="../configs/filepaths", version_base=None):
    fps = compose(config_name="base")

def get_best_epochs(run_id: str, experiment_id: str = "0", tracking_uri: "str | None" = None) -> int:
    """Return the epoch of the highest-scoring checkpoint saved for a run.

    Checkpoints are looked up in
    ``<tracking dir>/<experiment_id>/<run_id>/checkpoints/*.ckpt``. Each file
    stem is split on ``'-'``; field 2 is parsed as the epoch (int) and field 4
    as the score (float). The checkpoint with the largest score wins, so this
    assumes a higher-is-better metric -- TODO confirm against the checkpoint
    filename template used by the training script.

    Args:
        run_id: Run ID whose checkpoint directory is inspected.
        experiment_id: Experiment sub-directory inside the tracking
            directory. Defaults to ``"0"``.
        tracking_uri: Tracking URI or plain directory path; a leading
            ``"file:///"`` is stripped. Defaults to ``fps.tracking_uri``.

    Returns:
        Epoch number of the best checkpoint, or ``-1`` if no ``.ckpt`` files
        are found.

    Raises:
        IndexError, ValueError: If a checkpoint name does not follow the
            expected ``'-'``-separated layout (note that a ``'-'`` inside the
            score itself, e.g. ``1e-05``, would shift the fields).
    """
    if tracking_uri is None:
        tracking_uri = fps.tracking_uri

    # NOTE(review): removeprefix("file:///") also drops the root slash of a
    # POSIX "file:///abs/path" URI, which would yield a relative path --
    # confirm the format of the configured tracking_uri.
    ckpt_dir = Path(tracking_uri.removeprefix("file:///")) / experiment_id / run_id / "checkpoints"

    scored_epochs = []
    for ckpt in ckpt_dir.glob("*.ckpt"):
        fields = ckpt.stem.split("-")
        scored_epochs.append((float(fields[4]), int(fields[2])))

    if not scored_epochs:
        return -1

    # max() returns the first maximal entry, matching the tie behaviour of the
    # stable descending sort this replaces.
    _, best_epoch = max(scored_epochs, key=lambda pair: pair[0])
    return best_epoch

In [3]:
# Use the tracking URI from the Hydra filepaths config for the MLflow calls below.
mlflow.set_tracking_uri(fps.tracking_uri)

In [4]:
# Select the runs whose logged `data/split_idx` param equals '0'.
filter_split = "params.'data/split_idx' = '0'"
runs = mlflow.search_runs(filter_string=filter_split)

In [5]:
# Show the runs matched by the split_idx == '0' filter.
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.val_loss,metrics.val/binary_precision,metrics.epoch,metrics.train_loss,...,params.data/split_idx,params.model/encoder_depth,params.data/toc,params.data/subdir_patt,params.model/name,params.model/vec_len,tags.mlflow.user,tags.mlflow.source.type,tags.mlflow.source.name,tags.mlflow.runName
0,7725706057394618bc1c2fad58614627,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-14 01:23:51.702000+00:00,2025-12-14 04:35:54.902000+00:00,0.763859,0.738416,24.0,0.064874,...,0,5.0,v3_folded_pt_ns,sprhea_v3_folded_pt_ns/rcmcs/3fold,bom,,spn1560,LOCAL,/home/spn1560/hiec/scripts/train.py,judicious-moth-841
1,0917682b20b545a59ed98c2d423c56d1,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-13 21:51:51.302000+00:00,2025-12-14 01:13:13.305000+00:00,0.681207,0.778439,24.0,0.028632,...,0,4.0,v3_folded_pt_ns,sprhea_v3_folded_pt_ns/rcmcs/3fold,rc_cxn,,spn1560,LOCAL,/home/spn1560/hiec/scripts/train.py,treasured-shark-730
2,e109a0a7d3564f99bb6688796405dbf1,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-13 21:51:51.302000+00:00,2025-12-14 01:20:24.607000+00:00,0.658368,0.802022,24.0,0.018697,...,0,4.0,v3_folded_pt_ns,sprhea_v3_folded_pt_ns/rcmcs/3fold,rc_agg,,spn1560,LOCAL,/home/spn1560/hiec/scripts/train.py,fearless-dog-559
3,6bb1753ed0b14a6fb6eddb0d0b3679a5,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-13 21:49:12.431000+00:00,2025-12-13 22:03:39.122000+00:00,0.269219,0.240004,24.0,0.80305,...,0,,v3_folded_pt_ns,sprhea_v3_folded_pt_ns/random_reaction_center_...,rxnfp,256.0,spn1560,LOCAL,/home/spn1560/hiec/scripts/train.py,lyrical-elk-168
4,e54a25eafc65463a95dee11b4a46e4cd,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-13 21:49:12.431000+00:00,2025-12-13 22:07:59.406000+00:00,0.645765,0.714372,24.0,0.777149,...,0,,v3_folded_pt_ns,sprhea_v3_folded_pt_ns/rcmcs/3fold,rxnfp,256.0,spn1560,LOCAL,/home/spn1560/hiec/scripts/train.py,grandiose-steed-931
5,f0d279b0ffed4544bc23a81327e449d0,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-13 21:49:12.431000+00:00,2025-12-13 22:03:20.109000+00:00,0.508219,0.473674,24.0,0.795655,...,0,,v3_folded_pt_ns,sprhea_v3_folded_pt_ns/random_reaction_alterna...,rxnfp,256.0,spn1560,LOCAL,/home/spn1560/hiec/scripts/train.py,efficient-cub-361


In [6]:
# List the artifacts logged for run 6bb1753e... (row 3 of `runs`);
# the saved output for this cell is an empty list.
mlflow.artifacts.list_artifacts(run_id="6bb1753ed0b14a6fb6eddb0d0b3679a5")

[]

In [7]:

# Attach each run's best checkpoint epoch (get_best_epochs gives -1 when it finds no checkpoints).
runs['best_epoch'] = runs['run_id'].apply(get_best_epochs)

# Show model / split / negative-sampling settings next to the selected epoch.
runs[
    [
        'params.model/name',
        'params.data/split_strategy',
        'params.data/negative_sampling',
        'best_epoch',
        'run_id',
    ]
]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,best_epoch,run_id
0,bom,rcmcs,random,8,7725706057394618bc1c2fad58614627
1,rc_cxn,rcmcs,random,8,0917682b20b545a59ed98c2d423c56d1
2,rc_agg,rcmcs,random,11,e109a0a7d3564f99bb6688796405dbf1
3,rxnfp,random_reaction_center,alternate_reaction_center,23,6bb1753ed0b14a6fb6eddb0d0b3679a5
4,rxnfp,rcmcs,random,11,e54a25eafc65463a95dee11b4a46e4cd
5,rxnfp,random_reaction,alternate_reaction_center,8,f0d279b0ffed4544bc23a81327e449d0


In [None]:
# Select the runs whose logged `data/split_idx` param equals '-1'.
# NOTE(review): the name `test_runs` suggests these are test-set evaluations -- confirm.
filter_split = "params.'data/split_idx' = '-1'"
test_runs = mlflow.search_runs(filter_string=filter_split)

# Show each run's settings alongside its logged `val/roc` metric.
test_runs[
    [
        'params.model/name',
        'params.data/split_strategy',
        'params.data/negative_sampling',
        'metrics.val/roc',
        'run_id',
    ]
]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,metrics.val/roc,run_id
0,rc_agg,rcmcs,random,0.951293,1dc77cd293f349c8be87d9184528b7b7
1,rc_cxn,rcmcs,random,0.939263,07c2d004e027456eae081e4ac84da733
2,rxnfp,rcmcs,random,0.89531,984013781db24690a081f9ce4cb5e703
3,rxnfp,random_reaction_center,alternate_reaction_center,0.654164,81ab29180cc646229ba100fa9e74dfc4
4,rxnfp,random_reaction,alternate_reaction_center,0.885321,ca1dd48a1eb642848449ce897d943a72


In [10]:
# Select the runs whose logged `data/split_idx` param equals '-2'.
# NOTE(review): the name `prod_runs` suggests these are production models -- confirm.
filter_split = "params.'data/split_idx' = '-2'"
prod_runs = mlflow.search_runs(filter_string=filter_split)

In [11]:
# Show each selected run's settings alongside its logged `epoch` metric.
prod_runs[
    [
        'params.model/name',
        'params.data/split_strategy',
        'params.data/negative_sampling',
        'metrics.epoch',
        'run_id',
    ]
]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,metrics.epoch,run_id
0,rc_cxn,rcmcs,random,7.0,550bae7cedce4a20b5669db5c8fb5b98
1,rc_agg,rcmcs,random,10.0,45e615c9594445cb90fe4ae092b3b694
