In [34]:
import mlflow
import pandas as pd
from pathlib import Path
from hydra import compose, initialize

In [None]:
# Compose the "base" file-path config with Hydra; `fps` is read by later cells
# (e.g. for `fps.tracking_uri`).
with initialize(version_base=None, config_path="../configs/filepaths"):
    fps = compose(config_name="base")

def get_best_epochs(run_id: str) -> int:
    """Return the 1-indexed epoch of the best-scoring checkpoint saved for a run.

    Checkpoints are globbed from ``<tracking dir>/0/<run_id>/checkpoints``, where
    the tracking dir is ``fps.tracking_uri`` with its ``file:///`` prefix removed
    (the ``"0"`` component is presumably the MLflow experiment id -- confirm if
    other experiments are ever used).

    Each checkpoint stem is split on ``-``: field 2 is read as the 0-indexed
    epoch and field 4 as the validation score, and the highest score wins.
    Files whose names do not fit that layout (for example a bare ``last.ckpt``)
    are skipped instead of raising, so one stray file cannot abort a whole
    ``DataFrame.apply`` over many runs.

    Args:
        run_id: MLflow run id whose checkpoint directory is scanned.

    Returns:
        The epoch of the highest-scoring checkpoint plus one, or -1 when the
        directory holds no checkpoint with a parseable name.
    """
    ckpt_dir = Path(fps.tracking_uri.removeprefix("file:///")) / "0" / run_id / "checkpoints"

    val_epochs = []
    for ckpt in ckpt_dir.glob('*.ckpt'):
        fields = ckpt.stem.split('-')
        try:
            val_epochs.append((float(fields[4]), int(fields[2])))
        except (IndexError, ValueError):
            # Too few fields or non-numeric fields: not a scored checkpoint.
            continue

    if not val_epochs:
        return -1

    # max() keeps the first of equally-scored checkpoints, matching what a
    # stable descending sort followed by [0] would select.
    _, best_epoch = max(val_epochs, key=lambda pair: pair[0])
    return best_epoch + 1  # epochs are 0-indexed in filenames

In [36]:
# Point MLflow at the tracking location from the composed config so that the
# `mlflow.search_runs` calls in later cells query that store.
mlflow.set_tracking_uri(fps.tracking_uri)

In [37]:
# Select the runs whose logged `data/split_idx` parameter equals '0'.
filter_split = "params.'data/split_idx' = '0'"
runs = mlflow.search_runs(filter_string=filter_split)

In [38]:
# Show the runs returned by the split_idx == '0' query.
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.epoch,metrics.val/accuracy,metrics.val/binary_recall,metrics.val/roc,...,params.batch_norm,params.reduce_X_d,params.max_lr,params.X_d_transform,params.init_lr,params.model/radius,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.source.type,tags.mlflow.source.name
0,cfb7fdbeb07a4967affe7f4485957236,0,RUNNING,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 09:54:59.472000+00:00,NaT,20.0,0.912826,0.865118,0.937603,...,,,,,,,spn1560,bouncy-lark-884,LOCAL,/home/spn1560/hiec/scripts/train.py
1,8a016e73833f4a9f8e3127ab05a8f448,0,RUNNING,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:27:55.540000+00:00,NaT,16.0,0.855373,0.796199,0.867821,...,,,,,,,spn1560,resilient-fish-941,LOCAL,/home/spn1560/hiec/scripts/train.py
2,e3738fe315f64c928c44fe4f8ac6de24,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:27:55.540000+00:00,2025-12-15 12:29:26.686000+00:00,24.0,0.956274,0.825543,0.97245,...,True,LinDimRed(\n (linear_layer): Linear(in_featur...,0.001,,0.0001,,spn1560,unleashed-colt-871,LOCAL,/home/spn1560/hiec/scripts/train.py
3,ec68f688ce774c09a5811c38a38b1f8f,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:27:35.721000+00:00,2025-12-15 09:40:41.798000+00:00,24.0,0.890778,0.900059,0.920642,...,,,,,,2.0,spn1560,dapper-calf-261,LOCAL,/home/spn1560/hiec/scripts/train.py
4,0c19be31db744a0e8a0c87905f46c81a,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:17:09.981000+00:00,2025-12-15 11:10:55.009000+00:00,24.0,0.826837,0.804593,0.837735,...,,,,,,2.0,spn1560,big-croc-84,LOCAL,/home/spn1560/hiec/scripts/train.py
5,6c60b4f3d0d74c4a9c71ab58d8ca79df,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:16:53.174000+00:00,2025-12-15 13:26:15.782000+00:00,24.0,0.911444,0.765037,0.910619,...,True,LinDimRed(\n (linear_layer): Linear(in_featur...,0.001,,0.0001,,spn1560,grandiose-hen-412,LOCAL,/home/spn1560/hiec/scripts/train.py
6,825c6f409237497991112112d2432f48,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 03:54:04.160000+00:00,2025-12-15 11:15:43.168000+00:00,24.0,0.943353,0.550799,0.947453,...,True,LinDimRed(\n (linear_layer): Linear(in_featur...,0.001,,0.0001,,spn1560,rebellious-finch-358,LOCAL,/home/spn1560/hiec/scripts/train.py
7,d651d3368fc8427383ab68266359966f,0,RUNNING,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 03:54:03.281000+00:00,NaT,17.0,0.859467,0.457157,0.784445,...,,,,,,,spn1560,calm-sponge-3,LOCAL,/home/spn1560/hiec/scripts/train.py
8,707754f2458b467d871b76e7cf93b8df,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 03:53:38.219000+00:00,2025-12-15 10:33:58.676000+00:00,24.0,0.882118,0.493345,0.800193,...,,,,,,2.0,spn1560,industrious-loon-772,LOCAL,/home/spn1560/hiec/scripts/train.py
9,4b7d87b9ffba476e82cadf81c28dd3f8,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-14 22:45:07.456000+00:00,2025-12-15 02:25:48.744000+00:00,24.0,0.879614,0.618912,0.877561,...,True,LinDimRed(\n (linear_layer): Linear(in_featur...,0.001,,0.0001,,spn1560,silent-croc-665,LOCAL,/home/spn1560/hiec/scripts/train.py


In [40]:

# Add each run's best checkpoint epoch (-1 when no checkpoint was found), then
# show it beside the model / split / negative-sampling parameters.
runs['best_epoch'] = runs['run_id'].apply(get_best_epochs)
runs[
    [
        'params.model/name',
        'params.data/split_strategy',
        'params.data/negative_sampling',
        'best_epoch',
        'run_id',
    ]
]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,best_epoch,run_id
0,drfp,random_reaction,alternate_reaction_center,21,cfb7fdbeb07a4967affe7f4485957236
1,drfp,random_reaction_center,alternate_reaction_center,9,8a016e73833f4a9f8e3127ab05a8f448
2,cgr,random_reaction,alternate_reaction_center,15,e3738fe315f64c928c44fe4f8ac6de24
3,mfp,random_reaction,alternate_reaction_center,24,ec68f688ce774c09a5811c38a38b1f8f
4,mfp,random_reaction_center,alternate_reaction_center,3,0c19be31db744a0e8a0c87905f46c81a
5,cgr,random_reaction_center,alternate_reaction_center,24,6c60b4f3d0d74c4a9c71ab58d8ca79df
6,cgr,rcmcs,random,12,825c6f409237497991112112d2432f48
7,drfp,rcmcs,random,15,d651d3368fc8427383ab68266359966f
8,mfp,rcmcs,random,12,707754f2458b467d871b76e7cf93b8df
9,bom,random_reaction,alternate_reaction_center,24,4b7d87b9ffba476e82cadf81c28dd3f8


In [41]:
# Select the runs whose logged `data/split_idx` parameter equals '-1'
# (kept as `test_runs`) and show their validation ROC next to the configuration.
filter_split = "params.'data/split_idx' = '-1'"
test_runs = mlflow.search_runs(filter_string=filter_split)

test_runs[
    [
        'params.model/name',
        'params.data/split_strategy',
        'params.data/negative_sampling',
        'metrics.val/roc',
        'run_id',
    ]
]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,metrics.val/roc,run_id
0,cgr,random_reaction,alternate_reaction_center,0.985638,120f090f4b3e471a965979401b3271ae
1,cgr,random_reaction_center,alternate_reaction_center,0.807885,34cda1e3f64a4b18a98894ed0e86be86
2,cgr,rcmcs,random,0.922106,e627a08cec3c47e18b4fd9f1ade6b9fe
3,mfp,random_reaction,alternate_reaction_center,0.852761,7e836b6654ac4b9aba81fe692b059c5d
4,mfp,random_reaction_center,alternate_reaction_center,0.67967,ec2310e5872f47de8b9c81d1edce26b9
5,mfp,rcmcs,random,0.755406,04cc886dade64a68a84b151ddd92cef2
6,bom,random_reaction,alternate_reaction_center,0.941901,55c062d229394c4f82828814300c0f2b
7,rc_cxn,random_reaction,alternate_reaction_center,0.974725,56c0235555af4e4c98dd6293c824cc2f
8,rc_agg,random_reaction,alternate_reaction_center,0.985613,1d6c62f5f2d44b3fa82851639fb44ead
9,rc_agg,random_reaction_center,alternate_reaction_center,0.835046,30ed98a93bb149f6b7b6c08779e95915


In [42]:
# Select the runs whose logged `data/split_idx` parameter equals '-2'
# (kept as `prod_runs`).
filter_split = "params.'data/split_idx' = '-2'"
prod_runs = mlflow.search_runs(filter_string=filter_split)

In [43]:
# Show the last logged epoch of each split_idx == '-2' run next to its configuration.
prod_runs[
    [
        'params.model/name',
        'params.data/split_strategy',
        'params.data/negative_sampling',
        'metrics.epoch',
        'run_id',
    ]
]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,metrics.epoch,run_id
0,cgr,random_reaction_center,alternate_reaction_center,14.0,25964e2b2fcb4ee1aa6f5487866209bd
1,cgr,rcmcs,random,11.0,4c7872263bba4bd2952057c3146c7c9f
2,mfp,random_reaction_center,alternate_reaction_center,23.0,b736cf547f5b46a0b8eb0308bddcabbd
3,mfp,rcmcs,random,11.0,baa4374b85ca4deaa6df54cdc14617b7
4,rc_agg,random_reaction_center,alternate_reaction_center,5.0,9226ba678bcd42fbb95b44979deeabff
5,rc_cxn,random_reaction_center,alternate_reaction_center,11.0,99fb3dad48464fd8a2af365b5fd57621
6,bom,random_reaction_center,alternate_reaction_center,23.0,fafcf359d8864d1a8d012d65eb4f350d
7,rxnfp,rcmcs,random,11.0,2ae3adca4b534b6cb25872db6788bbad
8,rxnfp,random_reaction_center,alternate_reaction_center,23.0,b37763cf52bd4949acbc6c0b0423a054
9,rxnfp,random_reaction,alternate_reaction_center,8.0,e0058aa0fae9479385f6d75dd2aa19fa


In [45]:
# Print the run ids one per line: first the split_idx == '0' runs, then the
# split_idx == '-1' runs.
for run_id in runs['run_id']:
    print(run_id)

for run_id in test_runs['run_id']:
    print(run_id)

cfb7fdbeb07a4967affe7f4485957236
8a016e73833f4a9f8e3127ab05a8f448
e3738fe315f64c928c44fe4f8ac6de24
ec68f688ce774c09a5811c38a38b1f8f
0c19be31db744a0e8a0c87905f46c81a
6c60b4f3d0d74c4a9c71ab58d8ca79df
825c6f409237497991112112d2432f48
d651d3368fc8427383ab68266359966f
707754f2458b467d871b76e7cf93b8df
4b7d87b9ffba476e82cadf81c28dd3f8
ac3565b3fe984552922766a57944a54a
103f73d69ff640c99bb8645a2e9146c6
9f917717289f4e9a89564564a38361f3
28abcb5c5573447fa20f212703d5d597
8cff9872dc0441e5a7d1561d75e41149
7725706057394618bc1c2fad58614627
0917682b20b545a59ed98c2d423c56d1
e109a0a7d3564f99bb6688796405dbf1
6bb1753ed0b14a6fb6eddb0d0b3679a5
e54a25eafc65463a95dee11b4a46e4cd
f0d279b0ffed4544bc23a81327e449d0
120f090f4b3e471a965979401b3271ae
34cda1e3f64a4b18a98894ed0e86be86
e627a08cec3c47e18b4fd9f1ade6b9fe
7e836b6654ac4b9aba81fe692b059c5d
ec2310e5872f47de8b9c81d1edce26b9
04cc886dade64a68a84b151ddd92cef2
55c062d229394c4f82828814300c0f2b
56c0235555af4e4c98dd6293c824cc2f
1d6c62f5f2d44b3fa82851639fb44ead
30ed98a93b