In [2]:
import mlflow
import pandas as pd
from pathlib import Path
from hydra import compose, initialize

In [3]:
# Compose the "base" config from ../configs/filepaths; the resulting `fps`
# object supplies the paths used below (e.g. `fps.tracking_uri`).
with initialize(version_base=None, config_path="../configs/filepaths"):
    fps = compose(config_name="base")

def get_best_epoch(run_id: str, experiment_id: str = "0") -> int:
    """Return the 1-based epoch of the highest-scoring checkpoint of a run.

    Checkpoints are looked up in
    ``<tracking root>/<experiment_id>/<run_id>/checkpoints/*.ckpt``, where the
    tracking root is ``fps.tracking_uri`` with its ``file:///`` prefix removed.
    Each checkpoint stem is split on ``-``; field 2 is read as the 0-indexed
    epoch and field 4 as the score. The checkpoint with the largest score wins.
    NOTE(review): this assumes the score in the filename is higher-is-better —
    confirm against the checkpoint callback's monitored metric.

    Args:
        run_id: Run id whose checkpoint directory is inspected.
        experiment_id: Experiment sub-directory under the tracking root.
            Defaults to ``"0"``, the value that used to be hard-coded.

    Returns:
        Epoch of the best checkpoint plus one, or ``-1`` when the run has no
        checkpoint whose file name follows the expected pattern.
    """
    ckpt_dir = (
        Path(fps.tracking_uri.removeprefix("file:///"))
        / experiment_id
        / run_id
        / "checkpoints"
    )

    scored_epochs = []
    for ckpt in ckpt_dir.glob("*.ckpt"):
        fields = ckpt.stem.split("-")
        try:
            scored_epochs.append((float(fields[4]), int(fields[2])))
        except (IndexError, ValueError):
            # File name does not carry "<..>-<..>-<epoch>-<..>-<score>"
            # (e.g. a differently named checkpoint copy): skip it instead of
            # failing the lookup for the whole run.
            continue

    if not scored_epochs:
        return -1

    # max() returns the first maximal item, which is the same element a
    # stable descending sort followed by [0] selects when scores tie.
    _, best_epoch = max(scored_epochs, key=lambda pair: pair[0])
    return best_epoch + 1  # epochs are 0-indexed in filenames

In [4]:
# Point MLflow at the tracking location given by the composed config (`fps.tracking_uri`).
mlflow.set_tracking_uri(fps.tracking_uri)

In [5]:
# Fetch the runs whose logged `data/split_idx` parameter equals '0'.
filter_split = "params.'data/split_idx' = '0'"
runs = mlflow.search_runs(filter_string=filter_split)

In [6]:

# Record the best checkpoint epoch of every run as a new column on `runs`.
runs['best_epoch'] = runs['run_id'].apply(get_best_epoch)

# Show the runs ordered by model, split strategy and negative sampling,
# restricted to the identifying columns plus the best epoch.
runs.sort_values(
    by=['params.model/name', 'params.data/split_strategy', 'params.data/negative_sampling']
)[[
    'params.model/name',
    'params.data/split_strategy',
    'params.data/negative_sampling',
    'best_epoch',
    'run_id',
]]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,best_epoch,run_id
9,bom,random_reaction,alternate_reaction_center,24,4b7d87b9ffba476e82cadf81c28dd3f8
14,bom,random_reaction_center,alternate_reaction_center,24,8cff9872dc0441e5a7d1561d75e41149
15,bom,rcmcs,random,9,7725706057394618bc1c2fad58614627
2,cgr,random_reaction,alternate_reaction_center,15,e3738fe315f64c928c44fe4f8ac6de24
5,cgr,random_reaction_center,alternate_reaction_center,24,6c60b4f3d0d74c4a9c71ab58d8ca79df
6,cgr,rcmcs,random,12,825c6f409237497991112112d2432f48
0,drfp,random_reaction,alternate_reaction_center,21,cfb7fdbeb07a4967affe7f4485957236
1,drfp,random_reaction_center,alternate_reaction_center,9,8a016e73833f4a9f8e3127ab05a8f448
7,drfp,rcmcs,random,15,d651d3368fc8427383ab68266359966f
3,mfp,random_reaction,alternate_reaction_center,24,ec68f688ce774c09a5811c38a38b1f8f


In [13]:
# Fetch the runs whose logged `data/split_idx` parameter equals '-1'.
filter_split = "params.'data/split_idx' = '-1'"
test_runs = mlflow.search_runs(filter_string=filter_split)

# Show the split_idx = -1 runs with their val/roc metric, ordered by split
# strategy, then negative sampling, then model name.
test_runs.sort_values(
    by=['params.data/split_strategy', 'params.data/negative_sampling', 'params.model/name']
)[[
    'params.model/name',
    'params.data/split_strategy',
    'params.data/negative_sampling',
    'metrics.val/roc',
    'run_id',
]]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,metrics.val/roc,run_id
12,bom,random_reaction,alternate_reaction_center,0.941901,55c062d229394c4f82828814300c0f2b
6,cgr,random_reaction,alternate_reaction_center,0.9825,120f090f4b3e471a965979401b3271ae
3,drfp,random_reaction,alternate_reaction_center,0.90857,c4fdab61494a4affb7b48b3f18b0a314
9,mfp,random_reaction,alternate_reaction_center,0.852761,7e836b6654ac4b9aba81fe692b059c5d
14,rc_agg,random_reaction,alternate_reaction_center,0.985613,1d6c62f5f2d44b3fa82851639fb44ead
13,rc_cxn,random_reaction,alternate_reaction_center,0.974725,56c0235555af4e4c98dd6293c824cc2f
0,rxnfp,random_reaction,alternate_reaction_center,0.8836,000fc5f378954a27bf934ccd58241a57
16,bom,random_reaction_center,alternate_reaction_center,0.729101,35d782fa639e4035b08d5c74a4f002f9
7,cgr,random_reaction_center,alternate_reaction_center,0.807885,34cda1e3f64a4b18a98894ed0e86be86
4,drfp,random_reaction_center,alternate_reaction_center,0.733257,a91ff6acc5a848f18a179c42f5127a85


In [8]:
# Fetch the runs whose logged `data/split_idx` parameter equals '-2'.
filter_split = "params.'data/split_idx' = '-2'"
prod_runs = mlflow.search_runs(filter_string=filter_split)

In [9]:
# Show the split_idx = -2 runs with their logged `epoch` metric, ordered by
# model, split strategy and negative sampling.
prod_runs.sort_values(
    by=['params.model/name', 'params.data/split_strategy', 'params.data/negative_sampling']
)[[
    'params.model/name',
    'params.data/split_strategy',
    'params.data/negative_sampling',
    'metrics.epoch',
    'run_id',
]]

Unnamed: 0,params.model/name,params.data/split_strategy,params.data/negative_sampling,metrics.epoch,run_id
9,bom,random_reaction_center,alternate_reaction_center,23.0,fafcf359d8864d1a8d012d65eb4f350d
13,bom,rcmcs,random,8.0,2fcb812a06c841d282d378d9f99e64c9
3,cgr,random_reaction_center,alternate_reaction_center,14.0,25964e2b2fcb4ee1aa6f5487866209bd
4,cgr,rcmcs,random,11.0,4c7872263bba4bd2952057c3146c7c9f
0,drfp,random_reaction,alternate_reaction_center,15.0,e981b96ef2a34a2382e727c4c3f9999e
1,drfp,random_reaction_center,alternate_reaction_center,8.0,636aa60f113244c5af41fa0573b6e814
2,drfp,rcmcs,random,14.0,cef83964c0cb429aa6724da42fd4cb7b
5,mfp,random_reaction_center,alternate_reaction_center,23.0,b736cf547f5b46a0b8eb0308bddcabbd
6,mfp,rcmcs,random,11.0,baa4374b85ca4deaa6df54cdc14617b7
7,rc_agg,random_reaction_center,alternate_reaction_center,5.0,9226ba678bcd42fbb95b44979deeabff


In [10]:
# Print every run id, first from `runs` and then from `test_runs`, one per line.
for frame in (runs, test_runs):
    for run_id in frame['run_id']:
        print(run_id)

cfb7fdbeb07a4967affe7f4485957236
8a016e73833f4a9f8e3127ab05a8f448
e3738fe315f64c928c44fe4f8ac6de24
ec68f688ce774c09a5811c38a38b1f8f
0c19be31db744a0e8a0c87905f46c81a
6c60b4f3d0d74c4a9c71ab58d8ca79df
825c6f409237497991112112d2432f48
d651d3368fc8427383ab68266359966f
707754f2458b467d871b76e7cf93b8df
4b7d87b9ffba476e82cadf81c28dd3f8
ac3565b3fe984552922766a57944a54a
103f73d69ff640c99bb8645a2e9146c6
9f917717289f4e9a89564564a38361f3
28abcb5c5573447fa20f212703d5d597
8cff9872dc0441e5a7d1561d75e41149
7725706057394618bc1c2fad58614627
0917682b20b545a59ed98c2d423c56d1
e109a0a7d3564f99bb6688796405dbf1
6bb1753ed0b14a6fb6eddb0d0b3679a5
e54a25eafc65463a95dee11b4a46e4cd
f0d279b0ffed4544bc23a81327e449d0
000fc5f378954a27bf934ccd58241a57
bafce811361c4d19afe7919fb4666fb2
f1cf506a0c0f48b49bbf491f51db24f7
c4fdab61494a4affb7b48b3f18b0a314
a91ff6acc5a848f18a179c42f5127a85
36a93d8131bd460599ef98740f8d322d
120f090f4b3e471a965979401b3271ae
34cda1e3f64a4b18a98894ed0e86be86
e627a08cec3c47e18b4fd9f1ade6b9fe
7e836b6654

In [11]:
# Pair each `runs` row with the `test_runs` row(s) sharing the same model,
# split strategy and negative-sampling scheme. Columns that exist in both
# frames get the suffix `_in` (from `runs`) or `_out` (from `test_runs`).
join_cols = [
    'params.model/name',
    'params.data/split_strategy',
    'params.data/negative_sampling',
]
in_out = runs.merge(
    test_runs,
    on=join_cols,
    how='inner',
    suffixes=('_in', '_out'),
)
in_out.head()

Unnamed: 0,run_id_in,experiment_id_in,status_in,artifact_uri_in,start_time_in,end_time_in,metrics.val/f1_in,metrics.val/mcc_in,metrics.train_loss_in,metrics.val/binary_recall_in,...,params.max_lr_out,params.metrics_out,params.init_lr_out,params.warmup_epochs_out,params.reduce_X_d_out,params.model/radius_out,tags.mlflow.source.type_out,tags.mlflow.runName_out,tags.mlflow.user_out,tags.mlflow.source.name_out
0,cfb7fdbeb07a4967affe7f4485957236,0,RUNNING,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 09:54:59.472000+00:00,NaT,0.68174,0.654328,1.272398,0.865118,...,,,,,,,LOCAL,rumbling-koi-381,spn1560,/home/spn1560/hiec/scripts/train.py
1,8a016e73833f4a9f8e3127ab05a8f448,0,RUNNING,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:27:55.540000+00:00,NaT,0.497429,0.472496,1.273475,0.796199,...,,,,,,,LOCAL,delicate-bird-607,spn1560,/home/spn1560/hiec/scripts/train.py
2,e3738fe315f64c928c44fe4f8ac6de24,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:27:55.540000+00:00,2025-12-15 12:29:26.686000+00:00,0.802645,0.779249,0.039858,0.825543,...,0.001,"[BinaryF1Metric(task_weights=[[1.0]]), BinaryP...",0.0001,2.0,LinDimRed(\n (linear_layer): Linear(in_featur...,,LOCAL,incongruous-ram-908,spn1560,/home/spn1560/hiec/scripts/train.py
3,ec68f688ce774c09a5811c38a38b1f8f,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:27:35.721000+00:00,2025-12-15 09:40:41.798000+00:00,0.640662,0.618722,1.295166,0.900059,...,,,,,,2.0,LOCAL,wistful-horse-72,spn1560,/home/spn1560/hiec/scripts/train.py
4,0c19be31db744a0e8a0c87905f46c81a,0,FINISHED,file:///projects/p30041/spn1560/hiec/results/r...,2025-12-15 04:17:09.981000+00:00,2025-12-15 11:10:55.009000+00:00,0.455349,0.432504,1.28515,0.804593,...,,,,,,2.0,LOCAL,welcoming-shad-999,spn1560,/home/spn1560/hiec/scripts/train.py


In [12]:
# Print the matched run ids as "<run_id_in>: <run_id_out>", one pair per line.
for run_id_in, run_id_out in zip(in_out['run_id_in'], in_out['run_id_out']):
    print(f"{run_id_in}: {run_id_out}")

cfb7fdbeb07a4967affe7f4485957236: c4fdab61494a4affb7b48b3f18b0a314
8a016e73833f4a9f8e3127ab05a8f448: a91ff6acc5a848f18a179c42f5127a85
e3738fe315f64c928c44fe4f8ac6de24: 120f090f4b3e471a965979401b3271ae
ec68f688ce774c09a5811c38a38b1f8f: 7e836b6654ac4b9aba81fe692b059c5d
0c19be31db744a0e8a0c87905f46c81a: ec2310e5872f47de8b9c81d1edce26b9
6c60b4f3d0d74c4a9c71ab58d8ca79df: 34cda1e3f64a4b18a98894ed0e86be86
825c6f409237497991112112d2432f48: e627a08cec3c47e18b4fd9f1ade6b9fe
d651d3368fc8427383ab68266359966f: 36a93d8131bd460599ef98740f8d322d
707754f2458b467d871b76e7cf93b8df: 04cc886dade64a68a84b151ddd92cef2
4b7d87b9ffba476e82cadf81c28dd3f8: 55c062d229394c4f82828814300c0f2b
ac3565b3fe984552922766a57944a54a: 56c0235555af4e4c98dd6293c824cc2f
103f73d69ff640c99bb8645a2e9146c6: 30ed98a93bb149f6b7b6c08779e95915
9f917717289f4e9a89564564a38361f3: 874ef317e1b7403c9be3665c3c301d90
28abcb5c5573447fa20f212703d5d597: 1d6c62f5f2d44b3fa82851639fb44ead
8cff9872dc0441e5a7d1561d75e41149: 35d782fa639e4035b08d5c74a4f0