In [1]:
import itertools
import json
import os

import pandas as pd

from IPython.display import HTML

In [3]:
root_dir = '/lustre/groups/aih/hyformer/results'

task = 'property_prediction'
benchmark = 'molecule_net'
splitter = 'scaffold'
dataset = 'bace'
model = 'hyformer'
objective = 'combined'

results_filename = 'test_loss_aggregated.json'


max_epochs = ['100']
lr_decays = ['true', 'false']
batch_sizes = ['16', '256', '512']
learning_rates = ['1e-5', '2e-5', '4e-5', '6e-5', '1e-4', '2e-4', '4e-4']
weight_decays = ['1e-2', '1e-1']
dropouts = ['0.0', '0.1', '0.2']


file_path_template = '{root_dir}/{task}/{benchmark}/{splitter}/{dataset}/{model}/{objective}/hpo/max_epochs_{max_epoch}/decay_lr_{lr_decay}/batch_size_{batch_size}/learning_rate_{learning_rate}/weight_decay_{weight_decay}/dropout_{dropout}/{results_filename}'


In [4]:

df = pd.DataFrame(
    columns=[
        'objective_value_mean',
        'objective_value_std',
        'max_epoch',
        'decay_lr',
        'batch_size',
        'learning_rate',
        'weight_decay',
        'dropout'
        ])

for max_epoch in max_epochs:
    for decay_lr in lr_decays:
        for batch_size in batch_sizes:
            for learning_rate in learning_rates:
                for weight_decay in weight_decays:
                    for dropout in dropouts:
                        
                        file_path = file_path_template.format(
                            root_dir=root_dir,
                            task=task,
                            benchmark=benchmark,
                            splitter=splitter,
                            dataset=dataset,
                            model=model,
                            objective=objective,
                            max_epoch=max_epoch,
                            lr_decay=decay_lr,
                            batch_size=batch_size,
                            learning_rate=learning_rate,
                            weight_decay=weight_decay,
                            dropout=dropout,
                            results_filename=results_filename
                        )

                        if os.path.exists(file_path):
                            with open(file_path, 'r') as f:
                                _loss_dict = json.load(f)
                            
                            hparams = {
                                'objective_value_mean': round(_loss_dict['mean'], 3) * 100,
                                'objective_value_std': round(_loss_dict['std'], 3) * 100,
                                'max_epoch': max_epoch,
                                'decay_lr': decay_lr,
                                'batch_size': batch_size,
                                'learning_rate': learning_rate,
                                'weight_decay': weight_decay,
                                'dropout': dropout
                            }
                            hparams = {k: [v] for k,v in hparams.items()}
                            df = pd.concat([df, pd.DataFrame(hparams)], ignore_index=True)
                            
                            
                            # pd.concat([_df, ], ignore_index=True)

                        else:
                            print(f'Experiment not found: {file_path}')
                        



  df = pd.concat([df, pd.DataFrame(hparams)], ignore_index=True)


In [5]:
HTML(df.sort_values(by='objective_value_mean', ascending=False).to_html())

Unnamed: 0,objective_value_mean,objective_value_std,max_epoch,decay_lr,batch_size,learning_rate,weight_decay,dropout
142,82.7,2.4,100,False,16,4e-05,0.1,0.1
139,81.2,2.3,100,False,16,4e-05,0.01,0.1
190,81.2,0.4,100,False,256,6e-05,0.1,0.1
77,81.1,1.2,100,True,256,0.0002,0.1,0.2
133,81.1,4.4,100,False,16,2e-05,0.01,0.1
74,81.1,1.2,100,True,256,0.0002,0.01,0.2
187,81.0,0.6,100,False,256,6e-05,0.01,0.1
143,81.0,2.4,100,False,16,4e-05,0.1,0.2
75,80.9,1.1,100,True,256,0.0002,0.1,0.0
72,80.9,1.1,100,True,256,0.0002,0.01,0.0
