Code based on
    analyse_SR_results.ipynb
by 
    Fabrizio Frasca
in commit 
    bc07009fc949a32313991090a46e0f932689d7bc
which was committed
    May 15th 2021
in repository  
    twitter-research/cwn 

In [10]:
import sys
sys.path.append('./../')
import os
from definitions import ROOT_DIR
import matplotlib.pyplot as plt
import glob
import numpy as np

In [11]:
# Root folder of the tuning sweep; each run is read from
# '<result_path>/MOLHIV-<i>/result.txt' by the cells below.
result_path = os.path.join(
    ROOT_DIR,
    'exp',
    'results',
    'MOLHIV_tuning_molhiv_less_sparse',
)

In [12]:
def parse_results(path, args):
    """Extract the validation score and selected hyper-parameters from a result file.

    The file is scanned line by line:

    * Lines starting with ``'Valid'`` are expected to look like
      ``Valid: <mean> ± <std>``. The first such line is skipped; every later
      one overwrites the returned score with its ``<mean>``, so for a file
      with exactly two ``'Valid'`` lines the second one is reported.
    * A line starting with ``'Namespace'`` (presumably the printed
      ``argparse.Namespace`` of the run) is split on ``', '`` into
      ``key=value`` fields. Fields whose key is listed in ``args`` are kept,
      with the value left as the raw string (quotes included). Fragments
      without an ``'='`` (e.g. pieces of a list value) are ignored.

    Args:
        path: Path of the result file to read.
        args: Collection of hyper-parameter names to keep.

    Returns:
        A ``(score, args_dict)`` tuple. ``score`` is a float, or ``None`` when
        the file has fewer than two ``'Valid'`` lines. ``args_dict`` maps the
        requested names to their string values, or is ``None`` when the file
        has no ``'Namespace'`` line.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the text after the colon of a ``'Valid'`` line does not
            start with a number.
    """
    namespace_prefix = 'Namespace('
    score = None
    args_dict = None
    seen_first_valid = False  # the first 'Valid' line is deliberately skipped

    with open(path, 'r') as handle:
        for line in handle:
            if line.startswith('Valid'):
                if not seen_first_valid:
                    seen_first_valid = True
                    continue
                # Everything after the first colon is '<mean> ± <std>';
                # float() tolerates the surrounding padding, so the exact
                # column width of the report does not matter.
                entry = line.partition(':')[2]
                score = float(entry.split('±')[0])
            elif line.startswith('Namespace'):
                # Remove the wrapper as an exact prefix/suffix. str.lstrip()
                # takes a *set of characters* and would also eat the start of
                # the first key (e.g. 'emb_dim' -> 'b_dim'); the trailing
                # newline must be stripped before the closing parenthesis.
                body = line.strip()
                if body.startswith(namespace_prefix):
                    body = body[len(namespace_prefix):]
                if body.endswith(')'):
                    body = body[:-1]

                args_dict = {}
                for field in body.strip().split(', '):
                    # Split on the first '=' only so values that themselves
                    # contain '=' are kept intact.
                    key, separator, value = field.partition('=')
                    if separator and key in args:
                        args_dict[key] = value

    return score, args_dict

In [13]:
# Keys that parse_results keeps from a run's 'Namespace' line.
args = [
    'drop_rate',
    'lr',
    'lr_scheduler',
    'lr_scheduler_decay_rate',
    'omit_2cell_down',
    'num_layers',
    'readout',
]

In [14]:
# Try the parser on run 0 alone before sweeping over every run.
path = os.path.join(result_path, 'MOLHIV-0', 'result.txt')
parse_results(path=path, args=args)

(0.8078673084460122,
 {'drop_rate': '0.1',
  'lr': '3e-05',
  'lr_scheduler': "'None'",
  'lr_scheduler_decay_rate': '0.5',
  'num_layers': '2',
  'omit_2cell_down': "'True'",
  'readout': "'mean'"})

In [15]:
# Gather one (run index, parsed score, kept hyper-parameters) tuple per run;
# runs whose result.txt does not exist are skipped.
results = []
for i in range(300):
    path = os.path.join(result_path, f'MOLHIV-{i}', 'result.txt')
    try:
        final_val_acc, hypparam_settings = parse_results(path, args)
    except FileNotFoundError:
        continue
    results.append((i, final_val_acc, hypparam_settings))

In [16]:
# Order the runs in place from highest to lowest parsed score (tuple index 1).
results.sort(key=lambda run: run[1], reverse=True)

In [17]:
# Number of runs whose result.txt was found (missing files were skipped).
len(results)

300

In [18]:
# Display the first 15 entries, i.e. the highest-scoring runs given the
# descending sort applied to `results` earlier.
results[:15]

[(290,
  0.8450329463060944,
  {'drop_rate': '0.5',
   'lr': '0.003',
   'lr_scheduler': "'None'",
   'lr_scheduler_decay_rate': '0.5',
   'num_layers': '3',
   'omit_2cell_down': "'True'",
   'readout': "'mean'"}),
 (206,
  0.8393010239075053,
  {'drop_rate': '0.5',
   'lr': '0.0001',
   'lr_scheduler': "'None'",
   'lr_scheduler_decay_rate': '0.9',
   'num_layers': '3',
   'omit_2cell_down': "'False'",
   'readout': "'mean'"}),
 (210,
  0.8379782725847541,
  {'drop_rate': '0.5',
   'lr': '0.0001',
   'lr_scheduler': "'ReduceLROnPlateau'",
   'lr_scheduler_decay_rate': '0.5',
   'num_layers': '3',
   'omit_2cell_down': "'True'",
   'readout': "'mean'"}),
 (267,
  0.833676268861454,
  {'drop_rate': '0.5',
   'lr': '0.001',
   'lr_scheduler': "'None'",
   'lr_scheduler_decay_rate': '0.9',
   'num_layers': '3',
   'omit_2cell_down': "'True'",
   'readout': "'sum'"}),
 (295,
  0.833481836664707,
  {'drop_rate': '0.5',
   'lr': '0.003',
   'lr_scheduler': "'None'",
   'lr_scheduler_decay_r