In [137]:
import submitit

import torch
import random
import numpy as np

import pickle
import itertools
import argparse
import logging
import os
import pathlib
import time
import json
import math
import matplotlib.pyplot as plt
from torch.utils import data
from fvcore.common.config import CfgNode

In [138]:
from multi_objective.main import main, get_config

In [139]:
executor = submitit.AutoExecutor(folder="tmp")

# Prepare the globals

In [140]:
# Base configuration for the adult dataset; later cells clone it before
# applying per-run overrides.
adult_cfg = get_config('configs/adult.yaml')
# NOTE(review): `methods` is not referenced by any other visible cell.
methods = ['cosmos', 'mgda']

# Number of hyperparameter configurations drawn by the sampling cell.
num_evaluations = 5
# Number of alpha values sampled per configuration (second dimension of
# `alphas`) — presumably the number of objectives; TODO confirm.
J = 2

# Per-dataset threshold passed to get_optimal_cfg as `min_angle`: jobs whose
# reported angle is not strictly greater than it are ignored.
min_angles = {
    'adult': 0.02,
    'compass': 0.0055,
}

# Training length for the tabular runs (used for cfg.epochs and cfg.eval_every).
epochs_tabular = 20
# NOTE(review): `epochs_mnist` is not referenced by any other visible cell.
epochs_mnist = 50

In [141]:
# Search space.
# lr and lamda bounds are stored in log space: the sampling cell draws
# uniformly between them and applies np.exp, i.e. a log-uniform distribution.
lr_range = (np.log(1e-4), np.log(1e-2))
lamda_range = (np.log(0.2), np.log(5))
# alpha is sampled uniformly on the raw scale (no log transform).
alpha_range = (.2, 1.5)
# Categorical options, sampled with np.random.choice.
scheduler_choice = ['None', 'MultiStep', 'Cosine']
mgda_norm_choice = ['none', 'l2', 'loss', 'loss+']
phn_solver = ['linear', 'epo']

Sample the hyperparameters

In [142]:
# Draw all hyperparameters from a single seeded stream. The order of the
# calls below fixes the sampled values, so do not reorder them.
np.random.seed(1)

# Log-uniform draws: sample uniformly in log space, then map back with exp.
learning_rates = np.exp(
    np.random.uniform(low=lr_range[0], high=lr_range[1], size=num_evaluations)
)
lamdas = np.exp(
    np.random.uniform(low=lamda_range[0], high=lamda_range[1], size=num_evaluations)
)

# Plain uniform draws: J alpha values for every evaluation.
alphas = np.random.uniform(
    low=alpha_range[0], high=alpha_range[1], size=(num_evaluations, J)
)

# Categorical draws, one option per evaluation.
schedulers = np.random.choice(scheduler_choice, size=num_evaluations)
mgda_norms = np.random.choice(mgda_norm_choice, size=num_evaluations)
phn_solvers = np.random.choice(phn_solver, size=num_evaluations)

print(f"Num evals: {len(learning_rates)}")

Num evals: 5


In [143]:
def convert_hp(array, arg, dtype):
    """Pair sampled hyperparameter values with the config key they override.

    Args:
        array: Sized iterable of sampled values. Elements that are
            ``np.ndarray`` instances are converted element-wise into lists;
            every other element is converted directly.
        arg: Config key (e.g. ``'cosmos.lr'``) repeated once per value.
        dtype: Callable used to cast each value to a plain Python type.

    Returns:
        Tuple ``(args, values)``: ``args`` is ``arg`` repeated ``len(array)``
        times, ``values`` holds the converted entries in the original order.
    """
    count = len(array)

    def _cast(entry):
        # Vector-valued samples (rows of a 2-D array) become lists of scalars.
        if isinstance(entry, np.ndarray):
            return [dtype(item) for item in entry]
        return dtype(entry)

    values = list(map(_cast, array))
    keys = [arg] * count
    return keys, values

In [144]:
# Pair every sampled value with the config key it overrides. Each result is
# the (args, values) tuple returned by convert_hp.
cosmos_lrs = convert_hp(learning_rates, 'cosmos.lr', float)
cosmos_scheds = convert_hp(schedulers, 'cosmos.lr_scheduler', str)
cosmos_lamdas = convert_hp(lamdas, 'cosmos.lamda', float)
cosmos_alphas = convert_hp(alphas, 'cosmos.alpha', float)

mgda_lrs = convert_hp(learning_rates, 'mgda.lr', float)
mgda_scheds = convert_hp(schedulers, 'mgda.lr_scheduler', str)
# `mgda_norms` is rebound here from the sampled array to an (args, values)
# tuple. If this cell is executed again the name already holds that tuple, so
# reuse its values instead of converting the tuple itself (which would
# silently produce a two-element result of stringified lists).
_mgda_norm_values = mgda_norms[1] if isinstance(mgda_norms, tuple) else mgda_norms
mgda_norms = convert_hp(_mgda_norm_values, 'mgda.normalization_type', str)

pmtl_lrs = convert_hp(learning_rates, 'pmtl.lr', float)
pmtl_scheds = convert_hp(schedulers, 'pmtl.lr_scheduler', str)



In [145]:
def percent_finished(jobs):
    """Return the fraction of ``jobs`` that report being done.

    Despite the name, the value is a fraction between 0.0 and 1.0, not a
    percentage.

    Args:
        jobs: Sized collection of objects exposing a ``done()`` method.

    Returns:
        Number of jobs whose ``done()`` is truthy divided by the total number
        of jobs; ``0.0`` for an empty collection (nothing has finished)
        instead of raising ``ZeroDivisionError``.
    """
    total = len(jobs)
    if total == 0:
        return 0.0
    return sum(job.done() for job in jobs) / total


def save_jobs(filename, jobs):
    """Pickle ``jobs`` to ``<filename>.pickle``.

    The file is opened in ``'wb'`` mode, so an existing file with the same
    name is overwritten.
    """
    target = f'{filename}.pickle'
    with open(target, 'wb') as handle:
        pickle.dump(jobs, handle)

        
def load_jobs(filename):
    """Load and return the object pickled in ``<filename>.pickle``."""
    source = f'{filename}.pickle'
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # written by this notebook (or another trusted source).
    with open(source, 'rb') as handle:
        jobs = pickle.load(handle)
    return jobs

# Tabular datasets

In [146]:
# Executor settings for the tabular HPO jobs submitted below: 20-minute
# timeout, one GPU per node, job name 'hpo', on the given Slurm partition.
executor.update_parameters(timeout_min=20, slurm_partition="ml_gpu-rtx2080", name='hpo', gpus_per_node=1)
# Presumably caps how many tasks of a job array run concurrently — see the
# submitit documentation for `slurm_array_parallelism`.
executor.update_parameters(slurm_array_parallelism=35)

## COSMOS

In [147]:
# One override list per evaluation, laid out as
# (key_lr, lr, key_lamda, lamda, key_alpha, alpha) for cfg.merge_from_list.
# Materialised as a list: a bare zip object is exhausted after one pass, so
# iterating it a second time (e.g. re-running the submission cell) would
# silently submit nothing.
# NOTE(review): cosmos_scheds is not part of this grid — confirm that the
# scheduler is intentionally left at its config default.
hpo_cosmos = list(zip(*cosmos_lrs, *cosmos_lamdas, *cosmos_alphas))
method = 'cosmos'

### Adult

In [148]:
# Work on a clone so the loaded adult_cfg itself is not modified.
cfg = adult_cfg.clone()
cfg.epochs = epochs_tabular
# eval_every is set to the number of epochs — presumably so evaluation runs
# once, after the final epoch; confirm against multi_objective.main.
cfg.eval_every = epochs_tabular

**Runs the search**

In [150]:
# Guard against resubmitting a sweep whose job handles were already saved.
assert not os.path.exists('cosmos_adult_hpo.pickle'), "Are you sure?"

cosmos_adult_jobs = []
# All submissions are made inside the executor's batch() context.
with executor.batch():
    for args in hpo_cosmos:
        # Derive each job's config from a fresh clone of the base `cfg`
        # instead of rebinding `cfg` itself: the base config stays untouched,
        # and no job inherits overrides from the previous iteration.
        job_cfg = cfg.clone()
        job_cfg.merge_from_list(args)
        job = executor.submit(main, method, job_cfg, 'hpo')
        cosmos_adult_jobs.append(job)

In [151]:
print(percent_finished(cosmos_adult_jobs))

1.0


Save the results

In [129]:
# Refuse to overwrite previously saved job handles: the assertion fails when
# the pickle already exists, so remove or rename that file first if a fresh
# save is really intended.
assert not os.path.exists('cosmos_adult_hpo.pickle'), "Are you sure?"
save_jobs('cosmos_adult_hpo', cosmos_adult_jobs)

AssertionError: Are you sure?

Load the results

In [157]:
cosmos_adult_jobs = load_jobs('cosmos_adult_hpo')

In [162]:
def get_optimal_cfg(jobs, min_angle):
    """Return the config of the best-scoring job that passes the angle filter.

    Each job's ``result()`` is unpacked as ``(hv, angle)``. A job is a
    candidate only when ``angle`` is strictly greater than ``min_angle``;
    among candidates the one with the largest ``hv`` wins (``hv`` must also
    exceed the initial best of 0). Ties keep the earliest job. Every new best
    is printed as ``hv angle``, followed by the winning job id.

    Args:
        jobs: Indexable sequence of job objects exposing ``result()``,
            ``job_id`` and ``submission()``.
        min_angle: Exclusive lower bound on the reported angle.

    Returns:
        ``submission().args[1]`` of the winning job, i.e. the second
        positional argument it was submitted with (the config, for jobs
        submitted as ``executor.submit(main, method, cfg, 'hpo')``).

    Raises:
        AssertionError: If no job passes both conditions.
    """
    best_index, best_hv = None, 0

    for index, job in enumerate(jobs):
        hv, angle = job.result()
        # Skip jobs that fail the angle filter or do not improve on the best.
        if not (angle > min_angle and hv > best_hv):
            continue
        print(hv, angle)
        best_hv, best_index = hv, index

    assert best_index is not None, "No optimal cfg found"
    winner = jobs[best_index]
    print(f'Best job: {winner.job_id}')
    return winner.submission().args[1]

In [163]:
get_optimal_cfg(cosmos_adult_jobs, min_angles['adult'])

3.27720263842372 0.041743607173621355
3.303566993728911 0.053544836762180026
3.3087412346982084 0.0324059883178871
3.3171353978157 0.03985407698109267
Best job: 5483814_63


CfgNode({'dataset': 'adult', 'dim': (88,), 'objectives': ['BinaryCrossEntropyLoss', 'ddp'], 'task_ids': [], 'epochs': 20, 'num_workers': 4, 'checkpoint_every': 0, 'lr_scheduler': 'None', 'lr': 0.001, 'batch_size': 256, 'pmtl': CfgNode({'num_starts': 5, 'lr_scheduler': 'None', 'lr': 0.001}), 'mgda': CfgNode({'approximate_norm_solution': False, 'normalization_type': 'loss', 'lr_scheduler': 'None', 'lr': 0.001}), 'phn': CfgNode({'alpha': 0.2, 'internal_solver': 'linear', 'lr_scheduler': 'None', 'lr': 0.001}), 'single_task': CfgNode({'task_id': None, 'lr_scheduler': 'None', 'lr': 0.001}), 'cosmos': CfgNode({'alpha': [0.3984731373211122, 1.0083481012625402], 'lamda': 2.88333803931139, 'lr_scheduler': 'None', 'lr': 0.0006735324727887581}), 'seed': 1, 'logdir': 'results', 'n_partitions': 24, 'eval_every': 20, 'train_eval_every': 0, 'reference_point': [2, 2], 'device': 'cuda', 'metrics': None})

# Image datasets

In [None]:
# Executor settings for the image-dataset HPO jobs: same partition, job name
# and GPU count as the tabular runs, but with a 30-minute timeout.
executor.update_parameters(timeout_min=30, slurm_partition="ml_gpu-rtx2080", name='hpo', gpus_per_node=1)
# Presumably caps how many tasks of a job array run concurrently — see the
# submitit documentation for `slurm_array_parallelism`.
executor.update_parameters(slurm_array_parallelism=30)

Save what we have so far

In [15]:

# NOTE(review): `time` is already imported in the first cell; this re-import
# is redundant.
import time

# NOTE(review): `jobs` is not assigned in any visible cell, so this cell
# raises NameError on a fresh kernel. Judging by the file name it presumably
# refers to the COSMOS/adult job list — confirm before re-running.
done=False
while not done:
    # Snapshot the job handles to disk (overwrites the file on every pass).
    with open('cosmos_adult_hpo_results.pickle', 'wb') as f:
        pickle.dump(jobs, f)
    
    # True once every job reports done().
    done = sum(job.done() for job in jobs) == len(jobs)
    # NOTE(review): this unconditional break ends the loop after the first
    # snapshot, so the 5-minute sleep below is unreachable and the cell never
    # actually polls. Remove the break to poll until all jobs finish, or drop
    # the loop if a single snapshot is intended.
    break
    
    time.sleep(5 * 60)

In [29]:
# Reload the snapshot through the shared helper instead of repeating the
# open/pickle.load boilerplate; load_jobs appends the '.pickle' suffix, so
# this reads 'cosmos_adult_hpo_results.pickle' exactly as before.
jobs_l = load_jobs('cosmos_adult_hpo_results')

In [32]:
jobs_l[0].result()

(3.02712881565094, 0)

In [None]:






# Scratch smoke test of the submitit setup. This rebinds the global
# `executor`, so any cell executed after this one uses these settings
# (5-minute timeout, test partition) rather than the HPO settings above.
executor = submitit.AutoExecutor(folder="tmp")
executor.update_parameters(timeout_min=5, slurm_partition="testdlc_gpu-rtx2080")


# Alternative configuration, kept for reference:
# executor.update_parameters(timeout_min=1, slurm_partition="ml_gpu-rtx2080")

In [13]:
def add(a, b):
    """Return ``a + b``; a minimal payload for smoke-testing job submission."""
    total = a + b
    return total

In [14]:
job = executor.submit(add, 5, 7)  # will compute add(5, 7)

In [15]:
print(job.job_id)  # ID of your job

5359691


In [16]:
output = job.result()

In [17]:
output

12