# Generate Hyperparameters for CVPR 2024 Sweeps

Sweep files are generated by the runtime-based algorithm in this notebook.
`coef_calc` takes proposed distributions and reweights the variance according to the number of runs that will likely complete during training.
`update_parameters` writes the resulting parameters into a Weights & Biases sweep YAML file.

In [30]:
import numpy as np

## Setup Calculations

In [31]:
from collections import namedtuple

size = 1000  # Number of samples

# Summary statistics of a quantized sample restricted to its central 95% range.
dist_info = namedtuple(
    'dist_info',
    ['mean', 'med', 'std', 'p2_5', 'p97_5', 'unique_95', 'values_95'],
)


def sample_quantized(dist, quantization_step):
    """Snap samples onto a grid and summarize their central 95% range.

    Args:
        dist: Array of samples; it is divided element-wise by
            ``quantization_step``.
        quantization_step: Grid spacing. Each sample is rounded to the
            nearest multiple of this value.

    Returns:
        A ``dist_info`` computed from the quantized samples lying between the
        2.5th and 97.5th percentiles (inclusive) of the quantized data:
        ``mean``/``med``/``std`` of those samples, ``p2_5``/``p97_5`` as the
        smallest/largest surviving grid value, ``unique_95`` as the number of
        distinct surviving grid values, and ``values_95`` as the surviving
        samples in ascending order.
    """
    grid_index = np.round(dist / quantization_step)
    snapped = grid_index * quantization_step

    # The percentiles are taken on the already-quantized values.
    low_cut, high_cut = np.percentile(snapped, [2.5, 97.5])
    in_range = (snapped >= low_cut) & (snapped <= high_cut)
    central = snapped[in_range]

    # np.unique already returns its result sorted.
    levels = np.unique(central)

    return dist_info(
        mean=np.mean(central),
        med=np.median(central),
        std=np.std(central),
        p2_5=np.min(levels),
        p97_5=np.max(levels),
        unique_95=len(levels),
        values_95=np.sort(central),
    )

# Demo: `size` draws from a normal(0, 0.1) snapped to a 0.1 grid.
sample_quantized(
    np.random.normal(loc=0, scale=0.1, size=size),
    quantization_step=0.1,
)

dist_info(mean=0.0025201612903225797, med=0.0, std=0.09981686656866609, p2_5=-0.2, p97_5=0.2, unique_95=5, values_95=array([-0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2,
       -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2,
       -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2,
       -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2,
       -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2,
       -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.1, -0.1, -0.1, -0.1, -0.1,
       -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1,
       -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1,
       -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1,
       -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1,
       -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1,
       -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1,
       

In [32]:
import itertools

import pandas as pd

# One row per (phase_multiplier, phase_relative_lr_multiplier) pair, with
# effective_lr = (1 / phase_multiplier) * phase_relative_lr_multiplier.
rows = [
    (
        phase_multiplier,
        phase_relative_lr_multiplier,
        (1 / float(phase_multiplier)) * float(phase_relative_lr_multiplier),
    )
    for phase_multiplier, phase_relative_lr_multiplier in itertools.product(
        [.5, 1.0, 2.0], [.8, 1.0, 1.2]
    )
]

pd.DataFrame(rows, columns=['phase_multiplier', 'phase_relative_lr_multiplier', 'effective_lr'])

Unnamed: 0,phase_multiplier,phase_relative_lr_multiplier,effective_lr
0,0.5,0.8,1.6
1,0.5,1.0,2.0
2,0.5,1.2,2.4
3,1.0,0.8,0.8
4,1.0,1.0,1.0
5,1.0,1.2,1.2
6,2.0,0.8,0.4
7,2.0,1.0,0.5
8,2.0,1.2,0.6


In [33]:
import pandas as pd
# Show floats in DataFrame output with three significant digits.
pd.set_option('display.float_format', '{:.3g}'.format)

In [34]:
# Short aliases for the NumPy samplers used by the hyperparameter specs.
normal = np.random.normal
log_normal = np.random.lognormal
uniform = np.random.uniform


def log_uniform_values(a, b, size):
    """Draw ``size`` samples whose logarithm is uniform between log(a) and log(b)."""
    log_low = np.log(a)
    log_high = np.log(b)
    return np.exp(np.random.uniform(log_low, log_high, size))


# Maps the distribution name used in a hyperparameter spec to its sampler.
map_of_distributions = dict(
    normal=normal,
    log_normal=log_normal,
    uniform=uniform,
    log_uniform_values=log_uniform_values,
)

def smart_sample(row, multiplier, n_samples=1000):
    """Summarize one hyperparameter spec as a ``dist_info``.

    Args:
        row: Mapping-like spec with keys ``'dist'`` and ``'parameters'``, plus
            ``'q_step'`` for sampled distributions.
        multiplier: Factor applied to the SECOND distribution parameter before
            sampling. For ``normal``/``log_normal`` that parameter is the
            standard deviation; for ``uniform``/``log_uniform_values`` it is
            the upper bound. Ignored for ``'values'`` specs.
        n_samples: Number of random draws used to estimate the statistics.
            Defaults to 1000, the value that used to be hard-coded here.

    Returns:
        For ``'values'`` specs, statistics taken directly over the listed
        values (``unique_95`` is the number of listed values, without
        de-duplication). Otherwise the result of ``sample_quantized`` on
        ``n_samples`` draws quantized with ``row['q_step']``.

    Raises:
        KeyError: If ``row['dist']`` is neither ``'values'`` nor a key of
            ``map_of_distributions``.
    """
    if row['dist'] == 'values':
        v = np.array(row['parameters'])
        return dist_info(
            mean=np.mean(v),
            med=np.median(v),
            std=np.std(v),
            p2_5=np.min(v),
            p97_5=np.max(v),
            unique_95=len(v),
            values_95=v,
        )

    sampler = map_of_distributions[row['dist']]
    first_param = row['parameters'][0]
    scaled_second_param = row['parameters'][1] * multiplier
    draws = sampler(first_param, scaled_second_param, n_samples)
    return sample_quantized(draws, row['q_step'])

def coef_calc(target, hps, max_iterations=200):
    """Widen the proposed distributions until enough distinct configs exist.

    Each pass summarizes every hyperparameter with ``smart_sample`` at the
    current multiplier and multiplies the per-parameter ``unique_95`` counts
    together. The multiplier starts at 1.0 and grows by 10% per pass until
    that product reaches ``target``.

    Args:
        target: Minimum required product of the ``unique_95`` counts.
        hps: Iterable of ``(name, dist, parameters, q_step)`` tuples. A
            3-tuple (no ``q_step``, as used for ``'values'`` entries) is
            padded with ``None``.
        max_iterations: Upper bound on the number of passes. Guards against
            an endless loop when the multiplier cannot raise the unique count
            (for example when every entry is a fixed ``'values'`` list).

    Returns:
        DataFrame indexed by parameter name with the ``dist_info`` fields
        plus ``dist``, ``q`` and ``parameters`` columns, taken from the first
        pass that met ``target``.

    Raises:
        ValueError: If ``max_iterations`` is smaller than 1.
        RuntimeError: If ``target`` is not reached within ``max_iterations``
            passes.
    """
    if max_iterations < 1:
        raise ValueError('max_iterations must be at least 1')

    padded = [list(h) + [None] if len(h) == 3 else h for h in hps]
    parameters = pd.DataFrame(
        padded, columns=['name', 'dist', 'parameters', 'q_step']
    ).set_index('name')

    cols = ['mean', 'med', 'std', 'p2_5', 'p97_5', 'unique_95', 'values_95']
    multiplier = 1.0
    num_unique = 0

    # Always run at least one pass so a result exists even when target <= 0.
    for _ in range(max_iterations):
        calc_df = pd.DataFrame(columns=cols)
        applied_params = parameters.apply(
            lambda row: smart_sample(row, multiplier), axis=1, result_type='expand'
        )
        # The expanded columns are positional; this assignment names them.
        calc_df[cols] = applied_params
        calc_df['dist'] = parameters['dist']
        calc_df['q'] = parameters['q_step']
        calc_df['parameters'] = parameters['parameters']

        num_unique = calc_df['unique_95'].prod()
        if num_unique >= target:
            print(f'Number of unique configurations: {num_unique} at multiplier {multiplier}')
            return calc_df

        multiplier = multiplier * 1.1

    raise RuntimeError(
        f'Only {num_unique} unique configurations after {max_iterations} '
        f'iterations; target {target} was not reached'
    )

def find_target_runs(expected_runtime_min, gpu_hours=20 * 5):
    """Estimate how many runs fit in the GPU budget and return half of that.

    Args:
        expected_runtime_min: Expected duration of a single run, in minutes.
        gpu_hours: Total GPU budget in hours. Defaults to ``20 * 5``, the
            value that used to be hard-coded here.

    Returns:
        Half (integer division) of the number of runs that fit in the budget.
        The full, un-halved count is what gets printed.

    Raises:
        ZeroDivisionError: If ``expected_runtime_min`` is 0.
    """
    gpu_min = gpu_hours * 60
    target_runs = int(gpu_min / expected_runtime_min)

    print(f'Target runs: {target_runs}')

    # NOTE(review): the halving is undocumented in the notebook — presumably a
    # safety margin for runs that do not complete; confirm.
    return target_runs // 2

## ProtoPNet

In [35]:
# ProtoPNet search space. Each entry is
# (name, distribution, distribution parameters, quantization step);
# "values" entries list explicit choices and carry no quantization step.
protopnet_parameters = [
    ("num_warm_epochs_at_lr1", "log_normal", (1.6, 0.4), 2),
    ("joint_steps_per_phase_at_lr1", "log_normal", (2.3, 0.3), 3),
    ("last_only_steps_per_joint_step", "uniform", (0.25, 1), 0.25),
    ("lr_multiplier", "log_normal", (-0.6, 1), 0.05),
    ("lr_step_per_joint_phase_2exp", "uniform", (-2, 2), 1),
    ("latent_dim_multiplier_2exp", "uniform", (-3, 0), 1),
    ("cluster_coef", "normal", (-1.0, 0.4), 0.2),
    ("separation_coef", "normal", (0.1, 0.04), 0.02),
    ("l1_coef", "log_uniform_values", (5e-5, 1e-3), 5e-5),
    ("num_addon_layers", "values", [2]),
]
protopnet_df = coef_calc(
    find_target_runs(expected_runtime_min=60),
    protopnet_parameters,
)
protopnet_df

Target runs: 100
Number of unique configurations: 280713600 at multiplier 1.0


Unnamed: 0_level_0,mean,med,std,p2_5,p97_5,unique_95,values_95,dist,q,parameters
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
num_warm_epochs_at_lr1,5.27,4.0,2.23,2.0,12.0,6,"[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ...",log_normal,2.0,"(1.6, 0.4)"
joint_steps_per_phase_at_lr1,10.2,9.0,3.03,6.0,18.0,5,"[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, ...",log_normal,3.0,"(2.3, 0.3)"
last_only_steps_per_joint_step,0.628,0.5,0.236,0.25,1.0,4,"[0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.2...",uniform,0.25,"(0.25, 1)"
lr_multiplier,0.83,0.55,0.781,0.1,4.35,76,"[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, ...",log_normal,0.05,"(-0.6, 1)"
lr_step_per_joint_phase_2exp,0.048,0.0,1.22,-2.0,2.0,5,"[-2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2....",uniform,1.0,"(-2, 2)"
latent_dim_multiplier_2exp,-1.52,-2.0,0.964,-3.0,-0.0,4,"[-3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3....",uniform,1.0,"(-3, 0)"
cluster_coef,-1.02,-1.0,0.364,-1.8,-0.2,9,"[-1.8, -1.8, -1.8, -1.8, -1.8, -1.8, -1.8, -1....",normal,0.2,"(-1.0, 0.4)"
separation_coef,0.0999,0.1,0.0379,0.02,0.18,9,"[0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.0...",normal,0.02,"(0.1, 0.04)"
l1_coef,0.00032,0.00025,0.000254,5e-05,0.00095,19,"[5e-05, 5e-05, 5e-05, 5e-05, 5e-05, 5e-05, 5e-...",log_uniform_values,5e-05,"(5e-05, 0.001)"
num_addon_layers,2.0,2.0,0.0,2.0,2.0,1,[2],values,,[2]


In [36]:
import yaml
import math

def round_to_sig_figs(value, sig_figs=2):
    """Round ``value`` to ``sig_figs`` significant figures.

    Zero is returned as the integer ``0``. For any other value the decimal
    position of its leading digit decides how many places ``round`` keeps.
    """
    if value == 0:
        return 0

    # floor(log10(|value|)) is the exponent of the leading digit.
    leading_exponent = math.floor(math.log10(abs(value)))
    decimals = sig_figs - int(leading_exponent + 1)
    return round(value, decimals)

def gen_parameters(setting_df):
    """Translate a hyperparameter summary table into sweep parameter entries.

    Args:
        setting_df: DataFrame indexed by parameter name with the columns
            ``'dist'``, ``'q'`` and ``'parameters'``.

    Returns:
        ``{'parameters': {name: entry}}``. For ``'values'`` rows the entry is
        ``{'values': [...]}``. For every other row it holds
        ``'distribution'`` (the dist name prefixed with ``"q_"``), ``'q'``,
        and either ``'mu'``/``'sigma'`` (normal, log_normal) or
        ``'min'``/``'max'`` (uniform, log_uniform, log_uniform_values).

    Raises:
        ValueError: If a row names a distribution that is not handled here.
            Previously such rows silently produced an entry without any
            distribution parameters.
    """
    location_scale_dists = ('normal', 'log_normal')
    bounded_dists = ('uniform', 'log_uniform', 'log_uniform_values')

    this_config = {'parameters': {}}
    for param_name, row in setting_df.iterrows():
        dist = row['dist']
        if dist == 'values':
            # A plain list keeps the choices a regular YAML sequence even if
            # they were given as a tuple.
            param_entry = {'values': list(row['parameters'])}
        else:
            params = row['parameters']
            param_entry = {'distribution': "q_" + dist, 'q': row['q']}
            if dist in location_scale_dists:
                param_entry['mu'] = params[0]
                param_entry['sigma'] = params[1]
            elif dist in bounded_dists:
                param_entry['min'] = params[0]
                param_entry['max'] = params[1]
            else:
                raise ValueError(
                    f"Unsupported distribution {dist!r} for parameter {param_name!r}"
                )
        this_config["parameters"][param_name] = param_entry

    return this_config

def format_yaml_horizontal(yaml_dict):
    """Render a two-level mapping as side-by-side text columns.

    Each top-level key becomes one column: a ``"key:"`` header followed by
    one indented ``"  k: v"`` line per inner item. All columns are padded to
    the width of the longest line and joined with ``" | "``; shorter columns
    are filled with blanks at the bottom.

    Args:
        yaml_dict: Mapping whose values are themselves mappings.

    Returns:
        The rendered rows joined by newlines, or an empty string when
        ``yaml_dict`` is empty (this used to raise ``ValueError``).
    """
    sections = [
        [f"{key}:"] + [f"  {k}: {v}" for k, v in values.items()]
        for key, values in yaml_dict.items()
    ]
    if not sections:
        return ""

    max_width = max(len(line) for section in sections for line in section)
    max_height = max(len(section) for section in sections)
    blank_cell = " " * max_width

    result = []
    for i in range(max_height):
        row_parts = [
            section[i].ljust(max_width) if i < len(section) else blank_cell
            for section in sections
        ]
        result.append(" | ".join(row_parts))

    return "\n".join(result)

def update_parameters(df, file):
    """Replace the ``parameters`` section of a sweep YAML file and print it.

    The file is read, its ``parameters`` key is overwritten with the entries
    generated from ``df`` by ``gen_parameters``, and the whole config is
    written back to the same path. The new entries are then printed as
    side-by-side columns.

    Args:
        df: Hyperparameter summary table accepted by ``gen_parameters``.
        file: Path of an existing YAML file; it is rewritten in place.
    """
    with open(file, 'r') as f:
        # An empty YAML document loads as None; start from an empty config
        # instead of failing on the item assignment below.
        config = yaml.safe_load(f) or {}

    parameters = gen_parameters(df)
    config['parameters'] = parameters['parameters']

    # NOTE(review): yaml.dump (unlike yaml.safe_dump) can emit Python-specific
    # tags for non-basic types, which safe_load above would then reject.
    with open(file, 'w') as f:
        yaml.dump(config, f)

    print(format_yaml_horizontal(parameters['parameters']))

# update_parameters(protopnet_df, 'exp/short/vanilla-accuracy.yaml')

## TesNET

In [11]:
# TesNet search space, in the same
# (name, distribution, distribution parameters, quantization step) layout
# used by the other model sections.
tesnet_parameters = [
    # unique to tesnet
    ("grassmannian_orthogonality_loss_coef", "normal", (-1e-7, 2e-8), 2e-8),
    ("orthogonality_loss_coef", "normal", (1e-4, 2e-5), 2e-5),

    # shared with protopnet
    ("num_warm_epochs_at_lr1", "log_normal", (1.6, 0.4), 2),
    ("joint_steps_per_phase_at_lr1", "log_normal", (2.3, 0.3), 3),
    ("last_only_steps_per_joint_step", "uniform", (0.25, 1), 0.25),
    ("latent_dim_multiplier_2exp", "uniform", (-3, 0), 1),
    ("lr_multiplier", "log_normal", (-0.6, 1), 0.05),
    ("lr_step_per_joint_phase_2exp", "uniform", (-2, 2), 1),
    ("cluster_coef", "normal", (-1.0, 0.4), 0.2),
    ("separation_coef", "normal", (0.1, 0.04), 0.02),
    ("l1_coef", "log_uniform_values", (5e-5, 1e-3), 5e-5),
    ("num_addon_layers", "values", [2]),
]
tesnet_df = coef_calc(
    find_target_runs(expected_runtime_min=150),
    tesnet_parameters,
)
tesnet_df

Target runs: 40
Number of unique configurations: 5321700000 at multiplier 1.0


Unnamed: 0_level_0,mean,med,std,p2_5,p97_5,unique_95,values_95,dist,q,parameters
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
grassmannian_orthogonality_loss_coef,-1.01e-07,-1e-07,1.97e-08,-1.4e-07,-6e-08,5,"[-1.4e-07, -1.4e-07, -1.4e-07, -1.4e-07, -1.4e...",normal,2e-08,"(-1e-07, 2e-08)"
orthogonality_loss_coef,0.0001,0.0001,2.01e-05,6e-05,0.00014,5,"[6.000000000000001e-05, 6.000000000000001e-05,...",normal,2e-05,"(0.0001, 2e-05)"
num_warm_epochs_at_lr1,5.12,4.0,1.89,2.0,10.0,5,"[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ...",log_normal,2.0,"(1.6, 0.4)"
joint_steps_per_phase_at_lr1,10.3,9.0,3.0,6.0,18.0,5,"[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, ...",log_normal,3.0,"(2.3, 0.3)"
last_only_steps_per_joint_step,0.637,0.75,0.236,0.25,1.0,4,"[0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.2...",uniform,0.25,"(0.25, 1)"
latent_dim_multiplier_2exp,-1.47,-1.0,0.961,-3.0,-0.0,4,"[-3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3....",uniform,1.0,"(-3, 0)"
lr_multiplier,0.824,0.6,0.761,0.1,3.95,73,"[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, ...",log_normal,0.05,"(-0.6, 1)"
lr_step_per_joint_phase_2exp,-0.048,0.0,1.25,-2.0,2.0,5,"[-2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2....",uniform,1.0,"(-2, 2)"
cluster_coef,-1.01,-1.0,0.376,-1.8,-0.2,9,"[-1.8, -1.8, -1.8, -1.8, -1.8, -1.8, -1.8, -1....",normal,0.2,"(-1.0, 0.4)"
separation_coef,0.101,0.1,0.0368,0.02,0.18,9,"[0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.0...",normal,0.02,"(0.1, 0.04)"


In [12]:
# update_parameters(tesnet_df, 'exp/short/tesnet.yaml')

## Deformable

In [41]:
# Deformable search space, in the same
# (name, distribution, distribution parameters, quantization step) layout
# used by the other model sections.
deformable_parameters = [
    # unique to deformable
    ("orthogonality_loss_coef", "log_normal", (-5.2, 1.4), 5e-4),

    # shared with protopnet
    ("num_warm_epochs_at_lr1", "log_normal", (1.6, 0.4), 2),
    ("joint_steps_per_phase_at_lr1", "log_normal", (2.3, 0.3), 3),
    ("last_only_steps_per_joint_step", "uniform", (0.25, 1), 0.25),
    ("lr_multiplier", "log_normal", (-0.6, 1), 0.05),
    ("lr_step_per_joint_phase_2exp", "uniform", (-2, 2), 1),
    ("cluster_coef", "normal", (-1.0, 0.4), 0.2),
    ("separation_coef", "normal", (0.1, 0.04), 0.02),
    ("l1_coef", "log_uniform_values", (5e-5, 1e-3), 5e-5),
    ("num_addon_layers", "values", [2]),
]
deformable_df = coef_calc(
    find_target_runs(expected_runtime_min=150),
    deformable_parameters,
)
deformable_df

Target runs: 40
Number of unique configurations: 5853870000 at multiplier 1.0


Unnamed: 0_level_0,mean,med,std,p2_5,p97_5,unique_95,values_95,dist,q,parameters
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
orthogonality_loss_coef,0.0107,0.005,0.0144,0.0005,0.085,110,"[0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.000...",log_normal,0.0005,"(-5.2, 1.4)"
num_warm_epochs_at_lr1,5.1,4.0,1.99,2.0,10.0,5,"[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ...",log_normal,2.0,"(1.6, 0.4)"
joint_steps_per_phase_at_lr1,10.3,9.0,2.94,6.0,18.0,5,"[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, ...",log_normal,3.0,"(2.3, 0.3)"
last_only_steps_per_joint_step,0.637,0.75,0.243,0.25,1.0,4,"[0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.2...",uniform,0.25,"(0.25, 1)"
lr_multiplier,0.8,0.55,0.75,0.05,4.3,73,"[0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0...",log_normal,0.05,"(-0.6, 1)"
lr_step_per_joint_phase_2exp,-0.045,0.0,1.25,-2.0,2.0,5,"[-2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2....",uniform,1.0,"(-2, 2)"
cluster_coef,-1.01,-1.0,0.377,-1.8,-0.2,9,"[-1.8, -1.8, -1.8, -1.8, -1.8, -1.8, -1.8, -1....",normal,0.2,"(-1.0, 0.4)"
separation_coef,0.098,0.1,0.0385,0.02,0.18,9,"[0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.0...",normal,0.02,"(0.1, 0.04)"
l1_coef,0.000294,0.0002,0.000239,5e-05,0.0009,18,"[5e-05, 5e-05, 5e-05, 5e-05, 5e-05, 5e-05, 5e-...",log_uniform_values,5e-05,"(5e-05, 0.001)"
num_addon_layers,2.0,2.0,0.0,2.0,2.0,1,[2],values,,[2]


In [None]:
# update_parameters(deformable_df, 'exp/short/deformable.yaml')

orthogonality_loss_coef:             | num_warm_epochs_at_lr1:              | joint_steps_per_phase_at_lr1:        | last_only_steps_per_joint_step:      | lr_multiplier:                       | lr_step_per_joint_phase_2exp:        | cluster_coef:                        | separation_coef:                     | l1_coef:                             | num_addon_layers:                   
  distribution: q_log_normal         |   distribution: q_log_normal         |   distribution: q_log_normal         |   distribution: q_uniform            |   distribution: q_log_normal         |   distribution: q_uniform            |   distribution: q_normal             |   distribution: q_normal             |   distribution: q_log_uniform_values |   values: [2]                       
  q: 0.0005                          |   q: 2.0                             |   q: 3.0                             |   q: 0.25                            |   q: 0.05                            |   q: 1.0                     

## ST-ProtoPNet

In [None]:
# ST-ProtoPNet search space. Each entry is
# (name, distribution, distribution parameters, quantization step).
st_protopnet_parameters = [
    # unique to st_protopnet
    ("support_separation_coef", "normal", (0.5, 0.04), 0.02),
    ("trivial_separation_coef", "normal", (0.1, 0.04), 0.02),
    ("closeness_loss_coef", "normal", (1.0, 0.4), 0.2),
    ("discrimination_loss_coef", "normal", (1.0, 0.4), 0.2),
    # NOTE(review): a mean of 0.001 with sigma 0.4 and step 0.2 quantizes to
    # multiples of 0.2 centred on 0 — confirm these values are intended.
    ("orthogonality_loss_coef", "normal", (0.001, 0.4), 0.2),

    # shared with protopnet
    ("num_warm_epochs_at_lr1", "log_normal", (1.6, 0.4), 2),
    ("joint_steps_per_phase_at_lr1", "log_normal", (2.3, 0.3), 3),
    ("last_only_steps_per_joint_step", "uniform", (0.25, 1), 0.25),
    ("lr_multiplier", "log_normal", (-0.6, 1), 0.05),
    ("lr_step_per_joint_phase_2exp", "uniform", (-2, 2), 1),
    ("cluster_coef", "normal", (-1.0, 0.4), 0.2),
    ("l1_coef", "log_uniform_values", (5e-5, 1e-3), 5e-5),
    ("num_addon_layers", "values", [1]),
]
st_protopnet_df = coef_calc(
    find_target_runs(expected_runtime_min=60),
    st_protopnet_parameters,
)
st_protopnet_df

Target runs: 100
Number of unique configurations: 1339231320000 at multiplier 1.0


Unnamed: 0_level_0,mean,med,std,p2_5,p97_5,unique_95,values_95,dist,q,parameters
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
support_separation_coef,0.503,0.5,0.0362,0.42,0.58,9,"[0.42, 0.42, 0.42, 0.42, 0.42, 0.42, 0.42, 0.4...",normal,0.02,"(0.5, 0.04)"
trivial_separation_coef,0.1,0.1,0.0374,0.02,0.18,9,"[0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.0...",normal,0.02,"(0.1, 0.04)"
closeness_loss_coef,1.0,1.0,0.382,0.2,1.8,9,"[0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, ...",normal,0.2,"(1.0, 0.4)"
discrimination_loss_coef,0.993,1.0,0.373,0.2,1.8,9,"[0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, ...",normal,0.2,"(1.0, 0.4)"
orthogonality_loss_coef,-0.00595,0.0,0.384,-0.8,0.8,9,"[-0.8, -0.8, -0.8, -0.8, -0.8, -0.8, -0.8, -0....",normal,0.2,"(0.001, 0.4)"
num_warm_epochs_at_lr1,5.3,4.0,2.0,2.0,10.0,5,"[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ...",log_normal,2.0,"(1.6, 0.4)"
joint_steps_per_phase_at_lr1,10.4,9.0,2.97,6.0,18.0,5,"[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, ...",log_normal,3.0,"(2.3, 0.3)"
last_only_steps_per_joint_step,0.617,0.5,0.241,0.25,1.0,4,"[0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.2...",uniform,0.25,"(0.25, 1)"
lr_multiplier,0.801,0.55,0.756,0.1,4.25,70,"[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, ...",log_normal,0.05,"(-0.6, 1)"
lr_step_per_joint_phase_2exp,0.0,0.0,1.2,-2.0,2.0,5,"[-2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2....",uniform,1.0,"(-2, 2)"


In [None]:
# update_parameters(st_protopnet_df, 'exp/short/st-protopnet.yaml')

support_separation_coef:             | trivial_separation_coef:             | closeness_loss_coef:                 | discrimination_loss_coef:            | orthogonality_loss_coef:             | num_warm_epochs_at_lr1:              | joint_steps_per_phase_at_lr1:        | last_only_steps_per_joint_step:      | lr_multiplier:                       | lr_step_per_joint_phase_2exp:        | latent_dim_multiplier_2exp:          | cluster_coef:                        | l1_coef:                             | num_addon_layers:                   
  distribution: q_normal             |   distribution: q_normal             |   distribution: q_normal             |   distribution: q_normal             |   distribution: q_normal             |   distribution: q_log_normal         |   distribution: q_log_normal         |   distribution: q_uniform            |   distribution: q_log_normal         |   distribution: q_uniform            |   distribution: q_uniform            |   distribution: q_normal   

## ProtoTree

In [39]:
# ProtoTree search space. Each entry is
# (name, distribution, distribution parameters, quantization step);
# "values" entries list explicit choices and carry no quantization step.
prototree_parameters = [
    # phase lengths
    ("warm_up_phase_len_at_lr1", "normal", (30, 5), 5),
    ("joint_phase_len_at_lr1", "normal", (70, 10), 5),

    # optimizer settings
    ("backbone_lr_multiplier", "log_normal", (0, 1.2), 0.2),
    ("non_backbone_lr_multiplier", "log_normal", (0, 1.2), 0.2),
    ("lr_step_gamma", "normal", (0.5, 0.2), 0.1),
    ("lr_weight_decay", "log_normal", (-10, 1), 5e-5),
    ("log_probabilities", "values", [False]),
]
prototree_df = coef_calc(
    find_target_runs(expected_runtime_min=40),
    prototree_parameters,
)
prototree_df

Target runs: 150
Number of unique configurations: 6855840 at multiplier 1.0


Unnamed: 0_level_0,mean,med,std,p2_5,p97_5,unique_95,values_95,dist,q,parameters
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
warm_up_phase_len_at_lr1,30.2,30.0,4.96,20,40,5,"[20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20....",normal,5.0,"(30, 5)"
joint_phase_len_at_lr1,70.6,70.0,9.59,50,90,9,"[50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50....",normal,5.0,"(70, 10)"
backbone_lr_multiplier,1.67,1.0,1.81,0.2,9.6,46,"[0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, ...",log_normal,0.2,"(0, 1.2)"
non_backbone_lr_multiplier,1.55,1.0,1.71,0,9.2,46,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",log_normal,0.2,"(0, 1.2)"
lr_step_gamma,0.502,0.5,0.194,0.1,0.9,9,"[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, ...",normal,0.1,"(0.5, 0.2)"
lr_weight_decay,6.22e-05,5e-05,6.45e-05,0,0.00035,8,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",log_normal,5e-05,"(-10, 1)"
log_probabilities,0.0,0.0,0.0,False,False,1,[False],values,,[False]


In [38]:
# Rewrites the ProtoTree sweep file in place each time this cell is run.
update_parameters(df=prototree_df, file='exp/short/prototree.yaml')

warm_up_phase_len_at_lr1:    | joint_phase_len_at_lr1:      | backbone_lr_multiplier:      | non_backbone_lr_multiplier:  | lr_step_gamma:               | lr_weight_decay:            
  distribution: q_normal     |   distribution: q_normal     |   distribution: q_log_normal |   distribution: q_log_normal |   distribution: q_normal     |   distribution: q_log_normal
  q: 5.0                     |   q: 5.0                     |   q: 0.2                     |   q: 0.2                     |   q: 0.1                     |   q: 5e-05                  
  mu: 30                     |   mu: 70                     |   mu: 0                      |   mu: 0                      |   mu: 0.5                    |   mu: -10                   
  sigma: 5                   |   sigma: 10                  |   sigma: 1.2                 |   sigma: 1.2                 |   sigma: 0.2                 |   sigma: 1                  
