In [None]:
! pip install wandb
# grab @TheodoreGalanos's fork for config file
! git clone https://github.com/TheodoreGalanos/gpt-neox/
import wandb
import pandas as pd
import numpy as np

In [None]:
# initialize config
#
# The sweep has a single categorical parameter, `valid_set`. Its `values`
# list starts out empty and is meant to hold strings, each one packing the
# fields
#   N, num_layers, num_attention_heads, hidden_size, lr
# separated by commas.
sweep_config = dict(
    name="Scaling laws sweep",
    method="grid",
    parameters=dict(
        valid_set=dict(
            distribution="categorical",
            values=[],
        ),
    ),
)

# read TG's config
df = pd.read_csv(
    './gpt-neox/configs/scaling_experiment/config_parameters.csv',
    )

# Derive the sweep columns on a new frame so `df` keeps mirroring the CSV.
# N = 12 * n_layer * n_embd^2 (presumably the parameter-count estimate used
# in the paper cited below -- confirm).
sweep_rows = df.assign(N=12 * df['n_layer'] * df['n_embd'] ** 2)
# add LR according to equation D.1 from Kaplan et al.,
# "Scaling Laws for Neural Language Models"
sweep_rows = sweep_rows.assign(
    lr=0.003239 + (-0.0001395) * np.log(sweep_rows['N'])
)
# Keep only the sweep fields, in the order the consumer splits them back out.
# Selecting columns explicitly means unrelated CSV columns never need to be
# dropped by name. The float cast makes every field serialise uniformly
# (e.g. "12.0"), which the consumer parses back with float().
sweep_rows = sweep_rows[['N', 'n_layer', 'n_head', 'n_embd', 'lr']].astype(float)

# concatenate each row into one string, to be split later inside the run
sweep_config['parameters']['valid_set']['values'].extend(
    ','.join(str(x) for x in fields)
    for fields in sweep_rows.itertuples(index=False, name=None)
)

In [None]:
# check source: plain-text dump of the table loaded from the CSV
print(df)

# and config: as the last expression of the cell, the sweep dict
# (including the generated `valid_set` strings) becomes the cell output
sweep_config

In [None]:
# test run
def train():
    """Run a single sweep trial that only decodes and prints its parameters.

    Starts a W&B run, reads the comma-separated `valid_set` string chosen for
    this trial from the run config, splits it back into named fields, prints
    both the raw string and the decoded (name, value) pairs, and closes the
    run. No model is trained.
    """
    run = wandb.init()
    print(run.config.valid_set)
    # these are from neox_arguments.md
    field_names = [
        'N',
        'num_layers',
        'num_attention_heads',
        'hidden_size',
        'lr',
    ]
    # NOTE(review): every field is parsed as float, including the layer/head/
    # hidden sizes -- cast those to int before passing them to a config that
    # requires integers.
    hparams = {
        name: float(value)
        for name, value in zip(field_names, run.config.valid_set.split(','))
    }
    print(list(hparams.items()))
    run.finish()

# Register the sweep exactly once; every wandb.sweep() call creates a new sweep.
sweep_id = wandb.sweep(sweep_config)
# wandb.agent() starts the agent itself and blocks while it executes `train`
# for the sweep, so no separate .run() call is needed afterwards.
wandb.agent(sweep_id=sweep_id, function=train)