In [1]:
def average_values(dictionary):
    """Return the arithmetic mean of the values stored in ``dictionary``.

    Args:
        dictionary: Mapping whose values are numbers; the keys are ignored.

    Returns:
        The sum of all values divided by the number of entries.

    Raises:
        ValueError: If ``dictionary`` is empty, because the mean is undefined.
    """
    values = list(dictionary.values())
    if not values:
        raise ValueError("cannot average an empty dictionary")
    # Accumulate left to right starting from 0 so the floating-point result is
    # the same as a plain running sum over the values.
    total = 0
    for value in values:
        total += value
    return total / len(values)

In [2]:
# Metric names of the form '<benchmark>/acc' used throughout this notebook:
# a run is only analysed when its summary contains all of them, and they are
# the keys requested from each run's history.
keys = [
    'ai2_arc_ARC-Challenge/acc',
    'ai2_arc_ARC-Easy/acc',
    'hellaswag/acc',
    'math_qa/acc',
    'piqa/acc',
    'pubmed_qa/acc',
    'super_glue_copa/acc',
    'winogrande_winogrande_s/acc',
]

In [3]:
import wandb

# Name of the Weights & Biases project queried below.
WANDB_PROJECT = "Gradient Ascent Limit Test"

# Collect the project's runs into a plain list so they can be counted and
# iterated more than once.
api = wandb.Api(timeout=100)
all_runs = [run for run in api.runs(WANDB_PROJECT)]
print(f"{len(all_runs)} runs found")

1749 runs found


In [4]:
from tqdm import tqdm
# Keep only the runs whose summary reports every metric listed in `keys`.
# Runs missing any of them are dropped without a message.
valid_runs = [
    candidate
    for candidate in all_runs
    if set(keys) <= set(candidate.summary.keys())
]

In [5]:
# Narrow the selection to runs whose config has a truthy `negative_loss` entry.
valid_runs = [
    run for run in valid_runs
    if run.config['negative_loss']
]
len(valid_runs)

1689

In [6]:
# Valid runs whose `model_name_or_path` is 'facebook/opt-1.3b'.
runs_1b = [
    run for run in valid_runs
    if run.config['model_name_or_path'] == 'facebook/opt-1.3b'
]
len(runs_1b)

690

In [7]:
# Valid runs whose `model_name_or_path` is 'facebook/opt-350m'.
runs_350m = [
    run for run in valid_runs
    if run.config['model_name_or_path'] == 'facebook/opt-350m'
]
len(runs_350m)

694

In [8]:
# Valid runs whose `model_name_or_path` is 'facebook/opt-2.7b'.
runs_3b = [
    run for run in valid_runs
    if run.config['model_name_or_path'] == 'facebook/opt-2.7b'
]
len(runs_3b)

305

In [9]:
import random
# Seed the global `random` generator so the sampling done with it in this
# notebook is repeatable for a given input order.
random.seed(42)

def sample_with_corpora(runs, n_per_corpus=100):
    """Draw up to ``n_per_corpus`` batch-size-1 runs from each training corpus.

    Runs are bucketed by which corpus name ('extraction', 'cc' or 'github',
    tested in that order) occurs in ``run.config['train_set']``. Runs whose
    ``train_batch_size`` is not 1 are ignored. A bucket holding at least
    ``n_per_corpus`` runs is down-sampled with the global ``random`` state;
    a smaller bucket is kept whole, in its original order. The size of each
    resulting bucket is printed.

    Args:
        runs: Iterable of objects exposing a ``config`` mapping with
            'train_batch_size' and 'train_set' entries.
        n_per_corpus: Maximum number of runs kept per corpus.

    Returns:
        A list holding the kept 'extraction' runs, then 'cc', then 'github'.

    Raises:
        KeyError: If the 'train_set' of a batch-size-1 run contains none of
            the known corpus names.
    """
    corpus_names = ('extraction', 'cc', 'github')
    buckets = {name: [] for name in corpus_names}
    for run in runs:
        if run.config['train_batch_size'] != 1:
            continue
        train_set = run.config['train_set']
        for name in corpus_names:
            if name in train_set:
                buckets[name].append(run)
                break
        else:
            raise KeyError(
                f"no known corpus ({', '.join(corpus_names)}) in train_set {train_set!r}"
            )

    sampled = {}
    for name in corpus_names:
        bucket = buckets[name]
        # random.sample cannot draw more items than the bucket holds, so a
        # short bucket is kept as-is instead of being sampled.
        if len(bucket) >= n_per_corpus:
            sampled[name] = random.sample(bucket, n_per_corpus)
        else:
            sampled[name] = bucket

    for name in corpus_names:
        print(len(sampled[name]))

    return sampled['extraction'] + sampled['cc'] + sampled['github']

In [10]:
# Down-sample each model group. The three calls share the global `random`
# state, so their order determines which runs are drawn. Each name is
# overwritten with its own sample, so re-running this cell samples again from
# the already reduced lists.
runs_1b = sample_with_corpora(runs_1b)
runs_350m = sample_with_corpora(runs_350m)
runs_3b = sample_with_corpora(runs_3b)

100
94
100
100
100
100
100
100
100


In [11]:
# Number of sampled runs per model group, in the order 1.3B, 350M, 2.7B.
print(*(len(group) for group in (runs_1b, runs_350m, runs_3b)))

294 300 300


In [12]:
def _corpus_label(train_set):
    """Return the short corpus label for a run's ``train_set`` value.

    The corpus names are tested in the order 'extraction', 'cc', 'github',
    and the first one found inside ``train_set`` decides the label.

    Raises:
        KeyError: If none of the known corpus names occurs in ``train_set``.
    """
    for corpus_name, label in (('extraction', 'TDEC'), ('cc', 'CC'), ('github', 'Git.')):
        if corpus_name in train_set:
            return label
    raise KeyError(f"no known corpus name found in train_set {train_set!r}")


def calc_best_for_all_runs(runs):
    """Find, for every run, the history row with the highest mean value.

    For each run the history is read with ``run.scan_history(keys=keys)``
    (``keys`` is the notebook-level metric list), every returned row is
    averaged with ``average_values``, and the first row reaching the maximum
    average is selected.

    Args:
        runs: Iterable of run objects providing ``scan_history``, ``config``
            (with a 'train_set' entry) and ``name``.

    Returns:
        A tuple of five parallel lists, one entry per run:
        the best average, the history row that produced it, the position of
        that row in the history, the corpus label ('TDEC', 'CC' or 'Git.'),
        and the trailing ``_``-separated token of the run name (a digit
        string).

    Raises:
        ValueError: If a run yields no history rows for the requested keys.
        KeyError: If a run's 'train_set' names none of the known corpora.
        AssertionError: If a run name does not end in ``_<digits>``.
    """
    best_average_all = []
    best_non_average_all = []
    best_epoch_all = []
    corpora = []
    index_all = []
    for run in tqdm(runs):
        data = list(run.scan_history(keys=keys))
        if not data:
            raise ValueError(
                f"run {run.name!r} returned no history rows for the requested keys"
            )
        # One average per history row (one row per epoch, according to the
        # note in the original code).
        averages = [average_values(row) for row in data]
        best_average = max(averages)
        best_epoch = averages.index(best_average)  # first row reaching the maximum

        corpus = _corpus_label(run.config['train_set'])
        index = run.name.split('_')[-1]
        assert index.isdigit(), f"run name {run.name!r} does not end in '_<digits>'"

        best_average_all.append(best_average)
        best_non_average_all.append(data[best_epoch])
        best_epoch_all.append(best_epoch)
        corpora.append(corpus)
        index_all.append(index)
    return best_average_all, best_non_average_all, best_epoch_all, corpora, index_all

In [13]:
# Best-row statistics per model group. Every call reads the history of each
# run in the group; the recorded progress bars show roughly five minutes per
# group.
average_1b, non_average_1b, epoch_1b, c_1b, i_1b = calc_best_for_all_runs(runs_1b)
average_350m, non_average_350m, epoch_350m, c_350m, i_350m = calc_best_for_all_runs(runs_350m)
average_3b, non_average_3b, epoch_3b, c_3b, i_3b = calc_best_for_all_runs(runs_3b)

100%|██████████| 294/294 [05:31<00:00,  1.13s/it]
100%|██████████| 300/300 [05:13<00:00,  1.04s/it]
100%|██████████| 300/300 [05:07<00:00,  1.02s/it]


In [14]:
import numpy as np
import pandas as pd
# Mean, then median, of the per-run best averages for the 1.3B group.
print(np.mean(average_1b), np.median(average_1b), sep='\n')

0.5100680713282049
0.5096146762371063


In [15]:
def _summary_frame(corpora, index, epoch, average, model):
    """Build the per-run summary columns for one model group.

    Returns a frame with the columns 'corpora', 'index', 'epoch' and
    'average' (one row per run) followed by a constant 'model' column.
    """
    return pd.DataFrame(
        {'corpora': corpora, 'index': index, 'epoch': epoch, 'average': average}
    ).assign(model=model)


# For each model group, place the summary columns next to the metric values of
# the selected history row; both frames have one row per run, in the same order.
df_1b_non_averaged = pd.DataFrame(non_average_1b)
df_1b = pd.concat(
    [_summary_frame(c_1b, i_1b, epoch_1b, average_1b, '1.3B'), df_1b_non_averaged],
    axis=1,
)
df_3b_non_averaged = pd.DataFrame(non_average_3b)
df_3b = pd.concat(
    [_summary_frame(c_3b, i_3b, epoch_3b, average_3b, '2.7B'), df_3b_non_averaged],
    axis=1,
)
df_350m_non_averaged = pd.DataFrame(non_average_350m)
df_350m = pd.concat(
    [_summary_frame(c_350m, i_350m, epoch_350m, average_350m, '350M'), df_350m_non_averaged],
    axis=1,
)

# ignore_index gives the stacked frame unique row labels instead of three
# repeated 0..n-1 ranges, so a label-based lookup addresses a single row.
df_melt = pd.concat([df_350m, df_1b, df_3b], ignore_index=True)
df_melt.head(5)

Unnamed: 0,corpora,index,epoch,average,model,ai2_arc_ARC-Challenge/acc,ai2_arc_ARC-Easy/acc,hellaswag/acc,math_qa/acc,piqa/acc,pubmed_qa/acc,super_glue_copa/acc,winogrande_winogrande_s/acc
0,TDEC,44,0,0.457211,350M,0.186441,0.45625,0.359375,0.21875,0.675,0.540625,0.69,0.53125
1,TDEC,34,3,0.461394,350M,0.20678,0.453125,0.384375,0.209375,0.65625,0.546875,0.7,0.534375
2,TDEC,43,0,0.457211,350M,0.186441,0.45625,0.359375,0.21875,0.675,0.540625,0.69,0.53125
3,TDEC,85,0,0.457211,350M,0.186441,0.45625,0.359375,0.21875,0.675,0.540625,0.69,0.53125
4,TDEC,69,0,0.457211,350M,0.186441,0.45625,0.359375,0.21875,0.675,0.540625,0.69,0.53125


In [16]:
# Write the combined per-run results to CSV; index=False keeps the row labels
# out of the file.
df_melt.to_csv('../full_results/classification.csv', index=False)