In [1]:
import pandas as pd
import numpy as np
import os
import glob
import plot_utils
from tqdm import tqdm

In [2]:
# Experiment folders (relative to the notebook) that are scanned for result CSVs.
folder_list = ["ModelConfig", "ISO-PR", "ISO-FLOP"]

# Tag embedded in the names of the CSV files written at the end of the notebook.
date = "13Jan2023"

In [3]:
# Collect the result CSVs of every experiment folder and split them into
# evaluation files ('_eval_' in the path) and test files ('_test_' in the path).
# CSVs matching neither marker are ignored.
eval_list = []
test_list = []
for dir_ in folder_list:
    # glob.glob returns paths in arbitrary, filesystem-dependent order; sorting
    # keeps the row order of the concatenated tables reproducible across machines.
    files_ = sorted(glob.glob(f'./{dir_}/*.csv'))

    for file in files_:
        if '_eval_' in file:
            eval_list.append(file)
        elif '_test_' in file:
            test_list.append(file)

#
def _load_results(files):
    """Read result CSVs into one DataFrame tagged with the experiment type.

    For every path in ``files`` the CSV is read, an "Experiment type" column is
    added based on the folder marker found in the path ("ISO-PR" -> "Iso-PR",
    "ISO-FLOP" -> "Iso-FLOPs", anything else -> "Model Config"), and rows whose
    "Embedding parameters" value is not strictly positive are dropped.

    Parameters
    ----------
    files : list of str
        Paths of the CSV files to load.

    Returns
    -------
    pandas.DataFrame
        All remaining rows, concatenated in the order of ``files`` with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If ``files`` is empty.
    """
    if not files:
        raise ValueError("No result CSV files found to load.")

    frames = []
    for path in files:
        frame = pd.read_csv(path)
        if "ISO-PR" in path:
            frame["Experiment type"] = "Iso-PR"
        elif "ISO-FLOP" in path:
            frame["Experiment type"] = "Iso-FLOPs"
        else:
            frame["Experiment type"] = "Model Config"

        frames.append(frame.loc[frame["Embedding parameters"] > 0, :])

    # Every file carries its own 0..n-1 index. Without ignore_index the combined
    # frame has duplicate labels, so label-based `.loc[idx, col]` would return a
    # Series instead of a scalar and write to several rows at once.
    return pd.concat(frames, ignore_index=True)


df_eval = _load_results(eval_list)
df_test = _load_results(test_list)

# Sanity check: (number of distinct runs, number of remaining columns).
print(df_eval.groupby(['run_name']).count().shape)
print(df_test.groupby(['run_name']).count().shape)

# calculate flops
SEQ_LEN = 128  # tokens per sequence: passed to the Kaplan estimate and used for token counts
SEQS_PER_UPDATE = 256  # sequences per update step (presumably the batch size -- TODO confirm)
NUM_TRAINING_SEQ = 9081600  # sequences assumed for every test row (presumably the full training budget -- TODO confirm)


def _add_flop_columns(df, num_sequences):
    """Add FLOP estimates and a token count to ``df``, one value per row.

    Rows are processed by position and the results are assigned as whole
    columns, so the outcome does not depend on the index being unique.
    (Per-row ``df.loc[idx, col]`` access returns a Series and writes to several
    rows at once when ``idx`` is a duplicated label.)

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "embedding_size", "hidden_size",
        "intermediate_size", "num_attention_heads", "num_hidden_layers" and
        "Non-embedding parameters".
    num_sequences : sequence
        Number of sequences for each row of ``df``, in row order.

    Returns
    -------
    None
        ``df`` is modified in place and gains the float columns
        "FLOPS Hoffman per sequence", "FLOPS Hoffman total",
        "FLOPS Kaplan per sequence", "FLOPS Kaplan total" and "Tokens".

    Raises
    ------
    ValueError
        If ``num_sequences`` does not have one entry per row.
    """
    if len(num_sequences) != len(df):
        raise ValueError("num_sequences must have exactly one entry per row of df.")

    results = {
        "FLOPS Hoffman per sequence": [],
        "FLOPS Hoffman total": [],
        "FLOPS Kaplan per sequence": [],
        "FLOPS Kaplan total": [],
        "Tokens": [],
    }

    rows = zip(
        df["embedding_size"].to_numpy(),
        df["hidden_size"].to_numpy(),
        df["intermediate_size"].to_numpy(),
        df["num_attention_heads"].to_numpy(),
        df["num_hidden_layers"].to_numpy(),
        df["Non-embedding parameters"].to_numpy(),
        num_sequences,
    )
    for emb, hidden, inter, heads, layers, non_emb_pars, n_seq in tqdm(rows, total=len(df)):
        # Hoffman flops per sequence
        hoffman = plot_utils.get_flops_hoffman(
            embedding_size=emb,
            hidden_size=hidden,
            intermediate_size=inter,
            num_attention_heads=heads,
            num_hidden_layers=layers,
        )

        # Kaplan flops per sequence
        kaplan = plot_utils.get_flops_kaplan(
            non_emb_pars,
            seq_len=SEQ_LEN,
        )

        results["FLOPS Hoffman per sequence"].append(hoffman)
        results["FLOPS Hoffman total"].append(hoffman * n_seq)
        results["FLOPS Kaplan per sequence"].append(kaplan)
        results["FLOPS Kaplan total"].append(kaplan * n_seq)
        results["Tokens"].append(n_seq * SEQ_LEN)

    # Positional (array) assignment ignores index labels; float64 keeps very
    # large totals representable.
    for column, values in results.items():
        df[column] = np.asarray(values, dtype=float)


# Evaluation rows: sequences seen so far = logged update count * sequences per update.
_add_flop_columns(df_eval, df_eval["eval/updates"].to_numpy() * SEQS_PER_UPDATE)

# Test rows: every row is charged the same fixed number of sequences.
_add_flop_columns(df_test, [NUM_TRAINING_SEQ] * len(df_test))
    

#
# Write both enriched tables to CSV in the working directory; the file names
# carry the date tag and the DataFrame index is written as the first column.
output_files = {
    f"EVAL_RESULTS_{date}.csv": df_eval,
    f"TEST_RESULTS_{date}.csv": df_test,
}
for out_path, frame in output_files.items():
    frame.to_csv(out_path)

  exec(code_obj, self.user_global_ns, self.user_ns)


(42, 18)
(42, 13)


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1512/1512 [00:12<00:00, 125.78it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:00<00:00, 133.34it/s]
