In [2]:
import os
import sys
import pandas as pd

from run_cebab import get_cbm_standard, get_cbm_joint, get_cbm_LLM_mix_joint

# Working Dir

In [3]:
# Switch the kernel's working directory to run_cebab.
# Guarded on the current directory name so the cell can be re-run: once the
# kernel is already inside run_cebab, a second bare os.chdir('run_cebab')
# would look for run_cebab/run_cebab instead.
if os.path.basename(os.getcwd()) != 'run_cebab':
    os.chdir('run_cebab')

# Configs

In [4]:
# Training entry points, keyed by the label recorded in the results.
plms_funcs = dict([
    ('PLMs', get_cbm_standard),
    ('CBE-PLMs', get_cbm_joint),
    ('CBE-PLMs-CM', get_cbm_LLM_mix_joint),
])

# Dataset variants, D vs. D^ (in that order).
data_types = [
    'pure_cebab',
    'aug_cebab',
]

# Backbone models, in the order they are run.
model_names = [
    'bert-base-uncased',
    'roberta-base',
    'gpt2',
    'lstm',
]

# Optimizer learning rate to use for each model name.
lr_rate_dt = {'lstm': 1e-2, 'gpt2': 1e-4, 'roberta-base': 1e-5, 'bert-base-uncased': 1e-5}

In [5]:
def get_average_scores(score_list):
    """Average a sequence of score pairs and scale both means by 100.

    Args:
        score_list: iterable of 2-item pairs of numbers.

    Returns:
        A tuple ``(mean_first * 100, mean_second * 100)``. A falsy
        ``score_list`` (e.g. an empty list) yields ``(0.0, 0.0)``.
    """
    if not score_list:
        return (0.0, 0.0)

    first_total, second_total = 0.0, 0.0
    count = 0
    for pair in score_list:
        first, second = pair
        first_total += first
        second_total += second
        count += 1

    first_pct = first_total / count * 100
    second_pct = second_total / count * 100
    return (first_pct, second_pct)

def get_tuple_2f_fmt(tp):
    """Render a pair of numbers as ``"<first>/<second>"`` with two decimals each.

    Args:
        tp: a 2-item iterable of numbers.

    Returns:
        The formatted string, e.g. ``"47.25/65.99"``.
    """
    first, second = tp
    return "/".join(format(value, ".2f") for value in (first, second))

In [27]:
# Fixed settings passed unchanged to every training run.
num_epochs = 20
max_len = 512
batch_size = 8

# Column-oriented record of every run (one list per column, all the same
# length), so it can be handed to pd.DataFrame.from_dict.
results = {
    'data_type': [],
    'function': [],
    'model': [],
    'score': []
}

# Sweep every (function, data type, model) combination.
for f_name, f in plms_funcs.items():
    print(f"Running {f_name}...")
    for data_type in data_types:
        print(f"\tRunning {data_type}...")
        for model_name in model_names:
            lr = lr_rate_dt.get(model_name)
            print(f"\t\tRunning {model_name}... with learning rate: {lr}")
            # Train first and record afterwards: if the run raises or the
            # kernel is interrupted, no partial row is left behind and the
            # four columns of `results` keep equal lengths.
            score = f(
                model_name=model_name,
                num_epochs=num_epochs,
                data_type=data_type,
                max_len=max_len,
                batch_size=batch_size,
                optimizer_lr=lr
            )
            results['data_type'].append(data_type)
            results['function'].append(f_name)
            results['model'].append(model_name)
            results['score'].append(score)

In [7]:
# One-off export of the training results: builds a DataFrame from `results`,
# adds the averaged (`score_avg`) and formatted (`score_fmted`) columns, and
# writes everything to result.csv.
# NOTE(review): presumably left commented out so that re-running the notebook
# does not overwrite the saved result.csv -- confirm, and uncomment after a
# fresh training run.
# df = pd.DataFrame.from_dict(results)
# df['score_avg'] = df.score.apply(get_average_scores)
# df['score_fmted'] = df.score_avg.apply(get_tuple_2f_fmt)
# df.to_csv("result.csv", index=False)

In [17]:
# Load the saved results and swap the raw identifiers for short display labels.
# Values missing from either mapping become NaN.
df = pd.read_csv('result.csv').assign(
    model=lambda frame: frame['model'].map({
        'lstm': 'LSTM',
        'gpt2': 'GPT2',
        'bert-base-uncased': 'BERT',
        'roberta-base': 'RoBERTa',
    }),
    data_type=lambda frame: frame['data_type'].map({
        'pure_cebab': 'D',
        'aug_cebab': 'D^',
    }),
)

In [26]:
# Row layout of the final table: functions outermost, models innermost.
func_order = ["PLMs", "CBE-PLMs", "CBE-PLMs-CM"]
model_order = ["LSTM", "GPT2", "BERT", "RoBERTa"]

# One row per (function, model), one column per data type, then force the
# rows into the fixed order above (missing combinations show up as NaN rows).
dfp = df.pivot(
    index=['function', 'model'],
    columns=['data_type'],
    values='score_fmted',
).reindex(
    pd.MultiIndex.from_product(
        [func_order, model_order],
        names=["function", "model"],
    )
)
dfp

Unnamed: 0_level_0,data_type,D,D^
function,model,Unnamed: 2_level_1,Unnamed: 3_level_1
PLMs,LSTM,47.25/65.99,43.26/63.72
PLMs,GPT2,66.90/77.19,65.28/76.44
PLMs,BERT,72.29/81.23,72.46/81.33
PLMs,RoBERTa,72.35/81.65,70.74/81.79
CBE-PLMs,LSTM,39.73/61.93,40.71/60.76
CBE-PLMs,GPT2,66.07/77.88,58.04/73.65
CBE-PLMs,BERT,70.69/81.32,70.80/81.10
CBE-PLMs,RoBERTa,70.75/81.74,71.75/82.08
CBE-PLMs-CM,LSTM,0.00/0.00,0.00/0.00
CBE-PLMs-CM,GPT2,65.18/77.56,54.93/70.82
