In [None]:
import os
import sys
import pandas as pd

from run_cebab import get_cbm_standard, get_cbm_joint, get_cbm_LLM_mix_joint

# Working Dir

In [2]:
# Switch into the run_cebab directory for the rest of the notebook.
# The guard makes this cell safe to re-run: once the kernel is already inside
# run_cebab there is normally no nested run_cebab/ to enter, and an unguarded
# second chdir would raise FileNotFoundError. If the directory is missing on a
# fresh kernel, os.chdir still raises, exactly as before.
if os.path.isdir('run_cebab') or os.path.basename(os.getcwd()) != 'run_cebab':
    os.chdir('run_cebab')

# Configs

In [None]:
# Experiment variants: display label -> run function imported from run_cebab.
# The label is what gets printed during the sweep and stored in the results.
plms_funcs = dict(
    [
        ('PLMs', get_cbm_standard),
        ('CBE-PLMs', get_cbm_joint),
        ('CBE-PLMs-CM', get_cbm_LLM_mix_joint),
    ]
)

# Dataset variants passed as `data_type` to each run function (D vs. D^).
data_types = [
    'pure_cebab',
    'aug_cebab',
]

# Backbone models, in the order they are run.
model_names = [
    'bert-base-uncased',
    'roberta-base',
    'gpt2',
    'lstm',
]

# Optimizer learning rate to use for each backbone model.
lr_rate_dt = {'lstm': 1e-2, 'gpt2': 1e-4, 'roberta-base': 1e-5, 'bert-base-uncased': 1e-5}

In [3]:
def get_average_scores(score_list):
    """Average a collection of two-component scores and scale the means by 100.

    Args:
        score_list: Iterable of 2-item entries (each entry is unpacked into two
            numbers). A falsy value (e.g. ``None`` or an empty list) is allowed.

    Returns:
        A ``(mean_first * 100, mean_second * 100)`` tuple, or ``(0.0, 0.0)``
        when ``score_list`` is falsy.
    """
    if not score_list:
        return (0.0, 0.0)

    # Accumulate left to right so the floating-point result does not depend on
    # how a library summation routine happens to round.
    totals = [0.0, 0.0]
    count = 0
    for first, second in score_list:
        totals[0] += first
        totals[1] += second
        count += 1

    return ((totals[0] / count * 100), (totals[1] / count * 100))

def get_tuple_2f_fmt(tp):
    """Render a pair of numbers as ``"<first>/<second>"`` with two decimals each.

    Args:
        tp: Iterable of exactly two numbers.

    Returns:
        The formatted string, e.g. ``"37.50/50.00"``.
    """
    first, second = tp
    return "{:.2f}/{:.2f}".format(first, second)

In [4]:
# Settings passed unchanged to every run function.
num_epochs = 20
max_len = 512
batch_size = 8

# Column-oriented accumulator: one entry per (function, data_type, model) run.
# All four lists must stay the same length so the dict can become a DataFrame.
results = {
    'data_type': [],
    'function': [],
    'model': [],
    'score': []
}

# functions
for f_name, f in plms_funcs.items():
    print(f"Running {f_name}...")
    for data_type in data_types:
        print(f"\tRunning {data_type}...")
        for model_name in model_names:
            lr = lr_rate_dt.get(model_name)
            print(f"\t\tRunning {model_name}... with learning rate: {lr}")
            # Run first, record afterwards: if the run raises, nothing is
            # appended, so the columns gathered so far keep equal lengths and
            # the completed runs can still be turned into a DataFrame.
            score = f(
                model_name=model_name,
                num_epochs=num_epochs,
                data_type=data_type,
                max_len=max_len,
                batch_size=batch_size,
                optimizer_lr=lr
            )
            results['data_type'].append(data_type)
            results['function'].append(f_name)
            results['model'].append(model_name)
            results['score'].append(score)

Running PLMs...
	Running pure_cebab...
		Running bert-base-uncased... with learning rate: 1e-05


Training: 100%|██████████| 183/183 [00:28<00:00,  6.31batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.52batch/s]


Epoch 1: Val Acc = 50.029886431560065 Val Macro F1 = 63.63764939723184
Test!


Test: 100%|██████████| 212/212 [00:11<00:00, 18.61batch/s]


Epoch 1: Test Acc = 50.68087625814092 Test Macro F1 = 63.87937748787647
		Running roberta-base... with learning rate: 1e-05


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training: 100%|██████████| 183/183 [00:28<00:00,  6.38batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.98batch/s]


Epoch 1: Val Acc = 64.07650926479378 Val Macro F1 = 72.35676723573592
Test!


Test: 100%|██████████| 212/212 [00:11<00:00, 19.05batch/s]


Epoch 1: Test Acc = 63.58792184724689 Test Macro F1 = 73.02175910556439
		Running gpt2... with learning rate: 0.0001


Training: 100%|██████████| 183/183 [00:30<00:00,  6.00batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 17.53batch/s]


Epoch 1: Val Acc = 58.876270173341304 Val Macro F1 = 72.0512325062708
Test!


Test: 100%|██████████| 212/212 [00:11<00:00, 17.69batch/s]


Epoch 1: Test Acc = 60.50917702782712 Test Macro F1 = 72.9359530274976
		Running lstm... with learning rate: 0.01


Training: 100%|██████████| 183/183 [00:01<00:00, 141.87batch/s]
Val: 100%|██████████| 210/210 [00:01<00:00, 137.32batch/s]


Epoch 1: Val Acc = 27.017334130304842 Val Macro F1 = 40.20214692628115
Test!


Test: 100%|██████████| 212/212 [00:01<00:00, 137.03batch/s]


Epoch 1: Test Acc = 27.29425695677916 Test Macro F1 = 40.23426598513936
	Running aug_cebab...
		Running bert-base-uncased... with learning rate: 1e-05


Training: 100%|██████████| 183/183 [00:28<00:00,  6.42batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.49batch/s]


Epoch 1: Val Acc = 61.14763897190676 Val Macro F1 = 72.76223284139552
Test!


Test: 100%|██████████| 211/211 [00:11<00:00, 18.51batch/s]


Epoch 1: Test Acc = 61.36498516320474 Test Macro F1 = 72.87759048988632
		Running roberta-base... with learning rate: 1e-05


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training: 100%|██████████| 183/183 [00:28<00:00,  6.38batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.86batch/s]


Epoch 1: Val Acc = 67.90197250448297 Val Macro F1 = 78.26486852815819
Test!


Test: 100%|██████████| 211/211 [00:11<00:00, 18.93batch/s]


Epoch 1: Test Acc = 68.486646884273 Test Macro F1 = 78.85821107004375
		Running gpt2... with learning rate: 0.0001


Training: 100%|██████████| 183/183 [00:30<00:00,  6.02batch/s]
Val: 100%|██████████| 210/210 [00:12<00:00, 17.44batch/s]


Epoch 1: Val Acc = 29.826658696951586 Val Macro F1 = 45.97102559543168
Test!


Test: 100%|██████████| 211/211 [00:12<00:00, 17.53batch/s]


Epoch 1: Test Acc = 29.554896142433236 Test Macro F1 = 45.70488734320543
		Running lstm... with learning rate: 0.01


Training: 100%|██████████| 183/183 [00:01<00:00, 147.87batch/s]
Val: 100%|██████████| 210/210 [00:01<00:00, 132.89batch/s]


Epoch 1: Val Acc = 27.017334130304842 Val Macro F1 = 40.20214692628115
Test!


Test: 100%|██████████| 211/211 [00:01<00:00, 127.12batch/s]


Epoch 1: Test Acc = 27.299703264094955 Test Macro F1 = 40.234709433382285
Running CBE-PLMs...
	Running pure_cebab...
		Running bert-base-uncased... with learning rate: 1e-05


Training: 100%|██████████| 183/183 [00:28<00:00,  6.40batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.39batch/s]


Epoch 1: Val concept Acc = 44.02271368798566 Val concept Macro F1 = 51.60804964280153
Epoch 1: Val Acc = 62.94082486551106 Val Macro F1 = 73.5520832695864
Test!


Test: 100%|██████████| 212/212 [00:11<00:00, 18.50batch/s]


Epoch 1: Test concept Acc = 43.17643576080521 Test concept Macro F1 = 51.36825859929996
Epoch 1: Test Acc = 65.66015393724098 Test Macro F1 = 75.0660301708592
		Running roberta-base... with learning rate: 1e-05


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training: 100%|██████████| 183/183 [00:28<00:00,  6.36batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.79batch/s]


Epoch 1: Val concept Acc = 57.38194859533772 Val concept Macro F1 = 61.35688096540086
Epoch 1: Val Acc = 66.52719665271967 Val Macro F1 = 75.84625646384158
Test!


Test: 100%|██████████| 212/212 [00:11<00:00, 19.00batch/s]


Epoch 1: Test concept Acc = 58.229721728833624 Test concept Macro F1 = 61.88925853072496
Epoch 1: Test Acc = 66.0153937240971 Test Macro F1 = 75.91776623287603
		Running gpt2... with learning rate: 0.0001


Training: 100%|██████████| 183/183 [00:30<00:00,  6.01batch/s]
Val: 100%|██████████| 210/210 [00:12<00:00, 17.40batch/s]


Epoch 1: Val concept Acc = 59.98206814106396 Val concept Macro F1 = 62.824589819880174
Epoch 1: Val Acc = 52.65989240884639 Val Macro F1 = 67.05098219962775
Test!


Test: 100%|██████████| 212/212 [00:12<00:00, 17.56batch/s]


Epoch 1: Test concept Acc = 60.52397868561279 Test concept Macro F1 = 62.92699870271543
Epoch 1: Test Acc = 53.522794552989936 Test Macro F1 = 67.36727592522193
		Running lstm... with learning rate: 0.01


Training: 100%|██████████| 183/183 [00:01<00:00, 134.27batch/s]
Val: 100%|██████████| 210/210 [00:01<00:00, 131.81batch/s]


Epoch 1: Val concept Acc = 55.24506873879259 Val concept Macro F1 = 54.407725262073455
Epoch 1: Val Acc = 21.219366407650927 Val Macro F1 = 41.9981468720971
Test!


Test: 100%|██████████| 212/212 [00:01<00:00, 127.66batch/s]


Epoch 1: Test concept Acc = 56.867969212551806 Test concept Macro F1 = 54.83383673131463
Epoch 1: Test Acc = 20.663114268798104 Test Macro F1 = 41.660813540387274
	Running aug_cebab...
		Running bert-base-uncased... with learning rate: 1e-05


Training: 100%|██████████| 183/183 [00:28<00:00,  6.38batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.21batch/s]


Epoch 1: Val concept Acc = 55.7501494321578 Val concept Macro F1 = 51.650724725735756
Epoch 1: Val Acc = 64.55469216975493 Val Macro F1 = 76.14642920762552
Test!


Test: 100%|██████████| 211/211 [00:11<00:00, 18.24batch/s]


Epoch 1: Test concept Acc = 55.98219584569733 Test concept Macro F1 = 51.63683500963497
Epoch 1: Test Acc = 63.026706231454014 Test Macro F1 = 75.65698610303576
		Running roberta-base... with learning rate: 1e-05


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training: 100%|██████████| 183/183 [00:28<00:00,  6.35batch/s]
Val: 100%|██████████| 210/210 [00:11<00:00, 18.61batch/s]


Epoch 1: Val concept Acc = 62.52839210998208 Val concept Macro F1 = 61.41055088252284
Epoch 1: Val Acc = 69.63538553496711 Val Macro F1 = 79.37846917217557
Test!


Test: 100%|██████████| 211/211 [00:11<00:00, 18.67batch/s]


Epoch 1: Test concept Acc = 63.85756676557863 Test concept Macro F1 = 61.98637611692154
Epoch 1: Test Acc = 67.00296735905044 Test Macro F1 = 78.66227180841682
		Running gpt2... with learning rate: 0.0001


Training: 100%|██████████| 183/183 [00:30<00:00,  5.99batch/s]
Val: 100%|██████████| 210/210 [00:12<00:00, 17.21batch/s]


Epoch 1: Val concept Acc = 50.41841004184101 Val concept Macro F1 = 51.25309623279464
Epoch 1: Val Acc = 59.83263598326359 Val Macro F1 = 73.25395248503227
Test!


Test: 100%|██████████| 211/211 [00:12<00:00, 17.31batch/s]


Epoch 1: Test concept Acc = 51.471810089020764 Test concept Macro F1 = 51.48673718832424
Epoch 1: Test Acc = 59.82195845697329 Test Macro F1 = 73.53339767494136
		Running lstm... with learning rate: 0.01


Training: 100%|██████████| 183/183 [00:01<00:00, 119.26batch/s]
Val: 100%|██████████| 210/210 [00:01<00:00, 125.31batch/s]


Epoch 1: Val concept Acc = 75.54692169754932 Val concept Macro F1 = 57.60844696101505
Epoch 1: Val Acc = 27.017334130304842 Val Macro F1 = 40.20214692628115
Test!


Test: 100%|██████████| 211/211 [00:01<00:00, 124.20batch/s]


Epoch 1: Test concept Acc = 75.83382789317508 Test concept Macro F1 = 57.835447050058974
Epoch 1: Test Acc = 27.299703264094955 Test Macro F1 = 40.234709433382285
Running CBE-PLMs-CM...
	Running pure_cebab...
		Running bert-base-uncased... with learning rate: 1e-05


UnboundLocalError: cannot access local variable 'train_split' where it is not associated with a value

In [4]:
# One row per recorded run, built from the column lists collected in `results`.
df = pd.DataFrame(results)

# Derive the averaged score pair for each run, then its printable "x.xx/y.yy" form.
df['score_avg'] = df['score'].apply(get_average_scores)
df['score_fmted'] = df['score_avg'].apply(get_tuple_2f_fmt)

# Save the full table (raw scores included) without the index column.
df.to_csv("result.csv", index=False)

In [4]:
# Summary table: one row per (function, model), one column per data_type.
# 'function' has to be part of the index because every (model, data_type) pair
# occurs once per function, and DataFrame.pivot raises ValueError when an
# index/column combination is duplicated.
df.pivot(index=['function', 'model'], columns=['data_type'], values='score_fmted')