In [1]:
# Per-model batch-size cap that the deployment cells hand to get_device_placement as
# `max_batch_size`; models that are not listed fall back to the requested batch size.
max_batch_sizes = {
    "EleutherAI/gpt-neo-1.3B": 32,
    "EleutherAI/gpt-neo-2.7B": 16,
    "EleutherAI/gpt-j-6B": 8,
    "albert-xxlarge-v2": 64,
    "facebook/opt-6.7b": 16,
}
# Passed to get_device_placement as `max_gpus`.
max_gpus = 9

# Column -> "min"/"max" mapping (presumably the aggregation agg_logs applies per column -- confirm).
classification_column_map = {
    **dict.fromkeys(["eval/loss", "test/loss"], "min"),
    **dict.fromkeys(["eval/accuracy", "test/accuracy"], "max"),
}

# "train/" prefix due to our two-phase training/evaluation pipeline
classification_eval_column_map = dict.fromkeys(
    [
        "train/original_ms_accuracy",
        "train/anti_ms_accuracy",
        "train/optional_ms_accuracy",
        "train/contra_ms_accuracy",
    ],
    "max",
)

In [2]:
import fastmodellib as fml
from fastmodellib.log_utils import load_logs_from_dir, agg_logs, get_experiment_duration
from fastmodellib.deploy import ModelDeployer, ModelTask, DeepSpeedTask, PythonTask
from fastmodellib.deploy import get_device_placement, make_param_grid

In [3]:
# Single deployer instance that every sweep cell in this notebook enqueues its jobs on.
deployer = ModelDeployer()
# Prints the pending / tasks / finished counters (see the cell output).
deployer.status()

pending 0
tasks 0
finished 0


In [11]:
# Re-check the pending / tasks / finished counters after jobs have been queued.
deployer.status()

pending 0
tasks 0
finished 24


In [4]:
# Manual kill switch; presumably cancels everything the deployer manages -- confirm against ModelDeployer.
deployer.stop_all()

# Classification
***

In [4]:
# Encoder-style checkpoints used in every classification sweep.
models_small = [
    "distilbert-base-uncased",
    "bert-base-uncased",
    "bert-large-uncased",
    "roberta-large",
    "albert-xxlarge-v2",
]
# GPT-Neo checkpoints; the sweep cells request at least two GPUs for "EleutherAI" models.
models_large = [
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neo-2.7B",
]

# Large models first, then the small ones.
models = [*models_large, *models_small]

# Original MS
***

In [6]:
# Queue one training job per (model, batch size, learning rate) triple on the original_ms data.
notebook_path = "train_action_classification.ipynb"
dataset_folder = "data/contrastive_moral_stories/original_ms/action+norm/norm_distance/"

# make_param_grid is iterated as (model_name, batch_size, lr) triples.
pgrid = make_param_grid(models, [16, 32, 64, 128], [1e-5, 3e-5, 5e-5])
for model_name, batch_size, lr in pgrid:
    # Models missing from max_batch_sizes use the requested batch size as their cap.
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    # "EleutherAI" checkpoints ask for at least two GPUs, every other model for one.
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")
    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    # str(lr) with "." swapped for "_" keeps the run directory name free of dots.
    logdir = f"data/models/original_ms/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"

    deployer.enqueue(
        notebook=notebook_path,
        backend="deepspeed",
        training_args=training_args,
        deepspeed=True,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        dataset_folder=dataset_folder,
    )

model=distilbert-base-uncased: batch_size=16, device_bs=16, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=16, device_bs=16, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=16, device_bs=16, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=128, device_bs=128, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=128, device_bs=128, num_gpus=1, grad_acc=1
model=distilbert-base-uncased: batch_size=128, device_bs=128, num_gpus=1, grad_acc=

# Original MS no pretrain
***

In [None]:
# Repeat the original_ms sweep for the small models with load_pretrained_weights=False;
# runs are logged under a separate "<dataset>_no_pretrain" directory.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "train_action_classification.ipynb"
dataset = "original_ms"
dataset_folder = f"data/contrastive_moral_stories/{dataset}/action+norm/norm_distance/"

# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models_small[::-1]
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # find suitable device placement parameters
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")
    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    logdir = f"data/models/{dataset}_no_pretrain/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        load_pretrained_weights=False,
        dataset_folder=dataset_folder,
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)

# Anti MS
***

In [None]:
# Queue the full hyperparameter sweep on the anti_ms data for every model in `models`.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "train_action_classification.ipynb"
dataset_folder = "data/contrastive_moral_stories/anti_ms/action+norm/norm_distance/"

# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # find suitable device placement parameters
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")
    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    logdir = f"data/models/anti_ms/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        dataset_folder=dataset_folder,
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)

# Optional MS
***

In [None]:
# Queue the full hyperparameter sweep on the optional_ms data for every model in `models`.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "train_action_classification.ipynb"
dataset_folder = "data/contrastive_moral_stories/optional_ms/action+norm/norm_distance/"

# deploy training jobs
# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # find suitable device placement parameters
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")
    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    logdir = f"data/models/optional_ms/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        dataset_folder=dataset_folder,
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)

# Paradox MS
***

In [None]:
# Queue the hyperparameter sweep on the contra_ms data; only the small models are trained here.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "train_action_classification.ipynb"
dataset = "contra_ms"
dataset_folder = f"data/contrastive_moral_stories/{dataset}/action+norm/norm_distance/"

# deploy training jobs
# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models_small
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # find suitable device placement parameters
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")
    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    logdir = f"data/models/{dataset}/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        dataset_folder=dataset_folder,
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)

# Paradox MS no pretrain
***

In [None]:
# contra_ms sweep with load_pretrained_weights=False; as written it only covers bert-base-uncased.
# Runs are logged under a separate "<dataset>_no_pretrain" directory.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "train_action_classification.ipynb"
dataset = "contra_ms"
dataset_folder = f"data/contrastive_moral_stories/{dataset}/action+norm/norm_distance/"

# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in ["bert-base-uncased"]
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # find suitable device placement parameters
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")
    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    logdir = f"data/models/{dataset}_no_pretrain/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        load_pretrained_weights=False,
        dataset_folder=dataset_folder,
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)

# Classification evaluation
***

In [31]:
def get_best_models_per_dataset(dataset=None, logs=None, metric_key=None, column_map=None):
    """Select, for every model, the run that scores highest on one metric.

    Run names (the index of the frame returned by ``agg_logs``) are split at
    their last "/": the part before it is the group key (the model), the part
    after it is stored in a new ``hyperparameters`` column.

    Args:
        dataset: Dataset name. When given, logs are loaded from
            ``data/models/{dataset}/`` and the metric is
            ``train/{dataset}_accuracy``; any ``logs`` / ``metric_key``
            arguments are overridden.
        logs: Pre-loaded logs; only used when ``dataset`` is None.
        metric_key: Column to maximise; only used when ``dataset`` is None.
        column_map: Mapping handed to ``agg_logs``. Defaults to
            ``classification_eval_column_map``.

    Returns:
        Tuple ``(best, results)``. ``best`` holds one row per group (the run
        with the highest ``metric_key``, first one on ties); ``results`` holds
        every aggregated run including the ``hyperparameters`` column.

    Raises:
        ValueError: If ``dataset`` is None and ``logs`` or ``metric_key`` is missing.
    """
    if dataset is not None:
        metric_key = f"train/{dataset}_accuracy"
        logs = load_logs_from_dir(f"data/models/{dataset}/")
    elif logs is None or metric_key is None:
        raise ValueError("Provide either dataset or logs/metric_key")
    if column_map is None:
        column_map = classification_eval_column_map

    results = agg_logs(logs, column_map)
    results["hyperparameters"] = results.index.map(lambda x: x.rsplit("/", 1)[1])
    groups = results.groupby(results.index.map(lambda x: x.rsplit("/", 1)[0]))
    # Series.argmax() is positional (matches .iloc). Calling it directly removes the
    # dependency on `np`, which this notebook never imports.
    best = groups.apply(lambda runs: runs.iloc[runs[metric_key].argmax()])
    return best, results

In [None]:
### EVALUATION ONLY AFTER TRAINING ###
# the big models sometimes OOM if evaluated right after training...
# Queues one single-GPU evaluation job per trained run of `dataset`, reusing the run's
# training log directory (override_logdir=False).
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "eval_action_classification.ipynb"
dataset = "optional_ms"
dataset_folder = f"data/contrastive_moral_stories/{dataset}/action+norm/norm_distance/"

# deploy eval jobs
# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # inference is much nicer on vram...
    training_args = {
        "per_device_eval_batch_size": 4 * max_batch_sizes.get(model_name, batch_size),
    }
    logdir = f"data/models/{dataset}/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=1,
        logdir=logdir,
        model_name=model_name,
        override_logdir=False,
        dataset_folder=dataset_folder,
        checkpoint="first",
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)
            


### Eval for original ms without pretraining
***

In [None]:
# Queues one single-GPU evaluation job per "original_ms_no_pretrain" run of the small models.
# NOTE(review): the matching training cell reads data from ".../{dataset}/..." without the
# "_no_pretrain" suffix used in dataset_folder here -- confirm this folder is intended.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "eval_action_classification.ipynb"
dataset = "original_ms"
dataset_folder = f"data/contrastive_moral_stories/{dataset}_no_pretrain/action+norm/norm_distance/"

# deploy eval jobs
# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models_small
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # inference is much nicer on vram...
    training_args = {
        "per_device_eval_batch_size": 4 * max_batch_sizes.get(model_name, batch_size),
    }
    logdir = f"data/models/{dataset}_no_pretrain/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=1,
        logdir=logdir,
        model_name=model_name,
        override_logdir=False,
        dataset_folder=dataset_folder,
        checkpoint="first",
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)
            


### Eval for paradox ms
***

In [None]:
# Queues one single-GPU evaluation job per contra_ms run of the small models, reusing the
# run's training log directory (override_logdir=False).
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "eval_action_classification.ipynb"
dataset = "contra_ms"
dataset_folder = f"data/contrastive_moral_stories/{dataset}/action+norm/norm_distance/"

# deploy eval jobs
# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models_small
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # inference is much nicer on vram...
    training_args = {
        "per_device_eval_batch_size": 4 * max_batch_sizes.get(model_name, batch_size),
    }
    logdir = f"data/models/{dataset}/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=1,
        logdir=logdir,
        model_name=model_name,
        override_logdir=False,
        dataset_folder=dataset_folder,
        checkpoint="first",
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)
            


### Eval for paradox ms without pretraining
***

In [None]:
# Queues one single-GPU evaluation job per "contra_ms_no_pretrain" run of the small models.
# NOTE(review): the matching training cell reads data from ".../{dataset}/..." without the
# "_no_pretrain" suffix used in dataset_folder here -- confirm this folder is intended.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "eval_action_classification.ipynb"
dataset = "contra_ms"
dataset_folder = f"data/contrastive_moral_stories/{dataset}_no_pretrain/action+norm/norm_distance/"

# deploy eval jobs
# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models_small
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # inference is much nicer on vram...
    training_args = {
        "per_device_eval_batch_size": 4 * max_batch_sizes.get(model_name, batch_size),
    }
    logdir = f"data/models/{dataset}_no_pretrain/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=1,
        logdir=logdir,
        model_name=model_name,
        override_logdir=False,
        dataset_folder=dataset_folder,
        checkpoint="first",
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)
            


### Results

In [30]:
# creating latex tables...

# dict guarantees ordering *phew*
latex_columns = {"train/original_ms_accuracy":"\\thead{\\textit{ms}}", 
           "train/anti_ms_accuracy":"\\thead{\\textit{anti-ms}}", 
           "train/optional_ms_accuracy":"\\thead{\\textit{o.-ms}}", 
           "train/contra_ms_accuracy":"\\thead{\\textit{contra\\_ms}}",
}
latex_models = [
    'distilbert-base-uncased',
    'bert-base-uncased',
    'bert-large-uncased',
    'roberta-large',
    'albert-xxlarge-v2',
    'EleutherAI/gpt-neo-1.3B',
    'EleutherAI/gpt-neo-2.7B',
]

def format_best_models_table(best):
    # prepare single model tables for latex output

    def extract_hparams(line):
        parts = line.split("_")
        return f"bs {parts[0][2:]}, lr {parts[-1].replace('0','')}"
    
    def format_acc(x):
        return "{:,.1f}".format(100*x)
    available_models = [x for x in latex_models if x in best.index]
    latex_table = best.rename(columns=latex_columns).loc[available_models]

    latex_table = latex_table.style.highlight_max(list(latex_columns.values()), props="font-weight: bold;")
    formats = {c:format_acc for c in latex_columns.values()}
    formats["hyperparameters"] = extract_hparams
    latex_table.format(formatter=formats)
    return latex_table


### Finetuned on Moral Stories:

In [37]:
# Best run per model by train/original_ms_accuracy, read from data/models/original_ms/.
ms_best, ms_all = get_best_models_per_dataset("original_ms")
ms_formatted = format_best_models_table(ms_best)
# convert_css=True turns the bold highlight into \bfseries in the LaTeX output.
ms_latex = ms_formatted.to_latex(convert_css=True)
print(ms_latex)
# Bare last expression so the notebook also renders the styled table.
ms_formatted

\begin{tabular}{lrrrrl}
 & \thead{\textit{ms}} & \thead{\textit{anti-ms}} & \thead{\textit{o.-ms}} & \thead{\textit{contra\_ms}} & hyperparameters \\
distilbert-base-uncased & 78.0 & 22.1 & 52.4 & 50.8 & bs 32, lr 5e-5 \\
bert-base-uncased & 80.7 & 22.2 & 49.0 & 50.6 & bs 16, lr 5e-5 \\
bert-large-uncased & 82.6 & 19.4 & 53.5 & 51.8 & bs 128, lr 3e-5 \\
roberta-large & 92.5 & 43.7 & 49.1 & 61.8 & bs 128, lr 3e-5 \\
albert-xxlarge-v2 & \bfseries 94.2 & \bfseries 45.5 & \bfseries 54.4 & \bfseries 64.7 & bs 32, lr 1e-5 \\
EleutherAI/gpt-neo-1.3B & 83.0 & 30.3 & 50.8 & 54.7 & bs 32, lr 1e-5 \\
EleutherAI/gpt-neo-2.7B & 86.2 & 38.2 & 51.2 & 58.5 & bs 16, lr 1e-5 \\
\end{tabular}



Unnamed: 0,\thead{\textit{ms}},\thead{\textit{anti-ms}},\thead{\textit{o.-ms}},\thead{\textit{contra\_ms}},hyperparameters
distilbert-base-uncased,78.0,22.1,52.4,50.8,"bs 32, lr 5e-5"
bert-base-uncased,80.7,22.2,49.0,50.6,"bs 16, lr 5e-5"
bert-large-uncased,82.6,19.4,53.5,51.8,"bs 128, lr 3e-5"
roberta-large,92.5,43.7,49.1,61.8,"bs 128, lr 3e-5"
albert-xxlarge-v2,94.2,45.5,54.4,64.7,"bs 32, lr 1e-5"
EleutherAI/gpt-neo-1.3B,83.0,30.3,50.8,54.7,"bs 32, lr 1e-5"
EleutherAI/gpt-neo-2.7B,86.2,38.2,51.2,58.5,"bs 16, lr 1e-5"


### Finetuned on anti-ms

In [38]:
# Best run per model by train/anti_ms_accuracy, read from data/models/anti_ms/.
anti_ms_best, anti_all = get_best_models_per_dataset("anti_ms")
anti_formatted = format_best_models_table(anti_ms_best)
# convert_css=True turns the bold highlight into \bfseries in the LaTeX output.
anti_latex = anti_formatted.to_latex(convert_css=True)
print(anti_latex)
# Bare last expression so the notebook also renders the styled table.
anti_formatted

\begin{tabular}{lrrrrl}
 & \thead{\textit{ms}} & \thead{\textit{anti-ms}} & \thead{\textit{o.-ms}} & \thead{\textit{contra\_ms}} & hyperparameters \\
distilbert-base-uncased & 23.6 & 77.0 & 49.4 & 50.0 & bs 64, lr 3e-5 \\
bert-base-uncased & 30.3 & 80.7 & 53.1 & 54.7 & bs 32, lr 5e-5 \\
bert-large-uncased & 30.9 & 82.9 & 52.0 & 55.3 & bs 16, lr 1e-5 \\
roberta-large & 23.1 & 91.4 & 53.8 & 56.1 & bs 16, lr 1e-5 \\
albert-xxlarge-v2 & 27.8 & \bfseries 93.0 & \bfseries 55.9 & \bfseries 58.9 & bs 32, lr 1e-5 \\
EleutherAI/gpt-neo-1.3B & 30.4 & 82.4 & 42.8 & 51.9 & bs 32, lr 1e-5 \\
EleutherAI/gpt-neo-2.7B & \bfseries 35.4 & 85.0 & 46.5 & 55.6 & bs 16, lr 1e-5 \\
\end{tabular}



Unnamed: 0,\thead{\textit{ms}},\thead{\textit{anti-ms}},\thead{\textit{o.-ms}},\thead{\textit{contra\_ms}},hyperparameters
distilbert-base-uncased,23.6,77.0,49.4,50.0,"bs 64, lr 3e-5"
bert-base-uncased,30.3,80.7,53.1,54.7,"bs 32, lr 5e-5"
bert-large-uncased,30.9,82.9,52.0,55.3,"bs 16, lr 1e-5"
roberta-large,23.1,91.4,53.8,56.1,"bs 16, lr 1e-5"
albert-xxlarge-v2,27.8,93.0,55.9,58.9,"bs 32, lr 1e-5"
EleutherAI/gpt-neo-1.3B,30.4,82.4,42.8,51.9,"bs 32, lr 1e-5"
EleutherAI/gpt-neo-2.7B,35.4,85.0,46.5,55.6,"bs 16, lr 1e-5"


### Finetuned on optional-ms

In [39]:
# Best run per model by train/optional_ms_accuracy, read from data/models/optional_ms/.
opt_ms_best, opt_all = get_best_models_per_dataset("optional_ms")
opt_formatted = format_best_models_table(opt_ms_best)
# convert_css=True turns the bold highlight into \bfseries in the LaTeX output.
opt_latex = opt_formatted.to_latex(convert_css=True)
print(opt_latex)
# Bare last expression so the notebook also renders the styled table.
opt_formatted

\begin{tabular}{lrrrrl}
 & \thead{\textit{ms}} & \thead{\textit{anti-ms}} & \thead{\textit{o.-ms}} & \thead{\textit{contra\_ms}} & hyperparameters \\
distilbert-base-uncased & \bfseries 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 & bs 32, lr 5e-5 \\
bert-base-uncased & \bfseries 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 & bs 32, lr 5e-5 \\
bert-large-uncased & \bfseries 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 & bs 32, lr 5e-5 \\
roberta-large & \bfseries 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 & bs 32, lr 5e-5 \\
albert-xxlarge-v2 & \bfseries 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 & bs 32, lr 5e-5 \\
EleutherAI/gpt-neo-1.3B & \bfseries 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 & bs 32, lr 5e-5 \\
EleutherAI/gpt-neo-2.7B & \bfseries 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 & bs 32, lr 5e-5 \\
\end{tabular}



Unnamed: 0,\thead{\textit{ms}},\thead{\textit{anti-ms}},\thead{\textit{o.-ms}},\thead{\textit{contra\_ms}},hyperparameters
distilbert-base-uncased,50.0,50.0,100.0,66.7,"bs 32, lr 5e-5"
bert-base-uncased,50.0,50.0,100.0,66.7,"bs 32, lr 5e-5"
bert-large-uncased,50.0,50.0,100.0,66.7,"bs 32, lr 5e-5"
roberta-large,50.0,50.0,100.0,66.7,"bs 32, lr 5e-5"
albert-xxlarge-v2,50.0,50.0,100.0,66.7,"bs 32, lr 5e-5"
EleutherAI/gpt-neo-1.3B,50.0,50.0,100.0,66.7,"bs 32, lr 5e-5"
EleutherAI/gpt-neo-2.7B,50.0,50.0,100.0,66.7,"bs 32, lr 5e-5"


In [82]:
# create the joined table
# Each styler is exported without index, contra_ms and hyperparameter columns. The [2:-2]
# slice drops the tabular/header lines at the top and the \end{tabular} + empty line at the
# bottom, leaving only the body rows.
# NOTE: Styler.hide() modifies the styler it is called on, so ms_formatted, anti_formatted
# and opt_formatted stay trimmed after this cell has run.

ms_lines = (
    ms_formatted
    .hide(axis="index")
    .hide(subset=["\\thead{\\textit{contra\\_ms}}", "hyperparameters"], axis=1)
    .to_latex(convert_css=True)
    .split("\n")[2:-2]
)
# Prefix each row with its model name and turn the row terminator into a column separator.
# Pairing with latex_models assumes every listed model has a row -- verify for partial results.
ms_lines = [model + " & " + row.replace("\\\\", "& ") for model, row in zip(latex_models, ms_lines)]

anti_lines = (
    anti_formatted
    .hide(axis="index")
    .hide(subset=["\\thead{\\textit{contra\\_ms}}", "hyperparameters"], axis=1)
    .to_latex(convert_css=True)
    .split("\n")[2:-2]
)
anti_lines = [row.replace("\\\\", "& ") for row in anti_lines]

opt_lines = (
    opt_formatted
    .hide(axis="index")
    .hide(subset=["\\thead{\\textit{contra\\_ms}}", "hyperparameters"], axis=1)
    .to_latex(convert_css=True)
    .split("\n")[2:-2]
)
# The optional-ms block is shown without bold markers; its rows keep the closing "\\".
opt_lines = [row.replace("\\bfseries", "") for row in opt_lines]

table = "\n".join(
    [left + middle + right for left, middle, right in zip(ms_lines, anti_lines, opt_lines)]
)
print(table)


distilbert-base-uncased & 78.0 & 22.1 & 52.4 & 23.6 & 77.0 & 49.4 &  50.0 &  50.0 &  100.0 \\
bert-base-uncased & 80.7 & 22.2 & 49.0 & 30.3 & 80.7 & 53.1 &  50.0 &  50.0 &  100.0 \\
bert-large-uncased & 82.6 & 19.4 & 53.5 & 30.9 & 82.9 & 52.0 &  50.0 &  50.0 &  100.0 \\
roberta-large & 92.5 & 43.7 & 49.1 & 23.1 & 91.4 & 53.8 &  50.0 &  50.0 &  100.0 \\
albert-xxlarge-v2 & \bfseries 94.2 & \bfseries 45.5 & \bfseries 54.4 & 27.8 & \bfseries 93.0 & \bfseries 55.9 &  50.0 &  50.0 &  100.0 \\
EleutherAI/gpt-neo-1.3B & 83.0 & 30.3 & 50.8 & 30.4 & 82.4 & 42.8 &  50.0 &  50.0 &  100.0 \\
EleutherAI/gpt-neo-2.7B & 86.2 & 38.2 & 51.2 & \bfseries 35.4 & 85.0 & 46.5 &  50.0 &  50.0 &  100.0 \\


## Finetuned on paradox ms (pretrained)
***

In [40]:
# Best run per model by train/contra_ms_accuracy, read from data/models/contra_ms/.
contrap_best, contrap_all = get_best_models_per_dataset("contra_ms")
contrap_formatted = format_best_models_table(contrap_best)
# hide() changes the styler in place (its return value is discarded), so the
# hyperparameters column is missing from both the LaTeX export and the rendered table.
contrap_formatted.hide(["hyperparameters"], axis=1)
contrap_latex = contrap_formatted.to_latex(convert_css=True)
print(contrap_latex)
# Bare last expression so the notebook also renders the styled table.
contrap_formatted

\begin{tabular}{lrrrr}
 & \thead{\textit{ms}} & \thead{\textit{anti-ms}} & \thead{\textit{o.-ms}} & \thead{\textit{contra\_ms}} \\
distilbert-base-uncased & 70.0 & 65.1 & 99.6 & 78.2 \\
bert-base-uncased & 75.4 & 74.0 & 99.7 & 83.0 \\
bert-large-uncased & 78.4 & 77.2 & \bfseries 99.8 & 85.1 \\
roberta-large & 89.1 & 86.4 & 99.5 & 91.7 \\
albert-xxlarge-v2 & \bfseries 90.8 & \bfseries 88.1 & 99.6 & \bfseries 92.8 \\
\end{tabular}



Unnamed: 0,\thead{\textit{ms}},\thead{\textit{anti-ms}},\thead{\textit{o.-ms}},\thead{\textit{contra\_ms}}
distilbert-base-uncased,70.0,65.1,99.6,78.2
bert-base-uncased,75.4,74.0,99.7,83.0
bert-large-uncased,78.4,77.2,99.8,85.1
roberta-large,89.1,86.4,99.5,91.7
albert-xxlarge-v2,90.8,88.1,99.6,92.8


## Finetuned on original ms (not pretrained)
***

In [41]:
# The "_no_pretrain" runs live in their own directory, so the logs are loaded explicitly and
# the metric is named by hand instead of using the `dataset` shortcut.
logs = load_logs_from_dir("data/models/original_ms_no_pretrain/")
originalnp_best, originalnp_all = get_best_models_per_dataset(logs=logs, metric_key="train/original_ms_accuracy")
originalnp_formatted = format_best_models_table(originalnp_best)
# hide() changes the styler in place; the hyperparameters column is dropped from the export.
originalnp_formatted.hide(["hyperparameters"], axis=1)
originalnp_latex = originalnp_formatted.to_latex(convert_css=True)
print(originalnp_latex)
# Bare last expression so the notebook also renders the styled table.
originalnp_formatted

\begin{tabular}{lrrrr}
 & \thead{\textit{ms}} & \thead{\textit{anti-ms}} & \thead{\textit{o.-ms}} & \thead{\textit{contra\_ms}} \\
distilbert-base-uncased & 70.7 & 29.4 & 54.5 & 51.5 \\
bert-base-uncased & \bfseries 71.1 & 29.2 & 52.1 & 50.8 \\
bert-large-uncased & 64.6 & 35.0 & 48.8 & 49.4 \\
roberta-large & 59.4 & 41.1 & 68.2 & 56.2 \\
albert-xxlarge-v2 & 50.0 & \bfseries 50.0 & \bfseries 100.0 & \bfseries 66.7 \\
\end{tabular}



Unnamed: 0,\thead{\textit{ms}},\thead{\textit{anti-ms}},\thead{\textit{o.-ms}},\thead{\textit{contra\_ms}}
distilbert-base-uncased,70.7,29.4,54.5,51.5
bert-base-uncased,71.1,29.2,52.1,50.8
bert-large-uncased,64.6,35.0,48.8,49.4
roberta-large,59.4,41.1,68.2,56.2
albert-xxlarge-v2,50.0,50.0,100.0,66.7


## Finetuned on paradox ms (not pretrained)
***

In [32]:
# The "_no_pretrain" runs live in their own directory, so the logs are loaded explicitly and
# the metric is named by hand instead of using the `dataset` shortcut.
logs = load_logs_from_dir("data/models/contra_ms_no_pretrain/")
contranp_best, contranp_all = get_best_models_per_dataset(logs=logs, metric_key="train/contra_ms_accuracy")
contranp_formatted = format_best_models_table(contranp_best)
# hide() changes the styler in place; the hyperparameters column is dropped from the export.
contranp_formatted.hide(["hyperparameters"], axis=1)
# The LaTeX string is kept in a variable but, unlike the sibling cells, not printed here.
contranp_latex = contranp_formatted.to_latex(convert_css=True)
contranp_formatted

Unnamed: 0,\thead{\textit{ms}},\thead{\textit{anti-ms}},\thead{\textit{o.-ms}},\thead{\textit{contra\_ms}}
distilbert-base-uncased,49.9,50.6,100.0,66.8
bert-base-uncased,50.2,49.9,100.0,66.7
bert-large-uncased,50.0,50.0,100.0,66.7
roberta-large,50.0,50.0,100.0,66.7
albert-xxlarge-v2,50.0,50.0,100.0,66.7


# ROT-Generator
***

In [None]:
# Queue the hyperparameter sweep for the ROT generator models.
# NOTE(review): this rebinds the global `models` that the classification cells iterate over;
# re-running those cells after this one would sweep the generator models instead.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "rot_generator.ipynb"

# Reversed on purpose: t5-base is queued first, t5-small last.
models = list(reversed(["t5-small", "facebook/bart-base", "facebook/bart-large", "t5-base"]))

# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in models
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # find suitable device placement parameters
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    logdir = f"data/models/rot-generator/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
    )

    deployer.enqueue(script, config, logdir, slots=num_gpus)

In [34]:
# Column -> "min"/"max" mapping handed to agg_logs for the generator runs.
generator_column_map = {"train/loss":"min", "eval/loss":"min", "test/loss": "min", 
                         "eval/BLEU-4":"max", "test/BLEU-4":"max",
                         "eval/ROUGE-L":"max","test/ROUGE-L":"max"}

# Runs are ranked by this column.
metric_key = "test/BLEU-4"

logs = load_logs_from_dir("data/models/rot-generator/")
results = agg_logs(logs, generator_column_map)
# Run names are split at their last "/": model (group key) before it, hyperparameters after it.
results["hyperparameters"] = results.index.map(lambda x: x.rsplit("/", 1)[1])
groups = results.groupby(results.index.map(lambda x: x.rsplit("/", 1)[0]))
# Position of the best run inside each group. Series.argmax() is called directly because
# `np` is never imported in this notebook, so np.argmax fails on a fresh kernel.
best_runs = groups.agg({metric_key: lambda scores: scores.argmax()})
best = groups.apply(lambda x: x.iloc[best_runs.loc[x.name][metric_key]])
best

Unnamed: 0,train/loss,eval/loss,test/loss,eval/BLEU-4,test/BLEU-4,eval/ROUGE-L,test/ROUGE-L,hyperparameters
facebook/bart-base,0.0121,0.01944,0.019394,89.574867,89.626129,95.490997,95.462997,bs16_lr_3e-05
facebook/bart-large,0.01,0.0336,0.033905,89.802925,89.995766,95.553001,95.621002,bs16_lr_3e-05
t5-base,0.0148,0.01857,0.018326,89.012932,89.101349,95.332001,95.336998,bs16_lr_5e-05
t5-small,0.0219,0.022003,0.021591,88.191483,88.328392,94.953003,94.961998,bs16_lr_5e-05


# Textual entailment
***

## Polarity classifier
***

In [None]:
# Queue the hyperparameter sweep for the polarity classifier (bert-base-uncased only).
# NOTE(review): `dataset_folder` is not assigned in this cell; it is whatever an earlier cell
# left in the kernel -- confirm which folder the polarity classifier should read.
# NOTE(review): prepare_deepspeed_run_from_notebook is not imported in this notebook's
# import cell -- confirm where it is expected to come from.
notebook_path = "train_polarity_classifier.ipynb"

# deploy training jobs
# Flattened model -> batch size -> learning rate nesting (same iteration order).
for model_name, batch_size, lr in (
    (name, bs, rate)
    for name in ["bert-base-uncased"]
    for bs in [16, 32, 64, 128]
    for rate in [1e-5, 3e-5, 5e-5]
):
    # find suitable device placement parameters
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )

    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")
    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
    }
    logdir = f"data/models/polarity/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"
    script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
        notebook_path,
        training_args=training_args,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        dataset_folder=dataset_folder,
    )

    # The value returned by enqueue is kept in `t`.
    t = deployer.enqueue(script, config, logdir, slots=num_gpus)

In [5]:
# results
l = load_logs_from_dir("data/models/polarity/")
a = agg_logs(l, {k:v for k,v in classification_eval_column_map.items() if "contra" not in k})
a.style.highlight_max()

Unnamed: 0,train/original_ms_accuracy,train/anti_ms_accuracy,train/optional_ms_accuracy
bert-base-uncased/bs32_lr_5e-05,0.981,0.954,0.895
bert-base-uncased/bs16_lr_1e-05,0.978,0.959,0.901
bert-base-uncased/bs16_lr_5e-05,0.979,0.962,0.96
bert-base-uncased/bs32_lr_1e-05,0.978,0.959,0.986
bert-base-uncased/bs16_lr_3e-05,0.979,0.956,0.947
bert-base-uncased/bs128_lr_1e-05,0.978,0.96,0.992
bert-base-uncased/bs128_lr_3e-05,0.977,0.955,0.979
bert-base-uncased/bs32_lr_3e-05,0.98,0.96,0.917
bert-base-uncased/bs64_lr_3e-05,0.977,0.955,0.992
bert-base-uncased/bs64_lr_5e-05,0.981,0.955,0.917


## Train textual entailment model
***
Requires you to run "apply_polarity_classifier.ipynb" first!

In [None]:
# Queue DeepSpeed training for the textual entailment model.
notebook_path = "train_textual_entailment.ipynb"

model_name = "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli"

# The slices narrow the full grids down to a single configuration:
# [-1:] keeps only the last batch size, [:1] only the first learning rate.
selected_batch_sizes = [16, 32, 64, 128][-1:]
selected_lrs = [1e-5, 3e-5, 5e-5][:1]

# min_gpus is 2 for EleutherAI models and 1 for everything else
min_gpus = 2 if "EleutherAI" in model_name else 1

# deploy training jobs
for batch_size in selected_batch_sizes:
    # models without an entry in max_batch_sizes fall back to the requested batch size
    max_batch_size = max_batch_sizes.get(model_name, batch_size)

    for lr in selected_lrs:
        # find suitable device placement parameters
        device_bs, num_gpus, grad_acc = get_device_placement(
            batch_size, max_batch_size, max_gpus, min_gpus
        )
        print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")

        training_args = {
            "gradient_accumulation_steps": grad_acc,
            "per_device_train_batch_size": device_bs,
            "per_device_eval_batch_size": device_bs,
            "learning_rate": lr,
        }
        logdir = f"data/models/textual_entailment/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"

        # logdir and num_gpus are rebound to whatever the helper returns
        script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
            notebook_path,
            training_args=training_args,
            num_gpus=num_gpus,
            logdir=logdir,
            model_name=model_name,
        )

        # `t` keeps the handle of the most recently queued task
        t = deployer.enqueue(script, config, logdir, slots=num_gpus)

In [63]:
# Print the deployer's queue summary (pending / tasks / finished counts).
deployer.status()

pending 0
tasks 0
finished 142


In [61]:
# Display the task handle currently bound to `t` (set by a `t = deployer.enqueue(...)` cell).
t

<__main__.DeepSpeedTask at 0x7f70cb84e850>

In [62]:
# Cancel the task currently bound to `t`.
t.cancel()

cancelled, shutting down
ds terminated


In [7]:
# Single large-batch test run of the action-classification notebook
# (one batch size, one learning rate, fixed weight decay).
notebook_path = "train_action_classification.ipynb"

dataset_folder = "data/contrastive_moral_stories/original_ms/action+norm/norm_distance/"

model_name = "bert-base-uncased"
wd = 0.0
min_gpus = 2


def fltstr(x):
    """Turn a number into a directory-name fragment by replacing '.' with '_'."""
    return str(x).replace('.','_')


for batch_size in [1024]:
    # models without an entry in max_batch_sizes fall back to the requested batch size
    max_batch_size = max_batch_sizes.get(model_name, batch_size)

    for lr in [1e-5]:
        # find suitable device placement parameters
        device_bs, num_gpus, grad_acc = get_device_placement(
            batch_size, max_batch_size, max_gpus, min_gpus
        )
        print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")

        training_args = {
            "gradient_accumulation_steps": grad_acc,
            "per_device_train_batch_size": device_bs,
            "per_device_eval_batch_size": device_bs,
            "learning_rate": lr,
            "num_train_epochs": 4,
            "weight_decay": wd,
        }
        logdir = f"data/models/tests/{model_name}/bs{batch_size}_lr_{fltstr(lr)}_wd_{fltstr(wd)}/"

        # logdir and num_gpus are rebound to whatever the helper returns
        script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
            notebook_path,
            training_args=training_args,
            num_gpus=num_gpus,
            logdir=logdir,
            model_name=model_name,
            dataset_folder=dataset_folder,
        )

        deployer.enqueue(script, config, logdir, slots=num_gpus)

model=bert-base-uncased: batch_size=1024, device_bs=512, num_gpus=2, grad_acc=1
Starting deepspeed --include=localhost:1,2 --master_port=27501 /home/kiehne/jupyter-lab/workspace/emnlp2022/train_action_classification_fb0b3201-8707-44fb-8863-8da6fead7a43.py --deepspeed data/models/tests/bert-base-uncased/bs1024_lr_1e-05_wd_0_0/ds_config.json


In [24]:
# Grid search over batch size, learning rate and weight decay for a single
# model, using the action-classification training notebook. One DeepSpeed job
# is queued on `deployer` per combination (3 x 3 x 3 = 27 jobs).
notebook_path = "train_action_classification.ipynb"

dataset_folder = "data/contrastive_moral_stories/original_ms/action+norm/norm_distance/"

# Model swept by this cell. To sweep a different model (e.g. "facebook/opt-6.7b",
# which also has an entry in max_batch_sizes), change this single assignment.
model_name = "EleutherAI/gpt-neo-1.3B"

# Constant for the whole sweep, so it is set once instead of on every iteration.
min_gpus = 2


def fltstr(x):
    """Turn a number into a directory-name fragment by replacing '.' with '_'."""
    return str(x).replace('.','_')


for batch_size in [256, 512, 1024]:
    # Upper bound handed to get_device_placement; models without an entry in
    # max_batch_sizes fall back to the requested batch size, which is why this
    # lookup cannot move above the batch-size loop.
    max_batch_size = max_batch_sizes.get(model_name, batch_size)

    for lr in [1e-5, 3e-5, 5e-5]:
        for wd in [0.01, 0.05, 0.1]:
            # find suitable device placement parameters
            device_bs, num_gpus, grad_acc = get_device_placement(
                batch_size, max_batch_size, max_gpus, min_gpus
            )
            print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")

            training_args = {
                "gradient_accumulation_steps": grad_acc,
                "per_device_train_batch_size": device_bs,
                "per_device_eval_batch_size": device_bs,
                "learning_rate": lr,
                "num_train_epochs": 4,
                "weight_decay": wd,
            }
            logdir = f"data/models/tests/{model_name}/bs{batch_size}_lr_{fltstr(lr)}_wd_{fltstr(wd)}/"

            # logdir and num_gpus are rebound to whatever the helper returns
            script, config, logdir, num_gpus = prepare_deepspeed_run_from_notebook(
                notebook_path,
                training_args=training_args,
                num_gpus=num_gpus,
                logdir=logdir,
                model_name=model_name,
                dataset_folder=dataset_folder,
            )

            deployer.enqueue(script, config, logdir, slots=num_gpus)

model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=256, device_bs=32, num_gpus=8, grad_acc=1
model=EleutherAI/gpt-neo-1.3B: batch_size=512, device_bs=32, num_gpus=8, grad_acc=2
model=EleutherAI/gpt-neo-1.3B: batch_size=512, device_bs=32, num_gpus=8, grad_acc=2
model=EleutherAI/gpt-neo-1.3B: batch_size=512, device_bs=32, num_gpus=8, gra

In [22]:
# Query the duration of the experiments stored under this directory
# (the recorded output of this cell is a Timedelta).
get_experiment_duration("data/models/tests/EleutherAI/")

2022-07-05 01:18:42.585866: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Timedelta('0 days 05:02:03.846773')

In [8]:
# Ask the deployer to stop all of its tasks.
deployer.stop_all()

cancelled, shutting down
ds terminated


## TODO:
***
* Try DeepSpeed autotuning to improve throughput
* Delete the papermill-generated files once the process terminates

* Figure out what our main argument is

In [32]:
# Sweep roberta-large over batch size and learning rate with the judgments-only
# training notebook; every run is queued with action_only=False.
notebook_path = "train_action_classification_judgments_only.ipynb"

pgrid = make_param_grid(["roberta-large"], [32, 64, 128], [1e-5, 3e-5, 5e-5])

for model_name, batch_size, lr in pgrid:
    # find suitable device placement parameters
    # (min_gpus is 2 for EleutherAI models and 1 for everything else; models
    # without an entry in max_batch_sizes fall back to the requested batch size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )
    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")

    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
        "load_best_model_at_end": True,
    }
    logdir = f"data/models/anti_ms_judgments_only/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"

    deployer.enqueue(
        notebook=notebook_path,
        backend="deepspeed",
        training_args=training_args,
        deepspeed=True,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        action_only=False,
    )

model=roberta-large: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=128, device_bs=128, num_gpus=1, grad_acc=1
Running training and evaluation in th

In [31]:
# Sweep roberta-large over batch size and learning rate; every run is queued
# with action_only=True and logs to the anti_ms_actions_only directory.
# NOTE(review): this reuses the "judgments_only" training notebook together with
# action_only=True -- confirm that notebook is meant to cover both variants.
notebook_path = "train_action_classification_judgments_only.ipynb"

pgrid = make_param_grid(["roberta-large"], [32, 64, 128], [1e-5, 3e-5, 5e-5])

for model_name, batch_size, lr in pgrid:
    # find suitable device placement parameters
    # (min_gpus is 2 for EleutherAI models and 1 for everything else; models
    # without an entry in max_batch_sizes fall back to the requested batch size)
    min_gpus = 2 if "EleutherAI" in model_name else 1
    max_batch_size = max_batch_sizes.get(model_name, batch_size)
    device_bs, num_gpus, grad_acc = get_device_placement(
        batch_size, max_batch_size, max_gpus, min_gpus
    )
    print(f"model={model_name}: batch_size={batch_size}, device_bs={device_bs}, num_gpus={num_gpus}, grad_acc={grad_acc}")

    training_args = {
        "gradient_accumulation_steps": grad_acc,
        "per_device_train_batch_size": device_bs,
        "per_device_eval_batch_size": device_bs,
        "learning_rate": lr,
        "load_best_model_at_end": True,
    }
    logdir = f"data/models/anti_ms_actions_only/{model_name}/bs{batch_size}_lr_{str(lr).replace('.','_')}/"

    deployer.enqueue(
        notebook=notebook_path,
        backend="deepspeed",
        training_args=training_args,
        deepspeed=True,
        num_gpus=num_gpus,
        logdir=logdir,
        model_name=model_name,
        action_only=True,
    )

model=roberta-large: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=32, device_bs=32, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=64, device_bs=64, num_gpus=1, grad_acc=1
Running training and evaluation in the same process might cause cuda OOMs!
model=roberta-large: batch_size=128, device_bs=128, num_gpus=1, grad_acc=1
Running training and evaluation in th

In [33]:
# Accuracy summary for the runs stored under anti_ms_judgments_only;
# all three accuracy columns use the "max" mode.
logs = load_logs_from_dir("data/models/anti_ms_judgments_only/")
accuracy_columns = dict.fromkeys(
    ["test/train_accuracy", "test/dev_accuracy", "test/test_accuracy"],
    "max",
)
agg_logs(logs, accuracy_columns)

Unnamed: 0,test/train_accuracy,test/dev_accuracy,test/test_accuracy
bert-base-uncased/bs32_lr_5e-05,0.993895,0.796482,0.7835
bert-base-uncased/bs32_lr_1e-05,0.91092,0.795477,0.7805
bert-base-uncased/bs128_lr_1e-05,0.854269,0.776382,0.76
bert-base-uncased/bs128_lr_3e-05,0.934191,0.796985,0.7905
bert-base-uncased/bs32_lr_3e-05,0.986888,0.80201,0.7935
bert-base-uncased/bs64_lr_3e-05,0.961015,0.796482,0.789
bert-base-uncased/bs64_lr_5e-05,0.979882,0.80804,0.7885
bert-base-uncased/bs128_lr_5e-05,0.970423,0.797487,0.7885
bert-base-uncased/bs64_lr_1e-05,0.874337,0.788442,0.775
roberta-large/bs32_lr_5e-05,0.5,0.5,0.5


In [34]:
# Accuracy summary for the runs stored under anti_ms_actions_only;
# all three accuracy columns use the "max" mode.
logs = load_logs_from_dir("data/models/anti_ms_actions_only/")
accuracy_columns = dict.fromkeys(
    ["test/train_accuracy", "test/dev_accuracy", "test/test_accuracy"],
    "max",
)
agg_logs(logs, accuracy_columns)

Unnamed: 0,test/train_accuracy,test/dev_accuracy,test/test_accuracy
bert-base-uncased/bs32_lr_5e-05,0.98929,0.799497,0.7805
bert-base-uncased/bs32_lr_1e-05,0.902362,0.794975,0.7815
bert-base-uncased/bs128_lr_1e-05,0.84401,0.775377,0.7695
bert-base-uncased/bs128_lr_3e-05,0.941397,0.802513,0.786
bert-base-uncased/bs32_lr_3e-05,0.97773,0.799497,0.7835
bert-base-uncased/bs64_lr_3e-05,0.959163,0.802513,0.784
bert-base-uncased/bs64_lr_5e-05,0.980733,0.80201,0.785
bert-base-uncased/bs128_lr_5e-05,0.979181,0.801508,0.781
bert-base-uncased/bs64_lr_1e-05,0.879642,0.790452,0.7755
roberta-large/bs32_lr_5e-05,0.5,0.5,0.5


In [26]:
# Ask the deployer to stop all of its tasks.
deployer.stop_all()

cancelled, shutting down
cancelled, shutting down
cancelled, shutting down
cancelled, shutting down
cancelled, shutting down
cancelled, shutting down
cancelled, shutting down
cancelled, shutting down
cancelled, shutting down
ds terminated
ds terminated
ds terminated
ds terminated
ds terminated
ds terminated
ds terminated
ds terminated
ds terminated
