In [None]:
# Reload imported modules automatically before each cell executes, so edits to the
# travis_attack package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the line_profiler extension (provides the %lprun line-by-line profiling magic).
%load_ext line_profiler

In [None]:
## Imports and environment variables 
import torch, wandb, os, pandas as pd 
from travis_attack.utils import set_seed, set_session_options, setup_logging, setup_parser, resume_wandb_run, display_all, print_important_cfg_vars
from travis_attack.config import Config
from travis_attack.models import prepare_models, get_optimizer
from travis_attack.data import ProcessedDataset
from travis_attack.trainer import Trainer
from travis_attack.insights import (postprocess_df, create_and_log_wandb_postrun_plots, get_training_dfs)
from fastcore.basics import in_jupyter

import logging 
logger = logging.getLogger("run")

import warnings
warnings.filterwarnings("ignore", message="Passing `max_length` to BeamSearchScorer is deprecated")  # we ignore the warning because it works anyway for diverse beam search 

In [None]:
# Export the project's notebooks with nbdev; the cell output lists every notebook
# that was converted.
from nbdev.export import notebook2script
notebook2script()

# Additionally convert this notebook (run.ipynb) to a plain Python script with nbconvert:
#   - TagRemovePreprocessor drops every cell tagged 'hide'
#   - exclude_markdown leaves markdown cells out of the generated script
# NOTE(review): cells containing magics (%load_ext, %debug, this export cell itself) presumably
# need the 'hide' tag so they do not end up in the generated script -- confirm the cell tags.
!jupyter nbconvert \
    --TagRemovePreprocessor.enabled=True \
    --TagRemovePreprocessor.remove_cell_tags="['hide']" \
    --TemplateExporter.exclude_markdown=True \
    --to python "run.ipynb"

Converted 00_utils.ipynb.
Converted 02_tests.ipynb.
Converted 03_config.ipynb.
Converted 07_models.ipynb.
Converted 10_data.ipynb.
Converted 20_trainer.ipynb.
Converted 25_insights.ipynb.
Converted Untitled.ipynb.
Converted baselines.ipynb.
Converted baselines_analysis.ipynb.
Converted index.ipynb.
Converted pp_eval_baselines.ipynb.
Converted run.ipynb.
Converted show_examples.ipynb.
Converted test_grammar_options.ipynb.
Converted test_pp_model.ipynb.
[NbConvertApp] Converting notebook run.ipynb to python


In [None]:
# Start from the default configuration.
cfg = Config()
if not in_jupyter():
    # Command-line run: apply every `--` option that was actually supplied.
    # Options whose parsed value is None are treated as "not supplied" and skipped.
    parser = setup_parser()
    newargs = vars(parser.parse_args())
    for name, value in newargs.items():
        if value is None:
            continue
        if name in cfg.gen_params_train:
            # Options matching a key of gen_params_train update that dict ...
            cfg.gen_params_train[name] = value
        else:
            # ... everything else is set as an attribute on the config object.
            setattr(cfg, name, value)
if cfg.use_small_ds:
    cfg = cfg.small_ds()

set_seed(cfg.seed)
set_session_options()
setup_logging(cfg, disable_other_loggers=True)

# prepare_models hands back the tokenizers/models together with a cfg object,
# which replaces the cfg built above.
(
    vm_tokenizer, vm_model,
    pp_tokenizer, pp_model, ref_pp_model,
    sts_model,
    nli_tokenizer, nli_model,
    cola_tokenizer, cola_model,
    cfg,
) = prepare_models(cfg)
optimizer = get_optimizer(cfg, pp_model)
ds = ProcessedDataset(
    cfg, vm_tokenizer, vm_model, pp_tokenizer, sts_model,
    load_processed_from_file=False,
)


travis_attack.data: INFO     Will load dataset simple with use_small_ds set to False
travis_attack.data: INFO     Will load dataset simple with use_small_ds set to False
travis_attack.data: INFO     Will load dataset simple with use_small_ds set to False


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Batches', max=1.0, style=ProgressStyle(description_width=…





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Batches', max=1.0, style=ProgressStyle(description_width=…





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Batches', max=1.0, style=ProgressStyle(description_width=…





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Flattening the indices', max=1.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Flattening the indices', max=1.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Flattening the indices', max=1.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Flattening the indices', max=1.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Flattening the indices', max=1.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Flattening the indices', max=1.0, style=ProgressStyle(des…




In [None]:
# NOTE(review): W&B logging is hard-coded to 'disabled' here, while the final cell of this
# notebook calls create_and_log_wandb_postrun_plots() and trainer.run.finish() -- confirm
# this override is intended for real runs and not a debugging leftover.
cfg.wandb['mode'] = 'disabled'

# Build the trainer from the config, all models/tokenizers, the optimizer and the dataset.
trainer = Trainer(
    cfg,
    vm_tokenizer, vm_model,
    pp_tokenizer, pp_model, ref_pp_model,
    sts_model,
    nli_tokenizer, nli_model,
    cola_tokenizer, cola_model,
    optimizer,
    ds,
    initial_eval=True,
)
print_important_cfg_vars(cfg)
trainer.train()

{'pp_name': 'prithivida/parrot_paraphraser_on_T5',
 'dataset_name': 'simple',
 'sts_name': 'sentence-transformers/paraphrase-MiniLM-L12-v2',
 'nli_name': 'howey/electra-small-mnli',
 'cola_name': 'textattack/albert-base-v2-CoLA',
 'vm_name': 'textattack/distilbert-base-uncased-rotten-tomatoes',
 'seed': 421,
 'use_small_ds': False,
 'lr': 4e-05,
 'reward_fn': 'reward_fn_contradiction_and_letter_diff',
 'reward_clip_max': 4,
 'reward_vm_multiplier': 12,
 'sts_threshold': 0.8,
 'acceptability_threshold': 0.5,
 'contradiction_threshold': 0.2,
 'pp_letter_diff_threshold': 30,
 'reward_penalty_type': 'kl_div',
 'kl_coef': 0.25,
 'ref_logp_coef': 0.05,
 'max_pp_length': 48,
 'n_eval_seq': 48,
 'decode_method_train': 'sample',
 'decode_method_eval': 'sample',
 'gen_params_train': {'min_length': 2,
                      'max_length': 48,
                      'do_sample': True,
                      'temperature': 1.1,
                      'top_p': 0.95,
                      'length_penalty'

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

travis_attack.trainer: INFO     Launching initial eval run: train
travis_attack.trainer: INFO     Launching initial eval run: train
travis_attack.trainer: INFO     Launching initial eval run: train


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Launching initial eval run: valid
travis_attack.trainer: INFO     Launching initial eval run: valid
travis_attack.trainer: INFO     Launching initial eval run: valid





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Epoch: 0. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 0. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 0. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000





travis_attack.trainer: INFO     Launching initial eval run: test
travis_attack.trainer: INFO     Launching initial eval run: test
travis_attack.trainer: INFO     Launching initial eval run: test


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Now on epoch 1 of 6
travis_attack.trainer: INFO     Now on epoch 1 of 6
travis_attack.trainer: INFO     Now on epoch 1 of 6







HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Epoch: 1. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 1. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 1. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000





travis_attack.trainer: INFO     Now on epoch 2 of 6
travis_attack.trainer: INFO     Now on epoch 2 of 6
travis_attack.trainer: INFO     Now on epoch 2 of 6


False False False False


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Epoch: 2. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 2. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 2. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000





travis_attack.trainer: INFO     Now on epoch 3 of 6
travis_attack.trainer: INFO     Now on epoch 3 of 6
travis_attack.trainer: INFO     Now on epoch 3 of 6


False False False False


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Epoch: 3. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 3. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 3. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Eval metric: 0.250 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.250 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.250 | Running median: 0.000





travis_attack.trainer: INFO     Now on epoch 4 of 6
travis_attack.trainer: INFO     Now on epoch 4 of 6
travis_attack.trainer: INFO     Now on epoch 4 of 6


False False False False


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Epoch: 4. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 4. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 4. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Eval metric: 0.250 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.250 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.250 | Running median: 0.000





travis_attack.trainer: INFO     Now on epoch 5 of 6
travis_attack.trainer: INFO     Now on epoch 5 of 6
travis_attack.trainer: INFO     Now on epoch 5 of 6


False False False False


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Epoch: 5. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 5. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 5. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000





travis_attack.trainer: INFO     Now on epoch 6 of 6
travis_attack.trainer: INFO     Now on epoch 6 of 6
travis_attack.trainer: INFO     Now on epoch 6 of 6


False False False False


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

travis_attack.trainer: INFO     Epoch: 6. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 6. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Epoch: 6. Min epochs before early stopping activated: 10
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000
travis_attack.trainer: INFO     Eval metric: 0.000 | Running median: 0.000





travis_attack.trainer: INFO     Evaluating test set with best model at path : ../model_checkpoints/travis_attack/dummy-36FPjQASsBexcLZydPyxTF/model_3.pt
travis_attack.trainer: INFO     Evaluating test set with best model at path : ../model_checkpoints/travis_attack/dummy-36FPjQASsBexcLZydPyxTF/model_3.pt
travis_attack.trainer: INFO     Evaluating test set with best model at path : ../model_checkpoints/travis_attack/dummy-36FPjQASsBexcLZydPyxTF/model_3.pt


True False False True


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [None]:
# NOTE(review): scratch cell. It expects `df_expanded`, `eval_metric_cols` and `agg_metrics`
# to already exist in the kernel namespace; none of them is defined in this notebook, so the
# cell fails on a fresh kernel -- define them here or remove the cell.

# Keep one row per group of rows that agree on every column except 'pp_idx' and 'sts_scores'.
df_expanded = df_expanded.drop_duplicates(subset=df_expanded.columns.difference(['pp_idx', 'sts_scores']))


# Aggregate the eval metrics per 'idx'.
# (Fixed: this line had a stray leading space, which raised an IndentationError.)
df_grp_stats = df_expanded[['idx'] + eval_metric_cols].groupby('idx').agg(agg_metrics)
# Flatten the column index produced by .agg() into single "<part>-<part>" strings.
# (Fixed: the original assigned `df_grp_stats.columns` twice in one chained assignment.)
df_grp_stats.columns = ["-".join(a) for a in df_grp_stats.columns.to_flat_index()]
# Attach the number of rows per 'idx' as column 'n_pp'.
df_grp_stats = df_grp_stats.merge(df_expanded.groupby('idx').size().rename('n_pp').to_frame(), how='left', left_index=True, right_index=True)




# Try the same de-duplication on the first 10 rows and display the result.
df1 = df_expanded[0:10]
df1.drop_duplicates(subset=df1.columns.difference(['pp_idx', 'sts_scores']))

In [None]:
# Leftover interactive post-mortem debugger call, disabled so the notebook can be run
# top to bottom unattended. Run `%debug` by hand right after a cell raises if a
# post-mortem session is needed.
# %debug

> [0;32m/home/tproth/Programs/miniconda/envs/nlp_env/lib/python3.8/site-packages/pandas/core/indexes/base.py[0m(3623)[0;36mget_loc[0;34m()[0m
[0;32m   3621 [0;31m                [0;32mreturn[0m [0mself[0m[0;34m.[0m[0m_engine[0m[0;34m.[0m[0mget_loc[0m[0;34m([0m[0mcasted_key[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3622 [0;31m            [0;32mexcept[0m [0mKeyError[0m [0;32mas[0m [0merr[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3623 [0;31m                [0;32mraise[0m [0mKeyError[0m[0;34m([0m[0mkey[0m[0;34m)[0m [0;32mfrom[0m [0merr[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3624 [0;31m            [0;32mexcept[0m [0mTypeError[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3625 [0;31m                [0;31m# If we have a listlike key, _check_indexing_error will raise[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> u 3 
> [0;32m/data/tproth/travis_attack/travis_attack/trainer.py[0m(115)[0;36m_tra

In [None]:
#df = pd.read_csv(cfg.path_results + "run_results.csv")
#display_all(df)

In [None]:
# df = pd.read_csv(f'{cfg.path_run}training_step.csv')
# #display_all(df.query('idx==1'))
# df.columns

In [None]:
#trainer.run.finish()

In [None]:
# # ## TO RESUME RUN
# cfg = Config()
# cfg.run_id = 'b9r88lmz'
# cfg.run_name = "fresh-cosmos-151"
# cfg.path_run = f"{cfg.path_checkpoints}{cfg.run_name}/"
# run = resume_wandb_run(cfg)


In [None]:
#cfg.path_run

In [None]:
# Load the raw (not yet post-processed) dataframes written to the run directory.
df_d = get_training_dfs(cfg.path_run, postprocessed=False)

# Post-process each dataframe, store it back under the same key and pickle it
# next to the run's other files.
for k in list(df_d):
    df = df_d[k]
    df_postprocessed = postprocess_df(df, filter_idx=None, num_proc=1)
    df_d[k] = df_postprocessed
    df_postprocessed.to_pickle(f"{cfg.path_run}{k}_postprocessed.pkl")

create_and_log_wandb_postrun_plots(df_d)
trainer.run.finish()
# Presumably for a run resumed via resume_wandb_run (the `run` object from the commented-out
# resume cell), finish that handle instead:
#run.finish()

travis_attack.insights: INFO     Dataframes have shapes ['training_step: (2445, 52)', 'train: (83456, 21)', 'valid: (10240, 21)', 'test: (1344, 21)']
travis_attack.insights: INFO     Adding text metrics for column orig


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

travis_attack.insights: INFO     Adding text metrics for column pp





HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

travis_attack.insights: INFO     Calculating metric differences between orig and pp
travis_attack.insights: INFO     Calculating text pair statistics for (orig, pp) unique pairs





HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




travis_attack.insights: INFO     Adding text metrics for column orig


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

travis_attack.insights: INFO     Adding text metrics for column pp





HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




travis_attack.insights: INFO     Calculating metric differences between orig and pp
travis_attack.insights: INFO     Calculating text pair statistics for (orig, pp) unique pairs


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)






travis_attack.insights: INFO     Adding text metrics for column orig


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

travis_attack.insights: INFO     Adding text metrics for column pp





HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




travis_attack.insights: INFO     Calculating metric differences between orig and pp
travis_attack.insights: INFO     Calculating text pair statistics for (orig, pp) unique pairs


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




travis_attack.insights: INFO     Adding text metrics for column orig


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

travis_attack.insights: INFO     Adding text metrics for column pp





HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




travis_attack.insights: INFO     Calculating metric differences between orig and pp
travis_attack.insights: INFO     Calculating text pair statistics for (orig, pp) unique pairs


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




VBox(children=(Label(value=' 0.31MB of 0.31MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
%_of_tokens_above_prob_1/vocab_size,████████████▁███████▁▁███▁██████████▁███
%_of_tokens_above_prob_1e-05,████████████▁███████▁▁███▁██████████▁███
%_of_tokens_above_prob_1e-06,████████████▁███████▁▁███▁██████████▁███
%_of_tokens_above_prob_1e-07,████████████▁███████▁▁███▁██████████▁███
%_of_tokens_above_prob_1e-08,████████████▁███████▁▁███▁██████████▁███
%_of_tokens_above_prob_1e-09,████████████▁███████▁▁███▁██████████▁███
acc_batch_n_examples,██▁██▁████▁██▁█▁██▁██▁█▁██▁████▁██▁█▁██▁
acc_num,▁▁▁▁████▁▁▁████▁▁▁████▁▁▁████▁▁▁████▁▁▁█
acceptability_scores-mean-test,█▁
acceptability_scores-mean-train,██▇▇▆▄▄▅▄▄▃▃▂▁▃▁

0,1
%_of_tokens_above_prob_1/vocab_size,1.0
%_of_tokens_above_prob_1e-05,1.0
%_of_tokens_above_prob_1e-06,1.0
%_of_tokens_above_prob_1e-07,1.0
%_of_tokens_above_prob_1e-08,1.0
%_of_tokens_above_prob_1e-09,1.0
acc_batch_n_examples,35.0
acc_num,1.0
acceptability_scores-mean-test,0.67215
acceptability_scores-mean-train,0.61854
