In [None]:
# Load the autoreload extension and set mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the line_profiler extension (provides the %lprun magic).
%load_ext line_profiler

In [None]:
## Imports and environment variables 
import os
import torch

from travis_attack.utils import set_seed, set_session_options, setup_logging, resume_wandb_run, display_all
from travis_attack.config import Config
from travis_attack.models import prepare_models, get_optimizer
from travis_attack.data import ProcessedDataset
from travis_attack.trainer import Trainer
from travis_attack.insights import (postprocess_df, create_and_log_wandb_postrun_plots, get_training_dfs)

In [None]:
# Notebook-level logger, registered under the name "run".
import logging 
logger = logging.getLogger("run")

In [None]:
# Build the run configuration. For rotten_tomatoes the config is replaced by
# whatever cfg.small_ds() returns (presumably a reduced-dataset variant).
cfg = Config()
if cfg.dataset_name == "rotten_tomatoes":
    cfg = cfg.small_ds()

# Seeding, session options and logging are set up before any model or dataset
# is created.
set_seed(cfg.seed)
set_session_options()
setup_logging(cfg, disable_other_loggers=False)

# prepare_models also hands back cfg, so the name is rebound here.
(
    vm_tokenizer,
    vm_model,
    pp_tokenizer,
    pp_model,
    sts_model,
    cfg,
) = prepare_models(cfg)
optimizer = get_optimizer(cfg, pp_model)

# load_processed_from_file=True asks for the processed dataset to be read from
# file rather than rebuilt.
ds = ProcessedDataset(
    cfg,
    vm_tokenizer,
    vm_model,
    pp_tokenizer,
    sts_model,
    load_processed_from_file=True,
)

sentence_transformers.SentenceTransformer: INFO     Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
sentence_transformers.SentenceTransformer: INFO     Did not find folder sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
sentence_transformers.SentenceTransformer: INFO     Search model on server: http://sbert.net/models/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2.zip
sentence_transformers.SentenceTransformer: INFO     Load SentenceTransformer from folder: /home/tproth/.cache/torch/sentence_transformers/sbert.net_models_sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2
sentence_transformers.SentenceTransformer: INFO     Use pytorch device: cuda
travis_attack.data: INFO     Will load dataset rotten_tomatoes with use_small_ds set to True
travis_attack.data: INFO     Cache file found for processed dataset, so loading that dataset.


In [None]:
# Set the W&B mode to 'online' for this run, then build the trainer (without
# an initial evaluation pass) and start training.
cfg.wandb['mode'] = 'online'
trainer = Trainer(
    cfg,
    vm_tokenizer, vm_model,
    pp_tokenizer, pp_model,
    sts_model,
    optimizer,
    ds,
    initial_eval=False,
)
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33muts_nlp[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Launching training on one GPU.


  0%|          | 0/15 [00:00<?, ?it/s]

travis_attack.trainer: INFO     Now on epoch 1 of 5


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/10 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

travis_attack.trainer: INFO     Now on epoch 2 of 5


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/10 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

travis_attack.trainer: INFO     Now on epoch 3 of 5


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/10 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

travis_attack.trainer: INFO     Now on epoch 4 of 5


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/10 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

travis_attack.trainer: INFO     Now on epoch 5 of 5


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/10 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
## To resume a previous W&B run: uncomment the lines below and fill in that run's id and name
# cfg.run_id = '61zaljb2'
# cfg.run_name = "hearty-cherry-289"
# cfg.path_run = f"{cfg.path_checkpoints}{cfg.run_name}/"
# run = resume_wandb_run(cfg)


In [None]:
# Show the directory of the current run; the post-run DataFrames are read from
# and written to this path.
cfg.path_run

'../model_checkpoints/travis_attack/neapolitan-brownie-291/'

In [None]:
# To post-process an earlier run instead of the one just trained, uncomment
# and point cfg.path_run at that run's checkpoint directory:
# run_id = '3014an7l'
# run_name = "neapolitan-brownie-291"
# cfg.path_run = f"{cfg.path_checkpoints}{run_name}/"

# Post-process every training DataFrame, persist each one next to the run's
# other artefacts, then log the post-run plots.
# NOTE(review): the recorded output of this cell ends with
# "You must call wandb.init() before wandb.log()". The cause is not visible
# here -- confirm that a W&B run is still active at this point (or resume one
# via resume_wandb_run) before logging plots.
try:
    df_d = get_training_dfs(cfg.path_run, postprocessed=False)
    # Iterate over a snapshot of the items because values are rebound in the loop.
    for k, df in list(df_d.items()):
        df_d[k] = postprocess_df(df, filter_idx=None, num_proc=4)
        df_d[k].to_pickle(f"{cfg.path_run}{k}_postprocessed.pkl")
    create_and_log_wandb_postrun_plots(df_d)
finally:
    # Always finish the trainer's run handle, even when post-processing or
    # plot logging raises; otherwise the run is left open after a failure.
    trainer.run.finish()

travis_attack.insights: INFO     Dataframes have shapes ['training_step: (140, 38)', 'train: (140, 25)', 'valid: (20, 25)', 'test: (4, 25)']
travis_attack.insights: INFO     Adding text metrics for column orig_l
travis_attack.insights: INFO     Adding text metrics for column pp_l
travis_attack.insights: INFO     Calculating metric differences between orig and pp
travis_attack.insights: INFO     Calculating text pair statistics for (orig, pp) unique pairs
travis_attack.insights: INFO     Adding text metrics for column orig_l
travis_attack.insights: INFO     Adding text metrics for column pp_l
travis_attack.insights: INFO     Calculating metric differences between orig and pp
travis_attack.insights: INFO     Calculating text pair statistics for (orig, pp) unique pairs
travis_attack.insights: INFO     Adding text metrics for column orig_l
travis_attack.insights: INFO     Adding text metrics for column pp_l
travis_attack.insights: INFO     Calculating metric differences between orig and pp

Error: You must call wandb.init() before wandb.log()

In [None]:
# Inspect the dict of DataFrames built in the post-processing cell.
df_d