In [None]:
#default_exp config

In [None]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler

In [None]:
#export
from fastcore.test import test_fail
import torch
from pprint import pprint

# Config

Here we define hyperparameters and config variables. We store them all in a class `Config`.

Design from https://github.com/cswinter/DeepCodeCraft/blob/master/hyper_params.py

In [None]:
#export
class Config:
    """Hyperparameters and config variables, stored as plain instance attributes.

    `__init__` sets the dataset-independent defaults. Call one of the dataset
    methods (`rotten_tomatoes_dataset`, `simple_dataset`) afterwards to fill in
    the dataset-specific attributes (`orig_cname`, `label_cname`,
    `orig_max_length`, `pp['max_length']`, `batch_size_train`,
    `batch_size_eval`, `accumulation_steps`, `n_train_epochs`, `eval_freq`).
    Those attributes do not exist until a dataset method has been called.

    Every adjuster method returns `self`, so calls can be chained, e.g.
    `Config().rotten_tomatoes_dataset().small_ds()`.
    """

    def __init__(self):
        """Set up default parameters."""

        ### Models and datasets
        # options for the pp_model
        # 1. tuner007/pegasus_paraphrase
        # 2. tdopierre/ProtAugment-ParaphraseGenerator
        # 3. eugenesiow/bart-paraphrase
        self.pp_name = "eugenesiow/bart-paraphrase"
        self.vm_name = "textattack/distilbert-base-uncased-rotten-tomatoes"
        self.sts_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
        self.dataset_name = None  # filled in by one of the dataset methods

        ### Training hyperparameters
        self.seed = 420
        self.use_fp16 = True
        self.lr = 1e-5
        self.normalise_rewards = False
        self.metrics = ['loss', 'pp_logp', 'reward', 'vm_score', "sts_score", 'label_flip']
        self.pin_memory = True
        self.zero_grad_with_none = False
        self.pad_token_embeddings = True
        self.embedding_padding_multiple = 8
        self.padding_multiple = 8
        self.bucket_by_length = True
        self.shuffle_train = False
        self.remove_misclassified_examples = True

        ### Paraphrase parameters
        # 'max_length' is added to this dict by the dataset methods.
        self.pp = {
            "num_beams": 1,
            "num_return_sequences": 1,
            "num_beam_groups": 1,
            "diversity_penalty": 0.,   # must be a float
            "temperature": 1.5,
            "length_penalty" : 1,
            "min_length" : 5,
        }

        ### Used for testing (overwritten by `small_ds`)
        self.use_small_ds = False
        self.n_shards = None
        self.shard_contiguous = None

        ### Logging parameters
        self.save_model_while_training = False
        self.save_model_freq = 10

        ### W&B parameters
        self.wandb = dict(
            mode = "online",  # set to "disabled" to turn off wandb, "online" to enable it
            log_grads = False,
            log_grads_freq = 1,  # no effect if log_grads is False
            plot_examples = False,
            n_examples_plot = 4,  # number of individual examples to plot curves for
            # log a table to wandb with the examples and rewards the model sees while training. Useful for debugging
            # and seeing what is going on, but slows down training time.
            log_training_step_table = True,
            log_token_entropy=True,
            log_token_probabilities = True
        )

        ### Devices and GPU settings
        #### TODO: do you need this with accelerator? does this handle the post-processing analytics too?
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        #device = accelerator.device
        # -1 is used as the device number when running on CPU.
        self.devicenum = torch.cuda.current_device() if self.device.type == 'cuda' else -1
        # When not using Accelerator
        #n_wkrs = 4 * torch.cuda.device_count()
        # When using Accelerator
        self.n_wkrs = 0

        ### These parameters don't do anything yet
        self.sampling_strategy = "greedy"  # doesn't do anything
        # This makes the reward function more visible
        # copy-paste this from reward function
        self.reward_strategy = "[-0.5 if sts < 0.5 else 0.5+v*sts for v,sts in zip(vm_scores, sts_scores)]"
        # NOTE: stored as a string, not an int.
        self.n_layers_frozen = "2"  # counting from the back (doesn't do anything yet)

    def _set_dataset_params(self, dataset_name, max_length, batch_size_train,
                            batch_size_eval, n_train_epochs, eval_freq):
        """Write the attributes that every dataset preset has to define.

        Keeping the assignments in one place guarantees that all presets set
        the same set of attributes. `max_length` is used for both
        `orig_max_length` and `pp['max_length']`. Returns `self` for chaining.
        """
        self.dataset_name = dataset_name
        # Both presets use the same column names and no gradient accumulation.
        self.orig_cname = "text"
        self.label_cname = 'label'

        self.orig_max_length = max_length
        self.pp['max_length'] = max_length
        self.batch_size_train = batch_size_train
        self.batch_size_eval = batch_size_eval
        self.accumulation_steps = 1
        self.n_train_epochs = n_train_epochs
        self.eval_freq = eval_freq
        return self

    def rotten_tomatoes_dataset(self):
        """Adjust config for the rotten_tomatoes dataset. Returns `self`."""
        return self._set_dataset_params(
            "rotten_tomatoes",
            max_length=64,
            batch_size_train=32,
            batch_size_eval=128,
            n_train_epochs=250,
            eval_freq=1,
        )

    def simple_dataset(self):
        """Adjust config for the simple dataset. Returns `self`."""
        return self._set_dataset_params(
            "simple_dataset",
            max_length=20,
            batch_size_train=2,
            batch_size_eval=4,
            n_train_epochs=60,
            eval_freq=10,
        )

    def small_ds(self):
        """Adjust the config to use a small dataset (for testing purposes).

        Sets `use_small_ds`, `n_shards` and `shard_contiguous`, and returns `self`.

        Raises:
            ValueError: if the simple dataset is selected (no need to shard it).
        """
        if self.dataset_name == "simple_dataset":
            raise ValueError("Don't shard when using the simple dataset (no need)")
        self.use_small_ds = True  # for testing purposes
        self.n_shards = 60
        self.shard_contiguous = False
        return self

## Usage

### Basics 

Usage is pretty straightforward. First initialise the config object and then initialise the dataset by chaining. All hyperparameters and variables are stored as attributes in the class. 

Currently you can use the `simple` dataset: 

In [None]:
# Build a config for the simple dataset, then show a few of its settings.
cfg = Config().simple_dataset()
shown_settings = (
    ("Dataset name: ", cfg.dataset_name),
    ("Number of train epochs: ", cfg.n_train_epochs),
    ("Batch size for train?: ", cfg.batch_size_train),
    ("Max paraphrase length?: ", cfg.pp['max_length']),
)
for label, value in shown_settings:
    print(label, value)

Dataset name:  simple_dataset
Number of train epochs:  60
Batch size for train?:  2
Max paraphrase length?:  20


or the `rotten_tomatoes` dataset

In [None]:
# Build a config for the rotten_tomatoes dataset, then show a few of its settings.
cfg = Config().rotten_tomatoes_dataset()
shown_settings = (
    ("Dataset name: ", cfg.dataset_name),
    ("Number of train epochs: ", cfg.n_train_epochs),
    ("Batch size for train?: ", cfg.batch_size_train),
    ("Max paraphrase length?: ", cfg.pp['max_length']),
)
for label, value in shown_settings:
    print(label, value)


Dataset name:  rotten_tomatoes
Number of train epochs:  250
Batch size for train?:  32
Max paraphrase length?:  64


You can use `vars(cfg)` to get all parameters as a dict: 

In [None]:
# vars() returns the instance's attribute dict; pprint prints it with the keys sorted.
pprint(vars(cfg))

{'accumulation_steps': 1,
 'batch_size_eval': 128,
 'batch_size_train': 32,
 'bucket_by_length': True,
 'dataset_name': 'rotten_tomatoes',
 'device': device(type='cuda'),
 'devicenum': 0,
 'embedding_padding_multiple': 8,
 'eval_freq': 1,
 'label_cname': 'label',
 'lr': 1e-05,
 'metrics': ['loss',
             'pp_logp',
             'reward',
             'vm_score',
             'sts_score',
             'label_flip'],
 'n_layers_frozen': '2',
 'n_shards': None,
 'n_train_epochs': 250,
 'n_wkrs': 0,
 'normalise_rewards': False,
 'orig_cname': 'text',
 'orig_max_length': 64,
 'pad_token_embeddings': True,
 'padding_multiple': 8,
 'pin_memory': True,
 'pp': {'diversity_penalty': 0.0,
        'length_penalty': 1,
        'max_length': 64,
        'min_length': 5,
        'num_beam_groups': 1,
        'num_beams': 1,
        'num_return_sequences': 1,
        'temperature': 1.5},
 'pp_name': 'eugenesiow/bart-paraphrase',
 'remove_misclassified_examples': True,
 'reward_strategy': '[-0.5 

### Using a small dataset for testing

If you want to do testing on a small dataset you can chain on `small_ds()` to adjust the config accordingly.

In [None]:
# Pick the rotten_tomatoes dataset, switch to the small (sharded) variant, then show the result.
cfg = Config().rotten_tomatoes_dataset().small_ds()
shown_settings = (
    ("Dataset name: ", cfg.dataset_name),
    ("Number of train epochs: ", cfg.n_train_epochs),
    ("Batch size for train?: ", cfg.batch_size_train),
    ("Max paraphrase length?: ", cfg.pp['max_length']),
    ("Using small dataset?", cfg.use_small_ds),
    ("How many shards?", cfg.n_shards),
)
for label, value in shown_settings:
    print(label, value)

Dataset name:  rotten_tomatoes
Number of train epochs:  250
Batch size for train?:  32
Max paraphrase length?:  64
Using small dataset? True
How many shards? 60


This functionality is disabled for the simple dataset because we only have 4 data points for each split. 

In [None]:
# `contains` ties the expected failure to the sharding guard's message, so an
# unrelated exception (e.g. an AttributeError) cannot make this check pass.
test_fail(Config().simple_dataset().small_ds, contains="Don't shard")


## Export

In [None]:
#hide
# Run nbdev's notebook-to-script export; the cell output lists each notebook it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 00_utils.ipynb.
Converted 03_config.ipynb.
Converted 07_models.ipynb.
Converted 10_data.ipynb.
Converted 20_trainer.ipynb.
Converted 30_logging.ipynb.
Converted index.ipynb.
Converted run.ipynb.
