# Training

This notebook serves as a walkthrough for training a model with the trapper package.

In [1]:
# Imports used throughout this notebook

from copy import deepcopy
import os
import json
from typing import Any, Dict, List, Tuple, Union
import warnings

from jury import Jury
import requests
from tqdm import tqdm

from trapper.training.train import run_experiment



Set logging configuration.

In [2]:
import logging
import sys

# You can customize your logger below.
# Records at INFO level and above are written to stdout in the form
# "<timestamp> | <level> : <message>".
logging.basicConfig(format='%(asctime)s | %(levelname)s : %(message)s',
                     level=logging.INFO, stream=sys.stdout)

In [3]:
# Define constants
# Name of this experiment; used later as the experiment's sub-directory name.
EXPERIMENT_NAME = "roberta-base-training-example"

# Paths are resolved from the directory the notebook is run from.
WORKING_DIR = os.getcwd()
# Two directory levels above the working directory.
PROJECT_ROOT = os.path.dirname(os.path.dirname(WORKING_DIR))
# All experiments are kept in an "experiments" folder inside the working directory.
EXPERIMENTS_DIR = os.path.join(WORKING_DIR, "experiments")

## Helper functions

Some useful helper functions to ease training.

In [4]:
def get_dir_from_task(path: str, task: str):
    """Fill the ``{task}`` placeholder of ``path`` with the task name.

    Args:
        path: Template string that may contain a ``{task}`` placeholder.
        task: Task name to substitute; ``None`` falls back to
            ``"unnamed-task"``.

    Returns:
        The formatted path string.
    """
    if task is None:
        return path.format(task="unnamed-task")
    return path.format(task=task)

def start_experiment(config: str, task: str, ext_vars: Dict[str, str]):
    """Run a training experiment and hand back what ``run_experiment`` returns.

    Args:
        config: Path of the experiment config file, forwarded as ``config_path``.
        task: Task name. Not used inside this function; kept so existing
            callers that pass it keep working.
        ext_vars: External variables forwarded unchanged to ``run_experiment``.

    Returns:
        The value returned by ``run_experiment``.
    """
    outcome = run_experiment(config_path=config, ext_vars=ext_vars)
    print("Training complete.")
    return outcome

In [5]:
TASK = "question-answering"
# Task directory: EXPERIMENTS_DIR joined with the task name.
TASK_DIR = get_dir_from_task(os.path.join(EXPERIMENTS_DIR, "{task}"), task=TASK)
DATASET_DIR = os.path.join(TASK_DIR, "datasets")
# The experiment-specific directories below all sit under EXPERIMENT_DIR.
EXPERIMENT_DIR = os.path.join(TASK_DIR, EXPERIMENT_NAME)
MODEL_DIR = os.path.join(EXPERIMENT_DIR, "model")
CHECKPOINT_DIR = os.path.join(EXPERIMENT_DIR, "checkpoints")
OUTPUT_DIR = os.path.join(EXPERIMENT_DIR, "outputs")

In [6]:
# External variables handed to run_experiment through start_experiment.
ext_vars = {
    # Used to feed the jsonnet config file with file paths
    "OUTPUT_PATH": OUTPUT_DIR,
    "CHECKPOINT_PATH": CHECKPOINT_DIR
}

# The experiment config is read from the task directory.
CONFIG_PATH = os.path.join(TASK_DIR, "experiment.jsonnet")  # default experiment params

In [7]:
# Launch training with the config and external variables defined above;
# `result` holds whatever run_experiment returns.
result = start_experiment(
    config=CONFIG_PATH,
    task=TASK,
    ext_vars=ext_vars,
)

2021-11-02 13:40:55,492 | INFO : type = default
2021-11-02 13:40:55,492 | INFO : pretrained_model_name_or_path = roberta-base
2021-11-02 13:40:55,493 | INFO : train_split_name = train
2021-11-02 13:40:55,493 | INFO : dev_split_name = validation
2021-11-02 13:40:55,494 | INFO : label_mapper = None
2021-11-02 13:40:55,494 | INFO : compute_metrics = None
2021-11-02 13:40:55,494 | INFO : no_grad = None
2021-11-02 13:40:55,494 | INFO : args.type = default
2021-11-02 13:40:55,495 | INFO : args.output_dir = /home/devrimcavusoglu/lab/gh/trapper/examples/question_answering/experiments/question-answering/roberta-base-training-example/checkpoints
2021-11-02 13:40:55,495 | INFO : args.overwrite_output_dir = False
2021-11-02 13:40:55,495 | INFO : args.do_train = True
2021-11-02 13:40:55,496 | INFO : args.do_eval = True
2021-11-02 13:40:55,496 | INFO : args.do_predict = False
2021-11-02 13:40:55,496 | INFO : args.evaluation_strategy = steps
2021-11-02 13:40:55,496 | INFO : args.prediction_loss_only 

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForQuestionAnswering: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use 

2021-11-02 13:40:59,488 | INFO : tokenizer_wrapper.type = question-answering
2021-11-02 13:40:59,489 | INFO : tokenizer_wrapper.pretrained_tokenizer = None
2021-11-02 13:41:05,416 | INFO : optimizer.type = huggingface_adamw
2021-11-02 13:41:05,417 | INFO : optimizer.lr = 5e-05
2021-11-02 13:41:05,417 | INFO : optimizer.betas = (0.9, 0.999)
2021-11-02 13:41:05,418 | INFO : optimizer.eps = 1e-06
2021-11-02 13:41:05,418 | INFO : optimizer.weight_decay = 0.01
2021-11-02 13:41:05,418 | INFO : optimizer.correct_bias = True
2021-11-02 13:41:05,419 | INFO : Done constructing parameter groups.
2021-11-02 13:41:05,419 | INFO : Group 0: ['roberta.encoder.layer.3.attention.output.dense.bias', 'roberta.encoder.layer.11.attention.output.LayerNorm.bias', 'roberta.encoder.layer.8.attention.self.key.bias', 'roberta.encoder.layer.1.attention.self.query.bias', 'roberta.encoder.layer.11.output.dense.bias', 'roberta.encoder.layer.1.intermediate.dense.bias', 'roberta.encoder.layer.4.attention.self.key.bias'

2021-11-02 13:41:05,421 | INFO : Number of trainable parameters: 124057346
2021-11-02 13:41:05,421 | INFO : dataset_loader.type = default
2021-11-02 13:41:05,422 | INFO : dataset_loader.dataset_reader.type = default
2021-11-02 13:41:05,424 | INFO : dataset_loader.dataset_reader.path = ../../test_fixtures/hf_datasets/squad_qa_test_fixture
2021-11-02 13:41:05,424 | INFO : dataset_loader.dataset_reader.name = None
2021-11-02 13:41:05,424 | INFO : dataset_loader.dataset_reader.data_dir = None
2021-11-02 13:41:05,425 | INFO : dataset_loader.dataset_reader.data_files = None
2021-11-02 13:41:05,425 | INFO : dataset_loader.dataset_reader.split = None
2021-11-02 13:41:05,426 | INFO : dataset_loader.dataset_reader.cache_dir = None
2021-11-02 13:41:05,426 | INFO : dataset_loader.dataset_reader.features = None
2021-11-02 13:41:05,426 | INFO : dataset_loader.dataset_reader.download_config = None
2021-11-02 13:41:05,427 | INFO : dataset_loader.dataset_reader.download_mode = None
2021-11-02 13:41:05,

[INFO|trainer.py:1013] 2021-11-02 13:41:07,802 >> ***** Running training *****
[INFO|trainer.py:1014] 2021-11-02 13:41:07,803 >>   Num examples = 5
[INFO|trainer.py:1015] 2021-11-02 13:41:07,803 >>   Num Epochs = 10
[INFO|trainer.py:1016] 2021-11-02 13:41:07,803 >>   Instantaneous batch size per device = 2
[INFO|trainer.py:1017] 2021-11-02 13:41:07,804 >>   Total train batch size (w. parallel, distributed & accumulation) = 24
[INFO|trainer.py:1018] 2021-11-02 13:41:07,804 >>   Gradient Accumulation steps = 12
[INFO|trainer.py:1019] 2021-11-02 13:41:07,804 >>   Total optimization steps = 10


Step,Training Loss,Validation Loss


[INFO|trainer.py:1196] 2021-11-02 13:41:10,046 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)


[INFO|trainer.py:1648] 2021-11-02 13:41:10,144 >> Saving model checkpoint to /home/devrimcavusoglu/lab/gh/trapper/examples/question_answering/experiments/question-answering/roberta-base-training-example/outputs
[INFO|configuration_utils.py:329] 2021-11-02 13:41:10,144 >> Configuration saved in /home/devrimcavusoglu/lab/gh/trapper/examples/question_answering/experiments/question-answering/roberta-base-training-example/outputs/config.json
[INFO|modeling_utils.py:831] 2021-11-02 13:41:10,682 >> Model weights saved in /home/devrimcavusoglu/lab/gh/trapper/examples/question_answering/experiments/question-answering/roberta-base-training-example/outputs/pytorch_model.bin
[INFO|tokenization_utils_base.py:1901] 2021-11-02 13:41:10,683 >> tokenizer config file saved in /home/devrimcavusoglu/lab/gh/trapper/examples/question_answering/experiments/question-answering

2021-11-02 13:41:10,733 | INFO : ***** Train results *****
2021-11-02 13:41:10,733 | INFO :   epoch = 10.0
2021-11-02 13:41:10,734 | INFO :   init_mem_cpu_alloc_delta = 2296905728
2021-11-02 13:41:10,734 | INFO :   init_mem_cpu_peaked_delta = 380686336
2021-11-02 13:41:10,734 | INFO :   init_mem_gpu_alloc_delta = 497524736
2021-11-02 13:41:10,734 | INFO :   init_mem_gpu_peaked_delta = 0
2021-11-02 13:41:10,734 | INFO :   total_flos = 6018021854460.0
2021-11-02 13:41:10,735 | INFO :   train_mem_cpu_alloc_delta = 11640832
2021-11-02 13:41:10,735 | INFO :   train_mem_cpu_peaked_delta = 0
2021-11-02 13:41:10,735 | INFO :   train_mem_gpu_alloc_delta = 1496595968
2021-11-02 13:41:10,735 | INFO :   train_mem_gpu_peaked_delta = 419147776
2021-11-02 13:41:10,736 | INFO :   train_runtime = 2.2421
2021-11-02 13:41:10,736 | INFO :   train_samples_per_second = 4.46
2021-11-02 13:41:10,737 | INFO : *** Evaluate ***


[INFO|trainer.py:1865] 2021-11-02 13:41:10,805 >> ***** Running Evaluation *****
[INFO|trainer.py:1866] 2021-11-02 13:41:10,806 >>   Num examples = 6
[INFO|trainer.py:1867] 2021-11-02 13:41:10,806 >>   Batch size = 2


2021-11-02 13:41:10,955 | INFO : ***** Eval results *****
2021-11-02 13:41:10,955 | INFO :   epoch = 10.0
2021-11-02 13:41:10,955 | INFO :   eval_loss = 5.149980545043945
2021-11-02 13:41:10,956 | INFO :   eval_mem_cpu_alloc_delta = 0
2021-11-02 13:41:10,956 | INFO :   eval_mem_cpu_peaked_delta = 0
2021-11-02 13:41:10,956 | INFO :   eval_mem_gpu_alloc_delta = 0
2021-11-02 13:41:10,957 | INFO :   eval_mem_gpu_peaked_delta = 16803328
2021-11-02 13:41:10,957 | INFO :   eval_runtime = 0.0757
2021-11-02 13:41:10,957 | INFO :   eval_samples_per_second = 79.209
Training complete.


In [8]:
# Display the value returned by start_experiment.
result

{'eval_loss': 5.149980545043945,
 'eval_runtime': 0.0757,
 'eval_samples_per_second': 79.209,
 'epoch': 10.0,
 'eval_mem_cpu_alloc_delta': 0,
 'eval_mem_gpu_alloc_delta': 0,
 'eval_mem_cpu_peaked_delta': 0,
 'eval_mem_gpu_peaked_delta': 16803328}

# Inference

This section illustrates how to use a pipeline for inference.

In [7]:
from trapper.pipelines.question_answering_pipeline import SquadQuestionAnsweringPipeline
from trapper.pipelines.pipeline import create_pipeline_from_checkpoint

## Helper Functions

Some helper functions for inference steps.

In [8]:
def save_json(samples: List[Dict], path: str):
    """Serialize ``samples`` as JSON into the file at ``path``.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(path, "w") as out_file:
        json.dump(samples, fp=out_file)


def load_json(path: str):
    """Read the JSON document stored at ``path`` and return the parsed object.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    default encoding, so files containing non-ASCII text load the same way
    on every system.

    Args:
        path: Location of the JSON file.

    Returns:
        The Python object produced by ``json.load``.
    """
    with open(path, "r", encoding="utf-8") as jf:
        return json.load(jf)


def prepare_samples(data: Union[str, Dict]):
    """Flatten nested article/paragraph/question data into a list of QA samples.

    Args:
        data: Either a path to a JSON file (read with ``load_json``) or the
            already-loaded dictionary. Articles are taken from its ``"data"``
            key.

    Returns:
        One dictionary per question with the keys ``"context"``,
        ``"question"`` and ``"gold_answers"`` (the list of answer texts).
    """
    loaded = load_json(data) if isinstance(data, str) else data
    return [
        {
            "context": paragraph["context"],
            "question": qa["question"],
            "gold_answers": [answer["text"] for answer in qa["answers"]],
        }
        for article in loaded["data"]
        for paragraph in article["paragraphs"]
        for qa in paragraph["qas"]
    ]


def prepare_samples_for_pipeline(samples: List[Dict]):
    """Build pipeline inputs from QA samples without touching the originals.

    Works on a deep copy of ``samples``: the ``"gold_answers"`` entry is
    removed from every sample, and a sample lacking an ``"id"`` receives its
    list position (as a string) as one.

    Returns:
        The list of modified copies.
    """
    prepared = deepcopy(samples)
    for position, entry in enumerate(prepared):
        del entry["gold_answers"]
        entry.setdefault("id", str(position))
    return prepared


def predict(pipeline, samples, **kwargs):
    """Run ``pipeline`` on ``samples`` and attach the predicted answers.

    The pipeline is called with the output of
    ``prepare_samples_for_pipeline(samples)`` plus any extra keyword
    arguments. For the i-th prediction, the ``text`` attribute of its first
    element's ``"answer"`` entry is stored under ``"predicted_answer"`` in
    ``samples[i]``.

    Note:
        ``samples`` is updated in place; the returned list is that same object.
    """
    outputs = pipeline(prepare_samples_for_pipeline(samples), **kwargs)
    for position, candidates in enumerate(outputs):
        samples[position]["predicted_answer"] = candidates[0]["answer"].text
    return samples

In [9]:
# Dev-set JSON file located under the project root's test fixtures.
SQUAD_DEV = os.path.join(PROJECT_ROOT, "test_fixtures/data/question_answering/squad_qa/dev.json")
# Destination file for the samples once predictions have been added.
EXPORT_PATH = os.path.join(WORKING_DIR, "qa-outputs.json")

# The model is loaded from the output directory defined in the training section.
PRETRAINED_MODEL_PATH = OUTPUT_DIR
EXPERIMENT_CONFIG = os.path.join(PRETRAINED_MODEL_PATH, "experiment_config.json")

In [10]:
# Build the question-answering pipeline from the saved checkpoint and its experiment config.
qa_pipeline = create_pipeline_from_checkpoint(
    checkpoint_path=PRETRAINED_MODEL_PATH,
    experiment_config_path=EXPERIMENT_CONFIG,
    task="squad-question-answering",
    device=0  # NOTE(review): presumably selects the first GPU; confirm and adjust on CPU-only machines
)

2021-11-02 16:43:42,899 | INFO : type = question_answering
2021-11-02 16:43:42,899 | INFO : pretrained_model = None
2021-11-02 16:43:42,900 | INFO : pretrained_model_name_or_path = /home/devrimcavusoglu/lab/gh/trapper/examples/question_answering/experiments/question-answering/roberta-base-training-example/outputs
2021-11-02 16:43:45,201 | INFO : type = question-answering
2021-11-02 16:43:45,202 | INFO : pretrained_tokenizer = None
2021-11-02 16:43:45,202 | INFO : pretrained_model_name_or_path = /home/devrimcavusoglu/lab/gh/trapper/examples/question_answering/experiments/question-answering/roberta-base-training-example/outputs


In [11]:
# Flatten the dev-set JSON file into a list of context/question/gold-answer samples.
samples = prepare_samples(SQUAD_DEV)

In [13]:
# predict() writes "predicted_answer" into each element of `samples` and returns
# that same list, so `predictions` and `samples` refer to one object.
predictions = predict(qa_pipeline, samples)

100%|█████████████████████████████████████████████| 6/6 [00:00<00:00, 42.95it/s]


In [None]:
# Write the samples, now including their predicted answers, to EXPORT_PATH as JSON.
save_json(predictions, EXPORT_PATH)

In [14]:
# Collect gold answers and predicted answers in matching order for scoring.
# Each reference is a list of answer texts; each hypothesis is a single predicted answer.
references = [sample["gold_answers"] for sample in predictions]
hypotheses = [sample["predicted_answer"] for sample in predictions]

In [18]:
# Create a Jury instance configured with the "squad" metric.
jury = Jury(metrics="squad")

In [19]:
# Score the predicted answers against the gold answers.
jury.evaluate(references=references, predictions=hypotheses)

{'empty_predictions': 0,
 'total_items': 6,
 'squad': {'exact_match': 0.0, 'f1': 0.0}}