In [1]:
import os
import sys

def uppath(_path, n):
    """Return ``_path`` with its last ``n`` path components removed.

    Args:
        _path: File-system path to shorten.
        n: Number of trailing components to strip. Values <= 0 leave the
            path unchanged.

    Returns:
        The ancestor path ``n`` levels above ``_path``. Stripping stops at
        the file-system root instead of collapsing to an empty string.
    """
    for _ in range(max(n, 0)):
        _path = os.path.dirname(_path)
    return _path


# Notebooks have no ``__file__``, so the current working directory is used as
# the anchor (assumes the kernel was started in this notebook's folder).
cur_path = os.getcwd()
module_path = uppath(cur_path, 1)  # root_path
# Make the project root importable so that ``src.*`` resolves.
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import logging
import os

In [3]:
from src.reranker import Reranker, RerankerDC
from src.reranker import RerankerTrainer, RerankerDCTrainer
from src.reranker.data import GroupedTrainDataset, PredictionDataset, GroupCollator
from src.reranker.arguments import ModelArguments, DataArguments, RerankerTrainingArguments as TrainingArguments

In [4]:
from transformers import AutoConfig, AutoTokenizer
from transformers import (
    HfArgumentParser,
    set_seed,
)
import torch

In [5]:
# Name the logger after the running module (``__main__`` inside a notebook).
# The name must be the ``__name__`` variable, not the string "__name__".
logger = logging.getLogger(__name__)

In [6]:
# GPU selection through CUDA environment variables: order devices by PCI bus
# id and expose only device "7" to this process.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "7",
    }
)

In [7]:
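# NOTE: explicit device pinning is intentionally left disabled. The GPU is
# chosen via CUDA_VISIBLE_DEVICES = "7" in the previous cell, so the process
# presumably sees that single GPU as index 0 and `cuda:7` would not be valid.
# device = torch.device("cuda:7")
# torch.cuda.set_device(7)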

In [8]:
# Output, training-data and Hugging Face cache folders, all resolved under
# the project root (module_path).
output_dir, train_dir, cache_dir = (
    os.path.join(module_path, relative_dir)
    for relative_dir in ('DATA/output', 'DATA/train_data', 'DATA/huggingface')
)

# Pretrained checkpoint name, maximum sequence length and number of labels.
model_name = "bert-base-uncased"
max_len = 512
num_labels = 1

In [9]:
# Model, config and tokenizer all point at the same checkpoint name and share
# one download cache directory.
model_args = ModelArguments(
    model_name_or_path=model_name,
    config_name=model_name,
    tokenizer_name=model_name,
    cache_dir=cache_dir,
)

In [10]:
# Build the data arguments from the constants defined in the configuration
# cell so the sequence-length limit has a single source of truth (max_len)
# instead of a second hard-coded 512.
data_args = DataArguments(
    train_dir=train_dir,
    max_len=max_len,
)

In [11]:
training_args = TrainingArguments(
    # Run mode and checkpointing.
    output_dir=output_dir,
    do_train=True,
    save_steps=2000,
    # Batch sizes.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=64,
    gradient_accumulation_steps=1,
    # Optimisation schedule.
    learning_rate=1e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    num_train_epochs=2,
    # Data loading.
    dataloader_num_workers=8,
)

In [12]:
# Processes with local_rank -1 (non-distributed) or 0 log at INFO; every
# other rank is limited to warnings and above.
log_level = logging.INFO if training_args.local_rank in (-1, 0) else logging.WARNING
logging.basicConfig(
    level=log_level,
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(message)s",
)

In [13]:
# One-line summary of the runtime setup. It is emitted at WARNING level so it
# still appears on ranks whose log level was raised above INFO.
logger.warning(
    "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
    training_args.local_rank,
    training_args.device,
    training_args.n_gpu,
    training_args.local_rank != -1,  # True when running distributed
    training_args.fp16,
)



In [14]:
# logger.info("Training/evaluation parameters %s", training_args)
# logger.info("Model parameters %s", model_args)
# logger.info("Data parameters %s", data_args)

In [14]:
# Seed the run from training_args.seed for reproducibility. No seed is passed
# when training_args is built above, so this is whatever default the
# arguments class provides.
set_seed(training_args.seed)

In [15]:
# Load the model configuration through model_args so the config_name and
# cache_dir set there are actually honoured. Falls back to the model name when
# no separate config name is given.
config = AutoConfig.from_pretrained(
    model_args.config_name or model_args.model_name_or_path,
    num_labels=num_labels,  # size of the classification head output
    cache_dir=model_args.cache_dir,
)

In [16]:
# Load the tokenizer through model_args so the tokenizer_name and cache_dir
# set there are actually honoured. Falls back to the model name when no
# separate tokenizer name is given.
tokenizer = AutoTokenizer.from_pretrained(
    model_args.tokenizer_name or model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
    use_fast=False,  # request the non-fast tokenizer implementation
)

In [17]:
# Build the reranker from the pretrained checkpoint. This project's
# Reranker.from_pretrained takes the three argument objects first, followed by
# the usual checkpoint name and loading options. The log output below shows
# the weights being loaded into BertForSequenceClassification.
model = Reranker.from_pretrained(
    model_args, 
    data_args, 
    training_args,
    model_name,
    from_tf=False,  # the checkpoint is not loaded from TensorFlow weights
    config=config,
    cache_dir=cache_dir,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [18]:
# Training dataset built from data_args.train_path. That attribute is not set
# explicitly in this notebook; presumably DataArguments derives it from
# train_dir — verify against the arguments class.
train_dataset = GroupedTrainDataset(
    data_args,
    data_args.train_path,
    tokenizer=tokenizer,
    train_args=training_args,
)

Resolving data files:   0%|          | 0/121 [00:00<?, ?it/s]



  0%|          | 0/1 [00:00<?, ?it/s]

In [19]:
# Wire the model, training arguments and training dataset into the project's
# trainer. No evaluation dataset is supplied, so this run is training only.
trainer = RerankerTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=GroupCollator(tokenizer),  # collator built from the same tokenizer as the dataset
)

In [None]:
# Pass model_name_or_path to the trainer only when it is an existing local
# directory; otherwise pass None.
# NOTE(review): newer transformers releases appear to deprecate `model_path`
# in favour of `resume_from_checkpoint` — confirm against the installed version.
checkpoint_dir = model_args.model_name_or_path
trainer.train(model_path=checkpoint_dir if os.path.isdir(checkpoint_dir) else None)

# Persist the final model.
trainer.save_model()

# For convenience, the tokenizer is saved to the same directory as well, so
# the model can be shared easily on huggingface.co/models.
if trainer.is_world_process_zero():
    tokenizer.save_pretrained(training_args.output_dir)



Step,Training Loss
500,2.0723
1000,2.0054
1500,1.9083
2000,1.8192
2500,1.7526
3000,1.6781
3500,1.6944
4000,1.591
4500,1.5681
5000,1.4262


03/24/2022 11:38:18 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-2000
03/24/2022 12:03:57 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-4000
03/24/2022 12:29:58 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-6000
03/24/2022 12:55:29 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-8000
03/24/2022 13:21:08 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-10000
03/24/2022 13:46:44 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-12000
03/24/2022 14:12:23 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-14000
03/24/2022 14:38:16 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/output/checkpoint-16000
03/24/2022 15:04:22 - INFO - Saving model checkpoint to /home/ubuntu/minung/Reranker/DATA/ou