https://huggingface.co/transformers/training.html

-----

In [None]:
from transformers import AdamW
# AdamW over every parameter of the model, with a learning rate of 1e-5.
optimizer = AdamW(params=model.parameters(), lr=1e-5)

In [None]:
# Parameters whose name contains one of these markers get no weight decay;
# every other parameter is decayed with a factor of 0.01.
no_decay = ['bias', 'LayerNorm.weight']

decayed, undecayed = [], []
for name, tensor in model.named_parameters():
    if any(marker in name for marker in no_decay):
        undecayed.append(tensor)
    else:
        decayed.append(tensor)

optimizer_grouped_parameters = [
    {'params': decayed, 'weight_decay': 0.01},
    {'params': undecayed, 'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)

In [None]:
from transformers import get_linear_schedule_with_warmup
# Linear warmup followed by linear decay; num_warmup_steps and num_train_steps
# must already be defined by the caller.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_train_steps,
)

In [None]:
# Freeze the base model: none of its parameters will receive gradients.
for weight in model.base_model.parameters():
    weight.requires_grad_(False)

-----
new

In [3]:
import os
from pathlib import Path
import logging
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from time import time
from datetime import date
import argparse
import matplotlib.pyplot as plt
from transformers import BertForSequenceClassification, Trainer, TrainingArguments, BertTokenizer

from modules.data import ImportData
from modules.models import SiameseBERT
from modules.utils import collate_fn_bert, setup_logger, compute_metrics, get_quora_huggingface


# Default locations (relative to the working directory) for this job's logs,
# the embeddings directory and the dataset.
today = str(date.today())
path = Path(f'./logs/train_job_{today}/')
emb_path = Path('./logs/embeddings')
data_path = Path('./logs/data')

parser = argparse.ArgumentParser()
parser.add_argument("-model_name", "--model_name", type=str, help="Name of trained model. Needed only for correct logs output", default='bert')
# type=Path (not str): the code below joins onto logdir with `/`, which only
# works for Path objects. With type=str a value given on the command line
# would arrive as a plain string and break that join.
parser.add_argument("-log", "--logdir", type=Path, help="Directory to save all downloaded files, and model checkpoints.", default=path)
parser.add_argument("-df", "--data_file", type=Path, help="Path to dataset.", default=data_path/"dataset.csv")
parser.add_argument("-s", "--split_seed", type=int, help="Seed for splitting the dataset.", default=44)
parser.add_argument("-b", "--batch_size", type=int, help="Batch Size.", default=8)
parser.add_argument("-epo", "--n_epoch", type=int, help="Number of epochs.", default=4)
parser.add_argument("-bert_cls", "--bert_cls", type=str, help="Type of BERT trained (classificator, siamese).", default='classifier')
parser.add_argument("-bert_backbone", "--bert_backbone", type=str, help="Either path to the model, or name of the BERT model that should be used, compatible with HuggingFace Transformers.", default='bert-base-uncased')

# Parse an explicitly empty argument list: this runs as a notebook cell, so
# every option takes its default instead of reading the kernel's sys.argv.
args = parser.parse_args([])
args.logdir = args.logdir/args.model_name
model_path = args.logdir/'best_model/'
# Create the log directory (and any missing parents); no error if it exists.
args.logdir.mkdir(parents=True, exist_ok=True)

logger = setup_logger(str(args.logdir/'logs.log'))
logger.info("Begining job. All files and logs will be saved at: {}".format(args.logdir))


logger.info('Reading Dataset and splitting into train and test datasets with seed: {}'.format(args.split_seed))
# ImportData is a project class; after train_test_split() the splits are read
# from data.train and data.test.
data = ImportData(str(args.data_file))
data.train_test_split(seed=args.split_seed)


logger.info('')
logger.info('Number of training samples        :{}'.format(len(data.train)))
logger.info('Number of validation samples      :{}'.format(len(data.test)))
logger.info('')

2020-09-21 23:04:20,620 | Begining job. All files and logs will be saved at: logs\train_job_2020-09-21\bert
2020-09-21 23:04:20,621 | Reading Dataset and splitting into train and test datasets with seed: 44


  return f(*args, **kwds)


2020-09-21 23:04:21,515 | 
2020-09-21 23:04:21,516 | Number of training samples        :364287
2020-09-21 23:04:21,517 | Number of validation samples      :40000
2020-09-21 23:04:21,518 | 


In [11]:
# Load the sequence-classification model and its tokenizer from the backbone
# chosen in the arguments (default: 'bert-base-uncased'), instead of repeating
# the checkpoint name as a literal.
model = BertForSequenceClassification.from_pretrained(args.bert_backbone)
tokenizer = BertTokenizer.from_pretrained(args.bert_backbone)

training_args = TrainingArguments(
    output_dir=str(args.logdir/'results'),          # output directory
    overwrite_output_dir=True,
    evaluate_during_training=True,
    logging_first_step=True,
    # NOTE(review): fixed at 3 and not tied to args.n_epoch (default 4) - confirm intent.
    num_train_epochs=3,                             # total number of training epochs
    per_device_train_batch_size=args.batch_size,    # batch size per device during training
    per_device_eval_batch_size=64,                  # batch size for evaluation
    warmup_steps=500,                               # number of warmup steps for learning rate scheduler
    weight_decay=0.01,                              # strength of weight decay
    logging_dir=str(args.logdir/'logs'),            # directory for storing logs
)

# NOTE(review): collate_fn_classifier_bert is not among the names imported from
# modules.utils in the import cell (only collate_fn_bert is). The lambda defers
# the lookup, so a missing name would only surface once batches are collated -
# confirm it is defined before calling trainer.train().
trainer = Trainer(
    model=model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    data_collator=lambda x: collate_fn_classifier_bert(x, tokenizer, args.bert_cls),
    train_dataset=data.train.values,     # training dataset
    eval_dataset=data.test.values,       # evaluation dataset
    compute_metrics=compute_metrics,     # metric function imported from modules.utils
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [69]:
# Start training with the Trainer built in the previous cell.
trainer.train()

----------
old

In [None]:
import os
from pathlib import Path
import logging
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from time import time
from datetime import date
import argparse
import matplotlib.pyplot as plt

from modules.data import ImportData, QuoraQuestionDataset
from modules.embeddings import EmbeddedVocab
from modules.models import SiameseBERT, ClassifierBERT
from modules.utils import collate_fn_bert, train_bert, eval_bert, setup_logger


# Default locations (relative to the working directory) for this job's logs,
# the embeddings directory and the dataset.
today = str(date.today())
path = Path(f'./logs/train_job_{today}/')
emb_path = Path('./logs/embeddings')
data_path = Path('./logs/data')

parser = argparse.ArgumentParser()
parser.add_argument("-model_name", "--model_name", type=str, help="Name of trained model. Needed only for correct logs output", default='bert')
# type=Path (not str): the code below joins onto logdir with `/`, which only
# works for Path objects. With type=str a value given on the command line
# would arrive as a plain string and break that join.
parser.add_argument("-log", "--logdir", type=Path, help="Directory to save all downloaded files, and model checkpoints.", default=path)
parser.add_argument("-df", "--data_file", type=Path, help="Path to dataset.", default=data_path/"dataset.csv")
parser.add_argument("-s", "--split_seed", type=int, help="Seed for splitting the dataset.", default=44)
parser.add_argument("-b", "--batch_size", type=int, help="Batch Size.", default=8)
parser.add_argument("-epo", "--n_epoch", type=int, help="Number of epochs.", default=4)
parser.add_argument("-bert_cls", "--bert_cls", type=str, help="Type of BERT trained (classificator, siamese).", default='classifier')
parser.add_argument("-bert_backbone", "--bert_backbone", type=str, help="Either path to the model, or name of the BERT model that should be used, compatible with HuggingFace Transformers.", default='bert-base-uncased')

# Parse an explicitly empty argument list: this runs as a notebook cell, so
# every option takes its default instead of reading the kernel's sys.argv.
args = parser.parse_args([])
args.logdir = args.logdir/args.model_name
model_path = args.logdir/'best_model/'
# Create the log directory (and any missing parents); no error if it exists.
args.logdir.mkdir(parents=True, exist_ok=True)

logger = setup_logger(str(args.logdir/'logs.log'))
logger.info("Begining job. All files and logs will be saved at: {}".format(args.logdir))


logger.info('Reading Dataset and splitting into train and test datasets with seed: {}'.format(args.split_seed))
# ImportData is a project class; after train_test_split() the splits are read
# from data.train and data.test.
data = ImportData(str(args.data_file))
data.train_test_split(seed=args.split_seed)


logger.info('')
logger.info('Number of training samples        :{}'.format(len(data.train)))
logger.info('Number of validation samples      :{}'.format(len(data.test)))
logger.info('')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both loaders share batch size and collate function; only shuffling differs.
loader_options = {'batch_size': args.batch_size, 'collate_fn': collate_fn_bert}
train_dataloader = DataLoader(data.train.values, shuffle=True, **loader_options)
test_dataloader = DataLoader(data.test.values, shuffle=False, **loader_options)

# The siamese variant is trained with MSE, the classifier with cross-entropy.
is_siamese = args.bert_cls == 'siamese'
if is_siamese:
    model = SiameseBERT(args.bert_backbone, device)
    criterion = nn.MSELoss()
else:
    model = ClassifierBERT(args.bert_backbone, device)
    criterion = nn.CrossEntropyLoss()

# Cast to float32, wrap for multi-GPU data parallelism, then move to the device.
model = nn.DataParallel(model.float()).to(device)
optimizer = torch.optim.Adam(model.parameters())

# Write the run configuration to the log, one entry per line.
separator = '--------------------------------------'
config_report = [
    'Building model.',
    separator,
    'Model Parameters:',
    'Bert Backbone:               :{}'.format(args.bert_backbone),
    separator,
    'Training Parameters:',
    'Device                       :{}'.format(str(device)),
    'Optimizer                    :{}'.format(' Adam'),
    'Loss function                :{}'.format('MSE' if is_siamese else 'CE'),
    'Batch Size                   :{}'.format(args.batch_size),
    'Number of Epochs             :{}'.format(args.n_epoch),
    separator,
]
for report_line in config_report:
    logger.info(report_line)

start = time()
# Per-epoch history, appended to at the end of every epoch.
all_train_losses = []
all_test_losses = []
train_accuracies = []
test_accuracies = []
# A checkpoint is written only when test accuracy beats this value; it starts
# at 0.5 and is raised each time a better epoch is seen.
best_acc = 0.5
logger.info("Training the model...")
for epoch in range(args.n_epoch):
    epoch_time = time()
    epoch_loss = []
    preds_train = []

    # train_bert / eval_bert receive the (initially empty) lists as arguments;
    # presumably they fill them in place with per-batch losses and prediction
    # hits - confirm in modules.utils.
    train_bert(model, optimizer, criterion, train_dataloader, device, epoch_loss, preds_train, epoch, logger)

    eval_loss = []
    preds_test = []
    eval_bert(model, criterion, test_dataloader, device, eval_loss, preds_test)

    train_loss = np.mean(epoch_loss)
    train_accuracy = np.sum(preds_train)/data.train.shape[0]
    test_loss = np.mean(eval_loss)
    test_accuracy = np.sum(preds_test)/data.test.shape[0]

    if test_accuracy > best_acc:
        # Remember the new best so a later, worse epoch cannot overwrite the
        # checkpoint of a better one.
        best_acc = test_accuracy
        model_path.mkdir(parents=True, exist_ok=True)
        checkpoint_file = str(model_path/'checkpoint.pth')
        logger.info('Saving best model at: {}'.format(checkpoint_file))
        torch.save({
            'epoch': epoch,
            # model is wrapped in nn.DataParallel; save the underlying module's weights.
            'model_state_dict': model.module.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'test_loss': test_loss,
            'test_accuracy': test_accuracy
            }, checkpoint_file)

    all_train_losses.append(train_loss)
    all_test_losses.append(test_loss)
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

    logger.info('Mean loss and accuracy of epoch {} - train: {}, {}, test: {}, {}. Calculation time: {} hours'.format(epoch, train_loss, round(train_accuracy, 4), test_loss, round(test_accuracy, 4), (time() - epoch_time)/3600))

# Total wall-clock training time, in minutes.
logger.info("Model training finished in: {}".format(np.round((time()-start)/60, 3)))

def _plot_history(train_values, test_values, metric, title, out_file):
    """Plot per-epoch train and test curves, save the figure, then show it.

    Args:
        train_values: Sequence with one training value per epoch.
        test_values: Sequence with one test value per epoch.
        metric: Label for the y axis (e.g. 'loss' or 'accuracy').
        title: Figure title.
        out_file: Path the figure is saved to.
    """
    plt.figure(figsize=(10,6))
    plt.title(title)
    plt.plot(range(len(train_values)), train_values, label='train')
    plt.plot(range(len(test_values)), test_values, label='test')
    plt.legend()
    plt.grid(alpha=0.5)
    plt.xlabel('epoch')
    plt.ylabel(metric)
    # Save before show(): saving is done while the figure is still current.
    plt.savefig(out_file)
    plt.show()


_plot_history(
    all_train_losses,
    all_test_losses,
    'loss',
    f'Train and test losses during training of {args.model_name} model',
    args.logdir/'loss_plots.png',
)

# The accuracy figure used to reuse the "losses" title; it now names the
# quantity that is actually plotted.
_plot_history(
    train_accuracies,
    test_accuracies,
    'accuracy',
    f'Train and test accuracies during training of {args.model_name} model',
    args.logdir/'acc_plots.png',
)

-------------

Do wczytywania pretrenowanego BERTa z poprzednich treningów:

In [None]:
    def save(self, output_path: str):
        """Write the BERT model, the tokenizer and the module config to *output_path*.

        The model and tokenizer are stored through their own ``save_pretrained``
        methods. The dictionary returned by ``self.get_config_dict()`` is written
        as indented JSON to ``sentence_bert_config.json`` in the same directory.
        """
        for component in (self.bert, self.tokenizer):
            component.save_pretrained(output_path)

        config_file = os.path.join(output_path, 'sentence_bert_config.json')
        with open(config_file, 'w') as handle:
            json.dump(self.get_config_dict(), handle, indent=2)

    @staticmethod
    def load(input_path: str):
        """Rebuild a ``BERT`` module from a directory written by ``save``.

        Reads ``sentence_bert_config.json`` from *input_path* and passes its
        entries as keyword arguments to ``BERT``, with *input_path* itself given
        as ``model_name_or_path``.
        """
        config_file = os.path.join(input_path, 'sentence_bert_config.json')
        with open(config_file) as handle:
            stored_kwargs = json.load(handle)
        return BERT(model_name_or_path=input_path, **stored_kwargs)