In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7" 

from time import time

import dataset
import engine
import torch
import pandas as pd
import torch.nn as nn
import numpy as np
from model import BERTBaseUncased
from sklearn import model_selection
from sklearn import metrics

import transformers
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

In [2]:
class Config():
    def __init__(self) -> None:
        self.DEVICE = "cuda:3"
        self.MAX_LEN = 268
        self.TRAIN_BATCH_SIZE = 8
        self.VALID_BATCH_SIZE = 4
        self.EPOCHS = 10


        # 训练参数
        self.eps_thres=1e-4 
        self.es_max=1  # early stop

        self.BERT_PATH = "bert-base-uncased"

        self.MODEL_PATH = "/home/18307110500/pj3_workplace/pytorch_model.bin"

        self.TRAINING_FILE = "/home/18307110500/data/train.data"
        
        self.VALIDATION_FILE = "/home/18307110500/data/valid.data"
        self.TEST_FILE = "/home/18307110500/data/test.data"

        self.TOKENIZER = transformers.BertTokenizer.from_pretrained(self.BERT_PATH, do_lower_case=True)
        
config = Config()

In [3]:
# train_set = dataset.DatasetLoader(config.TRAINING_FILE)
# valid_set = dataset.DatasetLoader(config.VALIDATION_FILE)
_, train_dir= dataset.read_data(config.TRAINING_FILE)
_, valid_dir= dataset.read_data(config.VALIDATION_FILE)

train_dataset = dataset.BERTDataset(train_dir['x'], train_dir['y'],config=config)
valid_dataset = dataset.BERTDataset(valid_dir['x'], valid_dir['y'],config=config)
valid_data_loader = valid_dataset.get_dataloader(batch_size=config.VALID_BATCH_SIZE)
train_data_loader = train_dataset.get_dataloader(batch_size=config.TRAIN_BATCH_SIZE)

device = torch.device(config.DEVICE)
model = BERTBaseUncased(config)
model.to(device)
print(model)
param_optimizer = list(model.named_parameters())

no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
optimizer_parameters = [
    {
        "params": [
            p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.001,
    },
    {
        "params": [
            p for n, p in param_optimizer if any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.0,
    },
]

num_train_steps = int(len(train_dir['x']) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
optimizer = AdamW(optimizer_parameters, lr=3e-5)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_train_steps
)

# samples: 8596
# samples: 1000


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTBaseUncased(
  (bert): BertForMaskedLM(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
          



In [4]:
# run
best_accuracy = 0
best_epoch = -1
early_stop_count = 0

ev_acc_his = []
tr_loss_his = []
tr_time_his=[]

In [5]:
model_name= f"{len(train_dataset)}_seq{config.MAX_LEN}-hidden768-best.pth"
for epoch in range(config.EPOCHS):
    tr_start = time()
    loss_tr_his =engine.train_fn(train_data_loader, model, optimizer, device, scheduler)
    tr_loss_his.append(np.average(loss_tr_his))
    tr_time_his.append(time()-tr_start)
    
    outputs, targets, loss_ev_his = engine.eval_fn(valid_data_loader, model, device)
    outputs = np.array(outputs) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    print(f"epoch: {epoch} | Train Loss: {np.average(loss_tr_his)} | Eval Loss: {np.average(loss_ev_his)} | Acc: {accuracy}")

    ev_acc_his.append(accuracy)
    
    if accuracy > best_accuracy:
        if accuracy - best_accuracy > config.eps_thres:
           early_stop_count = 0 # reset
        torch.save(model, model_name)
        best_accuracy = accuracy
        best_epoch = epoch
        best_time_eval_loss = np.average(loss_ev_his)
        
    else: 
        early_stop_count+=1
        if early_stop_count >= config.es_max:
            print('\n[Warning] Early stopping model')
            print('  | Best | Epoch {:d} | Acc {:5.4f} |'
                    .format(best_epoch,  best_accuracy))
            break

100%|██████████| 1075/1075 [04:17<00:00,  4.17it/s]
100%|██████████| 250/250 [00:11<00:00, 22.59it/s]


epoch: 0 | Train Loss: 0.40738344192504883 | Eval Loss: 0.2814761996269226 | Acc: 0.878


100%|██████████| 1075/1075 [04:14<00:00,  4.22it/s]
100%|██████████| 250/250 [00:11<00:00, 22.67it/s]


epoch: 1 | Train Loss: 0.18211789429187775 | Eval Loss: 0.2680104970932007 | Acc: 0.904


100%|██████████| 1075/1075 [04:15<00:00,  4.21it/s]
100%|██████████| 250/250 [00:11<00:00, 22.52it/s]

epoch: 2 | Train Loss: 0.07945828139781952 | Eval Loss: 0.3511075973510742 | Acc: 0.883

  | Best | Epoch 1 | Acc 0.9040 |





In [6]:
# eval
model = torch.load(model_name)
accuracy = best_accuracy
print(f"Accuracy Score = {accuracy}")
best_name=model_name= f"{len(train_dataset)}_seq{config.MAX_LEN}-hidden768-{accuracy}.pth"
torch.save(model, best_name)
print(f"Finally saved as {best_name}")

Accuracy Score = 0.904
Finally saved as 8596_seq268-hidden768-0.904.pth


In [7]:
metric_rec = {
    'epo':[i+1 for i in range(len(ev_acc_his))],
    'eval acc': ev_acc_his,
    'train loss': tr_loss_his ,
    'epoch time(s)': tr_time_his
}
data_f = pd.DataFrame(metric_rec)
data_f

Unnamed: 0,epo,eval acc,train loss,epoch time(s)
0,1,0.878,0.407383,258.209504
1,2,0.904,0.182118,254.772021
2,3,0.883,0.079458,255.76119


In [8]:
eval_acc = best_accuracy
avg_epo_time= np.average(tr_time_his)
print("train set {} | best acc {} | epoch {} | {:.3f}s".format(len(train_dataset), eval_acc, config.EPOCHS,avg_epo_time))

train set 8596 | best acc 0.904 | epoch 10 | 256.248s
