In [1]:
import os
import sys
import pickle
import torch
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt

from torch import nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import AdamW


import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Config():
    use_gpu = True
    gpu_id = 0
    preprocess = True
    bert_path = '/root/pretrained/bert-base-chinese'
    data_path = '/root/data/public/ChnSentiCorp'

    # 预处理后存放文件位置
    out_path = 'data/out'
    max_len = 256
    batch_size = 8
    dropout = 0.3
    num_hidden = 768
    num_classes = 119
    train_batch_size = 2
    epoch = 1
    seed = 1234
    early_stopping_patience = 6
    model_name = 'bert'
    train_log = 10
    log_interval = 10
cfg = Config()
cfg.cwd = os.getcwd()

In [3]:
# use GPU
if cfg.use_gpu and torch.cuda.is_available():
    device = torch.device('cuda', cfg.gpu_id)
else:
    device = torch.device('cpu')

In [4]:
# 设置随机种子
def manual_seed(seed: int = 1) -> None:
    """
        设置seed。
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    #if torch.cuda.CUDA_ENABLED and use_deterministic_cudnn:
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
manual_seed(1234)

In [5]:
# dataload 加载数据集
sys.path.append('/root/projects/nlp-code-examples')
from src.easy_bert.TextClassifiy import tools

train_data_path = os.path.join('/root/projects/nlp-code-examples/example/TextClassifiy/data/', 'train.pkl')
valid_data_path = os.path.join('/root/projects/nlp-code-examples/example/TextClassifiy/data/', 'dev.pkl')
test_data_path = os.path.join('/root/projects/nlp-code-examples/example/TextClassifiy/data/', 'dev.pkl')

train_dataset = tools.CustomDataset(train_data_path)
valid_dataset = tools.CustomDataset(valid_data_path)
test_dataset = tools.CustomDataset(test_data_path)

In [6]:
train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=tools.collate_fn(cfg))
valid_dataloader = DataLoader(valid_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=tools.collate_fn(cfg))
test_dataloader = DataLoader(test_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=tools.collate_fn(cfg))

In [7]:
# 构建模型
import torch.nn as nn

from transformers import BertModel

class BasicBert(nn.Module):
    def __init__(self, cfg):
        super(BasicBert, self).__init__()
        self.bert = BertModel.from_pretrained(cfg.bert_path)
        self.bert_drop = nn.Dropout(cfg.dropout)
        self.out = nn.Linear(cfg.num_hidden, cfg.num_classes)

    def forward(self, x):
        _, o2 = self.bert(**x, return_dict=False)
        bo = self.bert_drop(o2)
        output = self.out(bo)
        
        return output

In [8]:
model = BasicBert(cfg)
model.to(device)
pass

Some weights of the model checkpoint at /root/pretrained/bert-base-chinese were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
param_optimizer = list(model.named_parameters())
no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"] # 官方默认
optimizer_parameters = [
    {
        "params": [
            p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.001,
    },
    {
        "params": [
            p for n, p in param_optimizer if any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.0,
    },
]
optimizer = AdamW(optimizer_parameters, lr=3e-5)

In [10]:
from transformers import get_linear_schedule_with_warmup
num_train_steps = int(len(train_dataset) / cfg.train_batch_size * cfg.epoch)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_train_steps
)

criterion = nn.CrossEntropyLoss()

In [11]:
best_f1, best_epoch = -1, 0
es_loss, es_f1, es_epoch, es_patience, best_es_epoch, best_es_f1, es_path, best_es_path = 1e8, -1, 0, 0, 0, -1, '', ''
train_losses, valid_losses = [], []

In [16]:
losses = []
from sklearn.metrics import precision_recall_fscore_support
import torch.nn.functional as F

def trainer(dataloader, model, criterion, optimizer):
    for batch_idx, (x, y) in enumerate(dataloader, 1):
        for key, value in x.items():
            x[key] = value.to(device)
        y = y.to(device)
        optimizer.zero_grad() # 梯度清0
        y_pred = model(x)
        # loss = F.cross_entropy(y_pred, y.long())
        # loss = criterion(y_pred, y.long())
        loss.backward() # 反向传播
        optimizer.step() # 提督更新
        losses.append(loss.item())
        if batch_idx % 10 == 0:
            # print('loss:', loss.item())
            y_pred = torch.max(y_pred, 1)[1].cpu().numpy()
            y_true = y.cpu().detach().numpy()
            print(y_pred)
            print(y_true)
            p, r, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro', warn_for=tuple())
            # print(f"""p:{p}, r:{r}, f1:{f1}""")
    plt.plot(losses)
    plt.title(f'epoch {1} train loss')
    plt.show()

In [None]:
trainer(train_dataloader, model, criterion, optimizer)

[54 70 56 25 48 70 83 53]
[55.  2. 58. 25. 51. 53. 85. 53.]
[116  34  17  46 106  92  36  65]
[116.   9.  17.  46. 106. 117.  36.  65.]
[17 17 71 17 49 70 24 35]
[53. 17. 71. 17. 49. 82. 24. 35.]
[ 28  48 106  22  28 106  70  54]
[ 28.  48. 106.  22.  28. 107. 101.  59.]
[17 13 95 53 34 16 20 53]
[21. 13. 95. 70. 34. 16. 20. 53.]
[70 25 71 18 18 46 88 17]
[70. 25. 71. 17. 14. 46. 71. 17.]
[ 70 102  36  70  44  70  95  70]
[113.  70.  36.  69. 118.  70.  95.  70.]
[ 96  71  88  20 106  83  70  24]
[ 98.  42.   4.  20. 111.  85.  70.  18.]
[70 17 70 17 17 17 97 53]
[70. 17. 70. 71. 17. 17. 97. 31.]
[17 95 94 18 53 95 36 34]
[70. 95. 94. 18. 53. 95. 36. 28.]
[ 8 17 21 95 70  8 17 45]
[  8.  17.  21.  95. 103.   8.  17.  11.]
[34 18 70 25 48 48 46 70]
[34. 18. 70. 25. 48. 48. 46. 87.]
[ 56 106 111  17  18  16  17  18]
[ 56. 111. 106.  21.  21.  15.  17.  22.]
[70 70 20 34 70 35 70 70]
[70. 70. 20. 11. 70. 35. 70. 95.]
[17 28 21 17 71 19 70 70]
[ 17.  28.  21.  19.  70.  19. 101.  70.]
[  8

In [60]:
y_true = y.cpu().detach().numpy()

In [69]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

In [61]:
p, r, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro', warn_for=tuple())

In [67]:
_, _, acc, _ = precision_recall_fscore_support(y_true, y_pred, average='micro', warn_for=tuple())

In [68]:
acc

0.46875

In [71]:
acc = accuracy_score(y_true, y_pred)

In [72]:
acc

0.46875

In [79]:
loss

tensor(0.7974, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)