In [41]:
import sys
sys.path.append('/root/nlp-code-examples')
import torch
from src.easy_bert.TextClassifiy.tools import preprocess, CustomDataset, collate_fn
import os
from torch.utils.data import DataLoader

In [57]:
class Config():
    use_gpu = True
    gpu_id = 0
    preprocess = True
    bert_path = '/root/NLP-learning-notes/pretrain_models/bert-base-chinese'
    data_path = '/root/nlp-code-examples/example/TextClassifiy/data/origin'

    # 预处理后存放文件位置
    out_path = 'data/out'

    max_len = 256
    batch_size = 32
    dropout = 0.3
    num_hidden = 768
    num_classes = 1
    pass
cfg = Config()
cfg.cwd = os.getcwd()

In [17]:
if cfg.use_gpu and torch.cuda.is_available():
    device = torch.device('cuda', cfg.gpu_id)
else:
    device = torch.device('cpu')

In [31]:
if cfg.preprocess:
    preprocess(cfg)

In [34]:
train_data_path = os.path.join(cfg.cwd, cfg.out_path, 'train.pkl')
valid_data_path = os.path.join(cfg.cwd, cfg.out_path, 'valid.pkl')
test_data_path = os.path.join(cfg.cwd, cfg.out_path, 'test.pkl')

train_dataset = CustomDataset(train_data_path)
valid_dataset = CustomDataset(valid_data_path)
test_dataset = CustomDataset(test_data_path)

In [37]:
train_data_path

'/root/nlp-code-examples/tutorial-notebooks/data/out/train.pkl'

In [42]:
train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=collate_fn(cfg))
valid_dataloader = DataLoader(valid_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=collate_fn(cfg))
test_dataloader = DataLoader(test_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=collate_fn(cfg))

In [43]:
from transformers import BertModel, BertTokenizer

In [79]:
from torch import nn as nn

class BERTBaseUncased(nn.Module):
    def __init__(self, cfg):
        super(BERTBaseUncased, self).__init__()
        self.bert = BertModel.from_pretrained(cfg.bert_path)
        self.bert_drop = nn.Dropout(cfg.dropout)
        self.out = nn.Linear(cfg.num_hidden, cfg.num_classes)

    def forward(self, x):
        ids, mask, token_type_ids = x['input_ids'], x['attention_mask'], x['token_type_ids']
        
        _, o2 = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
        # _, o2 = self.bert(**x, return_dict=False)
        bo = self.bert_drop(o2)
        output = self.out(bo)
        return output
    
model = BERTBaseUncased(cfg)
model.to(device)

Some weights of the model checkpoint at /root/NLP-learning-notes/pretrain_models/bert-base-chinese were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTBaseUncased(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [78]:
from sklearn import metrics
import numpy as np
for x, y in train_dataloader:
    out = model(x).cpu().detach().numpy()
    y = y.cpu().detach().numpy()
    out = np.array(out) >= 0.5
    res = metrics.accuracy_score(y, out)
    print(res)
    print(y)
    break

0.46875
[1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 1. 1. 0. 1.
 1. 0. 1. 0. 1. 0. 1. 1.]


In [80]:
for batch_idx, (x, y) in enumerate(train_dataloader, 1):
    for key, value in x.items():
        x[key] = value.to(device)

    y = y.to(device)

    optimizer.zero_grad()
    y_pred = model(x)

    loss = loss_fn(y_pred, y)

    loss.backward()
    optimizer.step()

    metric.update(y_true=y, y_pred=y_pred)
    losses.append(loss.item())

    data_total = len(dataloader.dataset)
    data_cal = data_total if batch_idx == len(dataloader) else batch_idx * len(y)
    if (cfg.train_log and batch_idx % cfg.log_interval == 0) or batch_idx == len(dataloader):
        # p r f1 皆为 macro，因为micro时三者相同，定义为acc
        acc, p, r, f1 = metric.compute()
        logger.info(f'Train Epoch {epoch}: [{data_cal}/{data_total} ({100. * data_cal / data_total:.0f}%)]\t'
                    f'Loss: {loss.item():.6f}')
        logger.info(f'Train Epoch {epoch}: Acc: {100. * acc:.2f}%\t'
                    f'macro metrics: [p: {p:.4f}, r:{r:.4f}, f1:{f1:.4f}]')

NameError: name 'optimizer' is not defined