## requirements
### mindspore==2.3.1
### mindnlp==0.4.1

## Data
Download the data from this [link](https://gluebenchmark.com/tasks). There will be a main zip file download option at the right side of the page. Extract the contents of the zip file and place them in data/SST-2/

导入所需库

In [1]:
import time
import pandas as pd

import mindspore
from mindspore import save_checkpoint

from mindnlp.core.nn import BCEWithLogitsLoss, Tensor
from mindnlp.core.optim import Adam
from mindnlp.transformers import BertModel, BertTokenizer
from mindnlp.core import nn, value_and_grad
from mindnlp.core.ops import sigmoid

  from .autonotebook import tqdm as notebook_tqdm
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 1.322 seconds.
Prefix dict has been built successfully.


自定义模型类

In [None]:
class SentimentClassifier(nn.Module):
    def __init__(self, base_model_name_or_path = 'bert-base-uncased', freeze_bert = True):
        super().__init__()
        #Instantiating BERT model object 
        self.bert_layer = BertModel.from_pretrained(base_model_name_or_path)

        #Freeze bert layers
        if freeze_bert:
            for p in self.bert_layer.parameters():
                p.requires_grad = False

        #Classification layer
        self.cls_layer = nn.Linear(768, 1)

    def forward(self, seq, attn_masks):
        '''
        Inputs:
            -seq : Tensor of shape [B, T] containing token ids of sequences
            -attn_masks : Tensor of shape [B, T] containing attention masks to be used to avoid contibution of PAD tokens
        '''

        #Feeding the input to BERT model
        last_hs = self.bert_layer(seq, attention_mask = attn_masks).last_hidden_state

        #Obtaining the representation of [CLS] head
        cls_rep = last_hs[:, 0]

        #Feeding cls_rep to the classifier layer
        logits = self.cls_layer(cls_rep)

        return logits

自定义数据加载

In [None]:
class SSTDataset():
    def __init__(self, base_model_name_or_path, filename, maxlen):

        #Store the contents of the file in a pandas dataframe
        self.df = pd.read_csv(filename, delimiter = '\t')

        #Initialize the BERT tokenizer
        self.tokenizer = BertTokenizer.from_pretrained(base_model_name_or_path)

        self.maxlen = maxlen

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):

        #Selecting the sentence and label at the specified index in the data frame
        sentence = self.df.loc[index, 'sentence']
        label = self.df.loc[index, 'label']

        #Preprocessing the text to be suitable for BERT
        tokens = self.tokenizer.tokenize(sentence) #Tokenize the sentence
        tokens = ['[CLS]'] + tokens + ['[SEP]'] #Insering the CLS and SEP token in the beginning and end of the sentence
        if len(tokens) < self.maxlen:
            tokens = tokens + ['[PAD]' for _ in range(self.maxlen - len(tokens))] #Padding sentences
        else:
            tokens = tokens[:self.maxlen-1] + ['[SEP]'] #Prunning the list to be of specified max length

        tokens_ids = self.tokenizer.convert_tokens_to_ids(tokens) #Obtaining the indices of the tokens in the BERT Vocabulary

        return tokens_ids, label

def get_loader(dataset, batchsize, shuffle=True, num_workers=1, drop_remainder=True):
    data_loader = mindspore.dataset.GeneratorDataset(source=dataset,
                                      column_names=['tokens_ids', 'label'],
                                      shuffle=shuffle,
                                      num_parallel_workers=num_workers
                                      )
    data_loader = data_loader.batch(batch_size=batchsize, 
                                    drop_remainder=drop_remainder,
                                    )
    return data_loader.create_dict_iterator()

自定义Trainer类

In [None]:
def get_accuracy_from_logits(logits, labels):
    probs = sigmoid(logits.unsqueeze(-1))
    soft_probs = (probs > 0.5).long()
    acc = (soft_probs.squeeze() == labels).float().mean()
    return acc

def evaluate(net, criterion, dataloader):
    mean_acc, mean_loss = 0, 0
    count = 0

    for data in dataloader:
        tokens_ids = data['tokens_ids']
        attn_mask = (tokens_ids != 0).long()
        label = data['label']
        logits = net(tokens_ids, attn_mask)
        mean_loss += criterion(logits.squeeze(-1), label.astype('float32')).asnumpy()
        mean_acc += get_accuracy_from_logits(logits, label)
        count += 1

    return mean_acc / count, mean_loss / count

class Trainer:

    def __init__(self, net, criterion, optimizer, args,
                 train_dataset, eval_dataset=None
                 ):
        self.net = net
        self.criterion = criterion
        self.opt = optimizer
        self.args = args
        self.train_dataset = train_dataset
        self.weights = self.net.trainable_params()
        self.value_and_grad = value_and_grad(fn=self.forward_fn, params_or_argnums=self.weights)
        self.run_eval = eval_dataset is not None
        if self.run_eval:
            self.eval_dataset = eval_dataset
        self.logits = None

    def forward_fn(self, tokens_ids_tensor, attn_mask, label):
        logits = self.net(tokens_ids_tensor, attn_mask)
        self.logits = logits
        loss = self.criterion(logits.squeeze(-1), label)
        return loss

    def train_single(self, tokens_ids_tensor, attn_mask, label):
        self.opt.zero_grad()
        loss = self.value_and_grad(tokens_ids_tensor, attn_mask, label)
        self.opt.step()
        return loss

    def train(self, epochs):
        best_acc = 0
        for epoch in range(0, epochs):
            self.net.set_train(True)
            for i, data in enumerate(self.train_dataset):
                tokens_ids = data['tokens_ids']
                attn_mask = Tensor((tokens_ids != 0).long())
                label = data['label']

                loss = self.train_single(tokens_ids, attn_mask, label.astype('float32'))

                if i % self.args.print_every == 0:
                    acc = get_accuracy_from_logits(self.logits, label)
                    print("Iteration {} of epoch {} complete. Loss : {} Accuracy : {}".format(i, epoch, loss.asnumpy(), acc))

            if self.run_eval:
                self.net.set_train(False)
                val_acc, val_loss = evaluate(self.net, self.criterion, self.eval_dataset)
                print("Epoch {} complete! Validation Accuracy : {}, Validation Loss : {}".format(epoch, val_acc, val_loss))
                if val_acc > best_acc:
                    print("Best validation accuracy improved from {} to {}".format(best_acc, val_acc))
                    best_acc = val_acc
                    if self.args.save_path is not None:
                        print("saving model...")
                        save_checkpoint(self.net, self.args.save_path + 'best_model.ckpt')

主函数入口，完整训练流程

In [None]:
def main(args):
    #Instantiating the classifier model
    print("Building model! (This might take time if you are running this for first time)")
    st = time.time()
    mindspore.set_context(device_target=args.device_target, device_id=args.device_id)
    net = SentimentClassifier(args.base_model_name_or_path, args.freeze_bert)
    print("Done in {} seconds".format(time.time() - st))

    print("Creating criterion and optimizer objects")
    st = time.time()
    criterion = BCEWithLogitsLoss()
    opti = Adam(net.trainable_params(), lr=args.lr)
    print("Done in {} seconds".format(time.time() - st))

    #Creating dataloaders
    print("Creating train and val dataloaders")
    st = time.time()
    train_set = SSTDataset(args.base_model_name_or_path, filename = args.dataset_name_or_path + '/train.tsv', maxlen = args.maxlen)
    val_set = SSTDataset(args.base_model_name_or_path, filename = args.dataset_name_or_path + '/dev.tsv', maxlen = args.maxlen)

    train_loader = get_loader(train_set, batchsize=args.batch_size)
    val_loader = get_loader(val_set, batchsize=args.batch_size, drop_remainder=False)
    print("Done in {} seconds".format(time.time() - st))

    print("Let the training begin")
    st = time.time()
    trainer = Trainer(net=net, criterion=criterion, optimizer=opti, args=args, train_dataset=train_loader, eval_dataset=val_loader)
    trainer.train(epochs=args.max_eps)
    print("Done in {} seconds".format(time.time() - st))

设置训练参数，开始训练(冻结BERT)

In [6]:
from types import SimpleNamespace

args = SimpleNamespace()
args.device_target = 'Ascend'
args.device_id = 0
args.base_model_name_or_path = 'bert-base-uncased'
args.dataset_name_or_path = './data/SST-2'
args.freeze_bert = True
args.maxlen = 25
args.batch_size = 32
args.lr = 2e-5
args.print_every = 500
args.max_eps = 5
args.save_path = None

main(args)

Building model! (This might take time if you are running this for first time)
[MS_ALLOC_CONF]Runtime config:  enable_vmm:True  vmm_align_size:2MB
Done in 11.163130283355713 seconds
Creating criterion and optimizer objects
Done in 0.0013878345489501953 seconds
Creating train and val dataloaders




Done in 3.1491587162017822 seconds
Let the training begin
Iteration 0 of epoch 0 complete. Loss : 0.72922682762146 Accuracy : 0.40625
Iteration 500 of epoch 0 complete. Loss : 0.6815673112869263 Accuracy : 0.5
Iteration 1000 of epoch 0 complete. Loss : 0.6588367223739624 Accuracy : 0.5625
Iteration 1500 of epoch 0 complete. Loss : 0.633255124092102 Accuracy : 0.59375
Iteration 2000 of epoch 0 complete. Loss : 0.6255167722702026 Accuracy : 0.6875
Epoch 0 complete! Validation Accuracy : 0.70870537, Validation Loss : 0.606412410736084
Best validation accuracy improved from 0 to 0.70870537
Iteration 0 of epoch 1 complete. Loss : 0.6649124622344971 Accuracy : 0.5625
Iteration 500 of epoch 1 complete. Loss : 0.5791333913803101 Accuracy : 0.78125
Iteration 1000 of epoch 1 complete. Loss : 0.5701107978820801 Accuracy : 0.875
Iteration 1500 of epoch 1 complete. Loss : 0.4862162470817566 Accuracy : 0.90625
Iteration 2000 of epoch 1 complete. Loss : 0.6048108339309692 Accuracy : 0.6875
Epoch 1 co

设置训练参数，开始训练(不冻结BERT)

In [7]:
from types import SimpleNamespace

args = SimpleNamespace()
args.device_target = 'Ascend'
args.device_id = 0
args.base_model_name_or_path = 'bert-base-uncased'
args.dataset_name_or_path = './data/SST-2'
args.freeze_bert = False
args.maxlen = 25
args.batch_size = 32
args.lr = 2e-5
args.print_every = 500
args.max_eps = 5
args.save_path = None

main(args)



Building model! (This might take time if you are running this for first time)
Done in 1.576096534729004 seconds
Creating criterion and optimizer objects
Done in 0.0017273426055908203 seconds
Creating train and val dataloaders




Done in 2.324294090270996 seconds
Let the training begin
Iteration 0 of epoch 0 complete. Loss : 0.7102465629577637 Accuracy : 0.5625
Iteration 500 of epoch 0 complete. Loss : 0.16873285174369812 Accuracy : 0.90625
Iteration 1000 of epoch 0 complete. Loss : 0.08603419363498688 Accuracy : 0.96875
Iteration 1500 of epoch 0 complete. Loss : 0.10500945895910263 Accuracy : 0.9375
Iteration 2000 of epoch 0 complete. Loss : 0.19792640209197998 Accuracy : 0.90625
Epoch 0 complete! Validation Accuracy : 0.8984375, Validation Loss : 0.28035964673784164
Best validation accuracy improved from 0 to 0.8984375
Iteration 0 of epoch 1 complete. Loss : 0.04876020550727844 Accuracy : 1.0
Iteration 500 of epoch 1 complete. Loss : 0.18226751685142517 Accuracy : 0.9375
Iteration 1000 of epoch 1 complete. Loss : 0.3231828808784485 Accuracy : 0.90625
Iteration 1500 of epoch 1 complete. Loss : 0.2185860425233841 Accuracy : 0.90625
Iteration 2000 of epoch 1 complete. Loss : 0.05135542154312134 Accuracy : 1.0
Ep