In [2]:
!nvidia-smi

Tue Apr 12 07:51:54 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P8    26W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
from google.colab import drive
import sys
drive.mount('/content/drive')
#设置路径
sys.path.append('/content/drive/MyDrive/Colab Notebooks')

Mounted at /content/drive


In [4]:
! pip install transformers==4.0.1

Collecting transformers==4.0.1
  Downloading transformers-4.0.1-py3-none-any.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 5.4 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 27.9 MB/s 
Collecting tokenizers==0.9.4
  Downloading tokenizers-0.9.4-cp37-cp37m-manylinux2010_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 36.3 MB/s 
Installing collected packages: tokenizers, sacremoses, transformers
Successfully installed sacremoses-0.0.49 tokenizers-0.9.4 transformers-4.0.1


In [5]:
! pip install torch==1.4.0

Collecting torch==1.4.0
  Downloading torch-1.4.0-cp37-cp37m-manylinux1_x86_64.whl (753.4 MB)
[K     |████████████████████████████████| 753.4 MB 7.0 kB/s 
[?25hInstalling collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 1.10.0+cu111
    Uninstalling torch-1.10.0+cu111:
      Successfully uninstalled torch-1.10.0+cu111
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.11.1+cu111 requires torch==1.10.0, but you have torch 1.4.0 which is incompatible.
torchtext 0.11.0 requires torch==1.10.0, but you have torch 1.4.0 which is incompatible.
torchaudio 0.10.0+cu111 requires torch==1.10.0, but you have torch 1.4.0 which is incompatible.[0m
Successfully installed torch-1.4.0


In [6]:
import torch
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

config = {
    'train_file_path':'/content/drive/MyDrive/Colab Notebooks/dataset/train.csv',
    'test_file_path':'/content/drive/MyDrive/Colab Notebooks/dataset/test.csv',
    'train_val_ratio':0.1,
    'model_path':'/content/drive/MyDrive/Colab Notebooks/dataset/NeZha_model',
    'batch_size':16,
    'head': 'CNN',
    'num_epochs':1,
    'warmup_ratio':0.1, # warm up
    'eps':0.1,    #对抗模型需要的参数
    'alpha':0.3,   #pgd需要的参数
    'adv':'fgm',   #对抗训练的方法
    'learning_rate':2e-5,
    'logging_step':500,
    'seed':2022
}

config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

def seed_everything(seed):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  return seed

seed_everything(config['seed'])

2022

In [7]:
from collections import defaultdict
def read_data(config, tokenizer, mode = 'train'):
  data_df = pd.read_csv(config[f'{mode}_file_path'], sep=',')
  if mode == 'train':
    X_train, y_train = defaultdict(list),[]
    X_val, y_val = defaultdict(list),[]
    num_val = int(len(data_df) * config['train_val_ratio'])
  else:
    X_test, y_test = defaultdict(list),[]

  for i, row in tqdm(data_df.iterrows(), desc=f'preprocess {mode} data', colour = 'blue', total = len(data_df)):
    label = row[1] if mode == 'train' else 0
    sentence = row[-1]

    inputs = tokenizer.encode_plus(sentence, add_special_tokens = True, return_token_type_ids = True, return_attention_mask = True)

    if mode == 'train':
      if i < num_val:
        X_val['inputs_ids'].append(inputs['input_ids'])
        y_val.append(label)
        X_val['token_type_ids'].append(inputs['token_type_ids'])
        X_val['attention_mask'].append(inputs['attention_mask'])
      else:
        X_train['inputs_ids'].append(inputs['input_ids'])
        y_train.append(label)
        X_train['token_type_ids'].append(inputs['token_type_ids'])
        X_train['attention_mask'].append(inputs['attention_mask'])

    else:
        X_test['inputs_ids'].append(inputs['input_ids'])
        y_test.append(label)
        X_test['token_type_ids'].append(inputs['token_type_ids'])
        X_test['attention_mask'].append(inputs['attention_mask'])

  if mode == 'train':
    label2id = {label: i for i, label in enumerate(np.unique(y_train))}
    id2label = {i: label for label, i in label2id.items()}

    y_train = torch.tensor([label2id[i] for i in y_train], dtype =torch.long)

    y_val = torch.tensor([label2id[i] for i in y_val], dtype =torch.long)
    return X_train, y_train, X_val, y_val, label2id, id2label

  else:
    y_test = torch.tensor(y_test, dtype = torch.long)
    return X_test, y_test


In [8]:
from torch.utils.data import Dataset
class TNEWSData(Dataset):
  def __init__(self, X, y):
    self.x = X
    self.y = y
  
 
  def __getitem__(self, idx):
    return{
        'inputs_ids': self.x['inputs_ids'][idx],
        'label':self.y[idx],
        'token_type_ids':self.x['token_type_ids'][idx],
        'attention_mask':self.x['attention_mask'][idx]

    }

 
  def __len__(self):
    return self.y.size(0)

In [9]:
def collate_fn(example):
  input_ids_list = []
  labels = []
  token_type_ids_list = []
  attention_mask_list = []

  for ex in example:
    input_ids_list.append(ex['inputs_ids'])
    labels.append(ex['label'])
    token_type_ids_list.append(ex['token_type_ids'])
    attention_mask_list.append(ex['attention_mask'])

  max_len = max(len(input_ids) for input_ids in input_ids_list)
  input_ids_tensor = torch.zeros((len(labels), max_len),dtype=torch.long)
  token_type_ids_tensor = torch.zeros_like(input_ids_tensor)
  attention_mask_tensor = torch.zeros_like(input_ids_tensor)

  for i, input_ids in enumerate(input_ids_list):
    input_ids_tensor[i, :len(input_ids)] = torch.tensor(input_ids, dtype = torch.long)
    token_type_ids_tensor[i, :len(input_ids)] = torch.tensor(token_type_ids_list[i], dtype = torch.long)
    attention_mask_tensor[i, :len(input_ids)] = torch.tensor(attention_mask_list[i], dtype = torch.long)

  return {
      'input_ids': input_ids_tensor,
      'labels': torch.tensor(labels ,dtype= torch.long),
      'token_type_ids':token_type_ids_tensor,
      'attention_mask':attention_mask_tensor
  }  

In [10]:
from transformers import BertTokenizer
from torch.utils.data import DataLoader
def build_dataloader(config):
  tokenizer = BertTokenizer.from_pretrained(config['model_path'])
  X_train, y_train, X_val, y_val, label2id, id2label = read_data(config, tokenizer, mode='train')
  X_test, y_test = read_data(config, tokenizer, mode='test')

  train_dataset = TNEWSData(X_train, y_train)
  val_dataset = TNEWSData(X_val, y_val)
  test_dataset = TNEWSData(X_test, y_test)

  train_dataloader = DataLoader(train_dataset, batch_size=config['batch_size'], num_workers = 4, shuffle = True, collate_fn=collate_fn)
  val_dataloader = DataLoader(val_dataset, batch_size=config['batch_size'], num_workers = 4, shuffle = False, collate_fn=collate_fn)
  test_dataloader = DataLoader(test_dataset, batch_size=config['batch_size'], num_workers = 4, shuffle = False, collate_fn=collate_fn)

  return train_dataloader, val_dataloader, test_dataloader, id2label


In [11]:
train_dataloader, val_dataloader, test_dataloader, id2label = build_dataloader(config)

preprocess train data: 100%|[34m██████████[0m| 53360/53360 [00:42<00:00, 1252.06it/s]
preprocess test data: 100%|[34m██████████[0m| 10000/10000 [00:05<00:00, 1822.67it/s]


In [12]:
import torch.nn.functional as F
import torch.nn as nn
from NeZha import *
class NeZhaForTNEWS(NeZhaPreTrainedModel):
  def __init__(self, config, model_path, classifier):
    super(NeZhaForTNEWS, self).__init__(config)

    self.nezha = NeZhaModel.from_pretrained(model_path, config=config)
    self.classifier = classifier  # head
    self.config = config

  def forward(self, input_ids, token_type_ids, attention_mask, labels):
    outputs = self.nezha(input_ids = input_ids,
                token_type_ids = token_type_ids,
                attention_mask = attention_mask)
    hidden_states = outputs[2]

    logits = self.classifier(hidden_states, input_ids)

    outputs = (logits, )

    if labels is not None:
      loss_fct = FocalLoss(num_classes=self.config.num_labels)
      loss = loss_fct(logits, labels.view(-1))
      outputs = (loss, )+ outputs

    return outputs

In [13]:
from typing import List
class ConvClassifier(nn.Module):
  def __init__(self, config):
    super().__init__()
    self.conv = nn.Conv1d(in_channels = config.hidden_size, out_channels = config.hidden_size, kernel_size = 3, padding=(3 - 1) // 2)
    self.global_max_pool = nn.AdaptiveMaxPool1d(1)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.fc = nn.Linear(config.hidden_size, config.num_labels)

  def forward(self, hidden_states: List[torch.Tensor], input_ids: torch.Tensor):
    hidden_states = self.dropout(hidden_states[-1])
    hidden_states = hidden_states.permute(0 ,2, 1)

    out = F.relu(self.conv(hidden_states))
    out = self.global_max_pool(out).squeeze(dim=2)
    out = self.fc(out)
    
    return out

In [14]:
def build_model(model_path, config, head):
  heads = {
      'CNN': ConvClassifier
  }
  assert head in heads ,"head must have been implemented"
  print(f'>>> You are using {head} head, please wait...')
  model = NeZhaForTNEWS(config, model_path, heads[head](config))
  return model

In [15]:
from sklearn.metrics import f1_score
def evaluation(config, model, val_dataloader):
  model.eval()
  preds = []
  labels = []
  val_loss = 0.
  val_iterator = tqdm(val_dataloader, desc='Evaluation', total=len(val_dataloader))

  with torch.no_grad():
    for batch in val_iterator:
      labels.append(batch['labels'])
      batch = {item: value.to(config['device']) for item, value in batch.items()}
      loss, logits = model(**batch)[:2]
      val_loss += loss.item()
      preds.append(logits.argmax(dim = -1).detach().cpu())

  avg_val_loss = val_loss / len(val_dataloader)
  labels = torch.cat(labels, dim = 0).numpy()
  preds = torch.cat(preds, dim = 0).numpy()
  f1 = f1_score(labels, preds, average='macro')
  return avg_val_loss, f1

### 对抗样本：对人类看起来一样，对模型来说预测结果却完全不一样的样本。

## FGM-Fast Gradient Method
对于每个x:
1. 计算x的前向loss, 反向传播得到梯度；
2. 根据embeddign矩阵计算的梯度计算出r, 并加到当前embedding上，相当于x+r
3. 计算x+r的前向loss, 反向传播得到梯度，然后累加到(1)的梯度上；
4. 将embedding恢复为（1）时的embedding；
5. 根据（3）的梯度对参数进行更新。

## PGD-Projected Gradient Descent
FGM是一下子算出了对抗扰动，这样得到的扰动不一定是最优的。因此PGD进行了改进，多迭代了K(t)次，慢慢找到最优的扰动
对于每个x:

1.计算x的前向loss, 反向传播得到梯度；
  对于每步t：
  
  2. 根据embeddign矩阵计算的梯度计算出r, 并加到当前embedding上，相当于x+r；
  
  3. t如果不是最后一步，将梯度归0， 根据（2）的x+r计算前后向并得到梯度
  
  4. t是最后一步，恢复1的梯度，计算最后的x+r并将梯度累加到(1)上

5.将embedding恢复为（1）时的embedding；

6.根据（4）的梯度对参数进行更新。

In [22]:
from extra_loss import *
from extra_optim import *
from extra_fgm import *
from extra_pgd import *
from transformers import AdamW
from tqdm import trange


def train(config, id2label, train_dataloader, val_dataloader):
  nezha_config = NeZhaConfig.from_pretrained(config['model_path'])
  nezha_config.output_hidden_states = True
  nezha_config.num_labels = len(id2label)

  model = build_model(config['model_path'], nezha_config, config['head'])                                    

  

  # 得到模型的参数
  optimizer_grouped_parameters = model.parameters()
  # 定义一个基优化器
  optimizer = AdamW(model.parameters(), lr= config['learning_rate'])
  # Lookahead要有一个基优化器， k=5, alpha=1
  optimizer = Lookahead(optimizer, 5, 1)
  total_steps = config['num_epochs'] * len(train_dataloader)
  # 每调用warmup_steps次， 对应的学习率就会调整一次
  lr_scheduler = WarmupLinearSchedule(optimizer, warmup_steps = int(config['learning_rate'] * total_steps), t_total = total_steps)
  
  model.to(config['device'])                                                                 


### ----- adversarial -------#
  if config['adv'] == 'fgm':
    fgm = FGM(model)
  else:
    pgd = PGD(model)
    K = 3
### ----- adversarial -------#

  epoch_iterator = trange(config['num_epochs'])
  global_steps = 0
  train_loss = 0.
  logging_loss = 0.

  for epoch in epoch_iterator:
    train_iterator = tqdm(train_dataloader, desc='Training', total=len(train_dataloader))
    model.train()
    for batch in train_iterator:
      batch = {item: value.to(config['device']) for item, value in batch.items()}
      # 计算x的前项loss
      loss = model(**batch)[0]
      model.zero_grad()
      # 反向传播得到梯度
      loss.backward()

### ----- adversarial -------#
      if config['adv'] == 'fgm':
        # 计算x+r的前向loss, 反向传播得到梯度，然后累加到(1)的梯度上；
        fgm.attack(epsilon = config['eps'])
        # 计算x+r的前向loss
        loss_adv = model(**batch)[0]
        # 反向传播得到梯度，然后累加到(1)的梯度上；
        loss_adv.backward()
         #将embedding恢复为（1）时的embedding；
        fgm.restore()
      else:
        pgd.backup_grad()
        for t in range(K):
          fgm.attack(epsilon=config(['eps'], alpha=config['alpha'], is_first_attack=(t == 0)))
          if t != K - 1:
            model.zero_grad()
          else:
            pgd.restore_grad()
          loss_adv = model(**batch)[0]
          loss_adv.backward()
        pgd.restore()
### ----- adversarial -------#

      optimizer.step()
      lr_scheduler.step()

      train_loss += loss.item()
      global_steps += 1

      if global_steps % config['logging_step'] == 0:
        print_train_loss = (train_loss - logging_loss) / config['logging_step']
        logging_loss = train_loss
        avg_val_loss, f1 = evaluation(config, model, val_dataloader)

        print_log = f'>>>traing loss:{print_train_loss: .5f}, valid loss:{avg_val_loss: .5f}, valid f1 score:{f1: .5f}'
        print(print_log)
        model.train()

  return model    

In [23]:
model = train(config, id2label, train_dataloader, val_dataloader)

>>> You are using CNN head, please wait...


Some weights of NeZhaModel were not initialized from the model checkpoint at /content/drive/MyDrive/Colab Notebooks/dataset/NeZha_model and are newly initialized: ['bert.encoder.layer.0.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.1.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.2.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.3.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.4.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.5.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.6.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.7.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.8.attention.self.relative_positions_encoding.positions_encoding', 'bert.encoder.layer.9.attention.self.relative_positions_encodi

>>>traing loss: 0.92746, valid loss: 0.70829, valid f1 score: 0.48904



Training:  17%|█▋        | 501/3002 [04:36<4:32:05,  6.53s/it][A
Training:  17%|█▋        | 502/3002 [04:37<3:17:20,  4.74s/it][A
Training:  17%|█▋        | 503/3002 [04:37<2:24:29,  3.47s/it][A
Training:  17%|█▋        | 504/3002 [04:38<1:47:19,  2.58s/it][A
Training:  17%|█▋        | 505/3002 [04:38<1:20:34,  1.94s/it][A
Training:  17%|█▋        | 506/3002 [04:39<1:02:48,  1.51s/it][A
Training:  17%|█▋        | 507/3002 [04:39<50:02,  1.20s/it]  [A
Training:  17%|█▋        | 508/3002 [04:40<41:06,  1.01it/s][A
Training:  17%|█▋        | 509/3002 [04:40<34:51,  1.19it/s][A
Training:  17%|█▋        | 510/3002 [04:41<30:26,  1.36it/s][A
Training:  17%|█▋        | 511/3002 [04:41<27:11,  1.53it/s][A
Training:  17%|█▋        | 512/3002 [04:42<25:18,  1.64it/s][A
Training:  17%|█▋        | 513/3002 [04:42<23:08,  1.79it/s][A
Training:  17%|█▋        | 514/3002 [04:42<22:25,  1.85it/s][A
Training:  17%|█▋        | 515/3002 [04:43<22:09,  1.87it/s][A
Training:  17%|█▋        

>>>traing loss: 0.68148, valid loss: 0.65332, valid f1 score: 0.49067



Training:  33%|███▎      | 1001/3002 [09:13<3:43:06,  6.69s/it][A
Training:  33%|███▎      | 1002/3002 [09:14<2:41:07,  4.83s/it][A
Training:  33%|███▎      | 1003/3002 [09:14<1:57:49,  3.54s/it][A
Training:  33%|███▎      | 1004/3002 [09:15<1:27:21,  2.62s/it][A
Training:  33%|███▎      | 1005/3002 [09:15<1:05:29,  1.97s/it][A
Training:  34%|███▎      | 1006/3002 [09:16<51:14,  1.54s/it]  [A
Training:  34%|███▎      | 1007/3002 [09:16<40:44,  1.23s/it][A
Training:  34%|███▎      | 1008/3002 [09:17<32:54,  1.01it/s][A
Training:  34%|███▎      | 1009/3002 [09:17<27:17,  1.22it/s][A
Training:  34%|███▎      | 1010/3002 [09:18<23:26,  1.42it/s][A
Training:  34%|███▎      | 1011/3002 [09:18<21:42,  1.53it/s][A
Training:  34%|███▎      | 1012/3002 [09:19<21:04,  1.57it/s][A
Training:  34%|███▎      | 1013/3002 [09:19<19:06,  1.74it/s][A
Training:  34%|███▍      | 1014/3002 [09:20<19:17,  1.72it/s][A
Training:  34%|███▍      | 1015/3002 [09:20<18:30,  1.79it/s][A
Training:  3

>>>traing loss: 0.65224, valid loss: 0.61398, valid f1 score: 0.49963



Training:  50%|█████     | 1501/3002 [13:51<2:45:34,  6.62s/it][A
Training:  50%|█████     | 1502/3002 [13:52<1:59:27,  4.78s/it][A
Training:  50%|█████     | 1503/3002 [13:52<1:27:21,  3.50s/it][A
Training:  50%|█████     | 1504/3002 [13:53<1:04:25,  2.58s/it][A
Training:  50%|█████     | 1505/3002 [13:53<49:12,  1.97s/it]  [A
Training:  50%|█████     | 1506/3002 [13:54<38:22,  1.54s/it][A
Training:  50%|█████     | 1507/3002 [13:54<30:04,  1.21s/it][A
Training:  50%|█████     | 1508/3002 [13:55<25:15,  1.01s/it][A
Training:  50%|█████     | 1509/3002 [13:55<20:56,  1.19it/s][A
Training:  50%|█████     | 1510/3002 [13:56<18:30,  1.34it/s][A
Training:  50%|█████     | 1511/3002 [13:56<16:30,  1.51it/s][A
Training:  50%|█████     | 1512/3002 [13:57<15:28,  1.61it/s][A
Training:  50%|█████     | 1513/3002 [13:57<14:38,  1.70it/s][A
Training:  50%|█████     | 1514/3002 [13:58<13:57,  1.78it/s][A
Training:  50%|█████     | 1515/3002 [13:58<13:24,  1.85it/s][A
Training:  50%

>>>traing loss: 0.61593, valid loss: 0.59712, valid f1 score: 0.52214



Training:  67%|██████▋   | 2001/3002 [18:31<1:49:33,  6.57s/it][A
Training:  67%|██████▋   | 2002/3002 [18:32<1:19:13,  4.75s/it][A
Training:  67%|██████▋   | 2003/3002 [18:32<57:34,  3.46s/it]  [A
Training:  67%|██████▋   | 2004/3002 [18:33<42:50,  2.58s/it][A
Training:  67%|██████▋   | 2005/3002 [18:33<32:44,  1.97s/it][A
Training:  67%|██████▋   | 2006/3002 [18:34<25:31,  1.54s/it][A
Training:  67%|██████▋   | 2007/3002 [18:34<20:17,  1.22s/it][A
Training:  67%|██████▋   | 2008/3002 [18:35<16:24,  1.01it/s][A
Training:  67%|██████▋   | 2009/3002 [18:35<14:00,  1.18it/s][A
Training:  67%|██████▋   | 2010/3002 [18:36<11:56,  1.38it/s][A
Training:  67%|██████▋   | 2011/3002 [18:36<11:19,  1.46it/s][A
Training:  67%|██████▋   | 2012/3002 [18:37<10:25,  1.58it/s][A
Training:  67%|██████▋   | 2013/3002 [18:37<09:47,  1.68it/s][A
Training:  67%|██████▋   | 2014/3002 [18:38<09:00,  1.83it/s][A
Training:  67%|██████▋   | 2015/3002 [18:38<08:28,  1.94it/s][A
Training:  67%|███

>>>traing loss: 0.59365, valid loss: 0.58302, valid f1 score: 0.52893



Training:  83%|████████▎ | 2501/3002 [23:07<54:52,  6.57s/it]  [A
Training:  83%|████████▎ | 2502/3002 [23:07<39:33,  4.75s/it][A
Training:  83%|████████▎ | 2503/3002 [23:08<28:52,  3.47s/it][A
Training:  83%|████████▎ | 2504/3002 [23:08<21:24,  2.58s/it][A
Training:  83%|████████▎ | 2505/3002 [23:09<16:08,  1.95s/it][A
Training:  83%|████████▎ | 2506/3002 [23:09<12:25,  1.50s/it][A
Training:  84%|████████▎ | 2507/3002 [23:10<09:53,  1.20s/it][A
Training:  84%|████████▎ | 2508/3002 [23:10<07:59,  1.03it/s][A
Training:  84%|████████▎ | 2509/3002 [23:11<06:52,  1.19it/s][A
Training:  84%|████████▎ | 2510/3002 [23:11<06:01,  1.36it/s][A
Training:  84%|████████▎ | 2511/3002 [23:12<05:30,  1.48it/s][A
Training:  84%|████████▎ | 2512/3002 [23:12<05:04,  1.61it/s][A
Training:  84%|████████▎ | 2513/3002 [23:13<04:46,  1.70it/s][A
Training:  84%|████████▎ | 2514/3002 [23:13<04:33,  1.78it/s][A
Training:  84%|████████▍ | 2515/3002 [23:14<04:15,  1.91it/s][A
Training:  84%|███████

>>>traing loss: 0.57458, valid loss: 0.57406, valid f1 score: 0.54271



Training: 100%|█████████▉| 3001/3002 [27:44<00:06,  6.61s/it][A
Training: 100%|██████████| 3002/3002 [27:44<00:00,  1.80it/s]
100%|██████████| 1/1 [27:44<00:00, 1664.84s/it]


In [26]:
def prediction(config, id2label, model, test_dataloader):
  test_iterator = tqdm(test_dataloader, desc='Prediction', total = len(test_dataloader))
  model.eval()
  test_preds = []

  with torch.no_grad():
    for batch in test_iterator:
      batch = {item: value.to(config['device']) for item, value in batch.items()}
      logits = model(**batch)[1]
      test_preds.append(logits.argmax(dim=-1).detach().cpu())
  
  test_preds = torch.cat(test_preds, dim=0).numpy()
  test_preds = [id2label[id_] for id_ in test_preds]

  test_df = pd.read_csv(config['test_file_path'], sep=',')
  test_df.insert(1, column='label', value=test_preds)
  test_df.drop(['sentence'], 1,inplace=True)#1表示按列删除
  test_df.to_csv('submission_Nezha_adversarial_training.csv', index=False, encoding= 'utf8')

In [27]:
prediction(config, id2label, model, test_dataloader)

Prediction: 100%|██████████| 625/625 [00:51<00:00, 12.09it/s]
