In [1]:
from bert_data import *
import torch
import torch.nn as nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 定义 LSTM 模型
from transformers import BertModel, BertTokenizer

class TransformerModel(nn.Module):
    def __init__(self, hidden_size, num_layers, output_size):
        super(TransformerModel, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-chinese')
        self.fc = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, input_ids, attention_mask):
        with torch.no_grad():
            embedded = self.bert(input_ids, attention_mask=attention_mask)[0]  # 获取BERT的输出，即嵌入向量
        pooled = torch.mean(embedded, dim=1)  # 对嵌入向量进行池化，取平均值
        out = self.fc(pooled)
        out = self.dropout(out)
        out = self.fc2(out)
        return out

# 创建 BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

# 定义模型参数
hidden_size = 768  # BERT模型输出的维度
num_layers = 2
output_size = 2

# 创建 Transformer 模型实例，并将其移动到 GPU 设备上
model = TransformerModel(hidden_size, num_layers, output_size).to(device)

# 迁移学习时，只更新分类器的参数，保持BERT模型的参数不变
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.0015)
criterion = nn.CrossEntropyLoss()

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [2]:
num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0.0
    correct = 0
    total = 0
    
    # 在每个迭代步骤中，获取批次数据并进行前向传播、损失计算和反向传播
    for batch_idx, (inputs, labels) in enumerate(trainloader):
        inputs = tokenizer.batch_encode_plus(inputs, add_special_tokens=True, padding=True, truncation=True, max_length=512, return_tensors='pt')
        input_ids = inputs['input_ids'].to(device)
        attention_mask = inputs['attention_mask'].to(device)
        labels = labels.to(torch.long).to(device)

        optimizer.zero_grad()
        
        outputs = model(input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)
        
        # 打印当前批次的进度
        print(f"Epoch {epoch+1}/{num_epochs}, Batch {batch_idx+1}/{len(trainloader)}: Loss = {loss.item():.4f}")
    
    # 打印每个 epoch 的训练损失和准确率
    print(f"Epoch {epoch+1}/{num_epochs}: Loss = {total_loss/len(trainloader):.4f}, Accuracy = {correct/total:.4f}")


Epoch 1/10, Batch 1/1250: Loss = 0.7522
Epoch 1/10, Batch 2/1250: Loss = 0.0120
Epoch 1/10, Batch 3/1250: Loss = 3.6442
Epoch 1/10, Batch 4/1250: Loss = 7.4113
Epoch 1/10, Batch 5/1250: Loss = 4.8731
Epoch 1/10, Batch 6/1250: Loss = 0.1333
Epoch 1/10, Batch 7/1250: Loss = 0.6282
Epoch 1/10, Batch 8/1250: Loss = 0.7334
Epoch 1/10, Batch 9/1250: Loss = 0.0797
Epoch 1/10, Batch 10/1250: Loss = 0.0665
Epoch 1/10, Batch 11/1250: Loss = 3.5805
Epoch 1/10, Batch 12/1250: Loss = 3.0577
Epoch 1/10, Batch 13/1250: Loss = 0.9046
Epoch 1/10, Batch 14/1250: Loss = 0.3583
Epoch 1/10, Batch 15/1250: Loss = 0.6717
Epoch 1/10, Batch 16/1250: Loss = 0.3344
Epoch 1/10, Batch 17/1250: Loss = 0.1355
Epoch 1/10, Batch 18/1250: Loss = 0.9439
Epoch 1/10, Batch 19/1250: Loss = 2.1762
Epoch 1/10, Batch 20/1250: Loss = 1.5613
Epoch 1/10, Batch 21/1250: Loss = 0.3309
Epoch 1/10, Batch 22/1250: Loss = 0.5134
Epoch 1/10, Batch 23/1250: Loss = 0.6545
Epoch 1/10, Batch 24/1250: Loss = 0.2561
Epoch 1/10, Batch 25/1250

KeyboardInterrupt: 

In [3]:
# 在测试集上评估模型
model.eval()  # 切换模型为评估模式
correct = 0
total = 0
with torch.no_grad():
        for inputs, labels in testloader:
            # 编码输入句子
            inputs = tokenizer.batch_encode_plus(inputs, add_special_tokens=True, padding=True, truncation=True, max_length=512, return_tensors='pt')
            input_ids = inputs['input_ids'].to(device)
            attention_mask = inputs['attention_mask'].to(device)
            labels = labels.to(torch.long).to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)

accuracy = correct / total
print(f"Test Accuracy: {accuracy:.4f}")

KeyboardInterrupt: 