In [3]:
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import pandas as pd
# 假设data和labels是你的数据和标签

df = pd.read_csv('squad_questions_and_types_balanced_500.csv')
data=df.loc[:,"Question"]
label=df.loc[:, "Confirmation":"Complex"]
# 分割数据集
train_texts, val_texts, train_labels, val_labels = train_test_split(data, label, test_size=.2)

train_texts=train_texts.values.tolist()
train_labels=train_labels.values.tolist()
val_texts=val_texts.values.tolist()
val_labels=val_labels.values.tolist()

In [5]:
import numpy as np
# 初始化Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 编码数据
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128)

# 转换为torch tensors
train_seq = torch.tensor(train_encodings['input_ids'])
train_mask = torch.tensor(train_encodings['attention_mask'])
train_y = torch.tensor(train_labels, dtype=torch.float)

val_seq = torch.tensor(val_encodings['input_ids'])
val_mask = torch.tensor(val_encodings['attention_mask'])
val_y = torch.tensor(val_labels, dtype=torch.float)

batch_size = 32

# DataLoader
train_data = TensorDataset(train_seq, train_mask, train_y)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

In [6]:
epochs=500
early_stopping_patience = 10  # 设置容忍度（即连续多少个epoch没有改善就停止）
best_loss = np.Inf
early_stopping_counter = 0  # 用于跟踪没有改善的epoch数量

In [7]:
from transformers import AdamW,get_linear_schedule_with_warmup
from torch.nn import BCEWithLogitsLoss
from model import BERTMultiLabelBinaryClassification
from tqdm import tqdm
import copy

device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BERTMultiLabelBinaryClassification(num_labels=6)

optimizer = AdamW(model.parameters(), lr=1e-5)
loss_fn = BCEWithLogitsLoss()
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=5,  # 不使用warmup步骤
                                            num_training_steps=len(train_dataloader)*epochs)


progress_bar = tqdm(range(epochs), desc='Training Progress')
# 训练模型的简化代码示例
model=model.to(device)
for epoch in progress_bar:  # 循环训练3个epoch
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2]}
        loss = model(**inputs)

        #loss = loss_fn(outputs, inputs['labels'])
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    
    total_loss=total_loss / len(train_dataloader)

    val_loss = 0
    model.eval()
    with torch.no_grad():
        for batch in val_dataloader:
            batch = tuple(b.to(device) for b in batch)
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'labels': batch[2]}
            loss = model(**inputs)
            #batch_loss = loss_fn(outputs, inputs['labels'])
            val_loss += loss.item()
    val_loss = val_loss / len(val_dataloader)

    if val_loss < best_loss:
        best_loss = val_loss
        # 保存最好的模型状态
        best_model_state = copy.deepcopy(model.state_dict())
        early_stopping_counter = 0  # 重置早停计数器
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= early_stopping_patience:
            print(f"Early stopping triggered after {epoch} epochs.")
            break
    
    progress_bar.set_description("Epoch {:} train loss: {:.6f},val loss{:.6f}".format(epoch,total_loss,val_loss))
model.load_state_dict(best_model_state)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 15 train loss: 0.053178,val loss0.191471:   3%|▎         | 16/500 [04:24<2:13:09, 16.51s/it]

Early stopping triggered after 16 epochs.





<All keys matched successfully>

In [8]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import numpy as np

# 将模型设置为评估模式
model.eval()

# 存储预测和真实标签
predictions, true_labels = [], []

# 禁用梯度计算
with torch.no_grad():
    model.to(device)
    for batch in val_dataloader:
        batch = tuple(b.to(device) for b in batch)
        inputs = {
            'input_ids': batch[0],
            'attention_mask': batch[1]
        }
        labels = batch[2]

        # 获取模型输出
        outputs = model(**inputs)

        # 将输出移动到CPU并转换为numpy数组
        logits = outputs.detach().cpu().numpy()
        label_ids = labels.to('cpu').numpy()

        # 存储预测和真实标签
        predictions.append(logits)
        true_labels.append(label_ids)

# 计算性能指标
predictions = np.vstack(predictions)  # 将预测结果堆叠起来
true_labels = np.vstack(true_labels)  # 将真实标签堆叠起来

# 计算每个标签的预测值
pred_labels=np.zeros_like(predictions)
split=[0.5,0.5,0.5,0.5,0.5,0.5]
for i in range(len(split)):
    predictions[:,i]=predictions[:,i]/predictions[:,i].max()
    pred_labels[:,i]=(predictions[:,i] > split[i]).astype(int)

precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='micro')
accuracy = accuracy_score(true_labels, pred_labels)
roc_auc = roc_auc_score(true_labels, predictions, average='micro')

print("Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(accuracy,precision,recall,f1,roc_auc))

list_name=['Confirmation','Factoid','List','Causal','Hypothetical','Complex']
for i in range(6):
# 计算指标
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels[:,i], pred_labels[:,i], average='micro')
    accuracy = accuracy_score(true_labels[:,i], pred_labels[:,i])
    roc_auc = roc_auc_score(true_labels[:,i], predictions[:,i], average='micro')

    print("{:} Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(list_name[i],accuracy,precision,recall,f1,roc_auc))

Accuracy:0.0000 Precision:0.2000 Recall:0.7282 F1 Score:0.3138 ROC AUC:0.3111
Confirmation Accuracy:0.1511 Precision:0.1511 Recall:0.1511 F1 Score:0.1511 ROC AUC:0.0627
Factoid Accuracy:0.7104 Precision:0.7104 Recall:0.7104 F1 Score:0.7104 ROC AUC:0.8150
List Accuracy:0.0325 Precision:0.0325 Recall:0.0325 F1 Score:0.0325 ROC AUC:0.2237
Causal Accuracy:0.9151 Precision:0.9151 Recall:0.9151 F1 Score:0.9151 ROC AUC:0.9663
Hypothetical Accuracy:0.0275 Precision:0.0275 Recall:0.0275 F1 Score:0.0275 ROC AUC:0.1828
Complex Accuracy:0.1124 Precision:0.1124 Recall:0.1124 F1 Score:0.1124 ROC AUC:0.5195


In [59]:
torch.save(model.state_dict(), 'model_state_dict_1.pth')

In [84]:
sentence="Who commanded the Prussian 12 Cavalry Brigade?"

sentence_encodings = tokenizer(sentence, truncation=True, padding=True, max_length=128)
sentence_seq = torch.tensor(sentence_encodings['input_ids'])
sentence_mask = torch.tensor(sentence_encodings['attention_mask'])
model.eval()
with torch.no_grad():
    model.cpu()
    inputs = {
            'input_ids':sentence_seq.unsqueeze(0),
            'attention_mask':sentence_mask.unsqueeze(0)
        }
    outputs = model(**inputs)
    logits = outputs.detach().cpu().numpy()
pred_labels=(logits > 0.5).astype(int)
print(pred_labels)

[[0 0 0 0 0 0]]
