In [3]:
import pandas as pd
data_path = "../data/open_source_8454_combine_short_description.csv"  # 替換為你的檔案路徑
df = pd.read_csv(data_path)
df

Unnamed: 0,Inbound Message,Label
0,login issue verified user details employee man...,0
1,outlook hello team meetings skype meetings etc...,0
2,cant log vpn cannot log vpn best,0
3,unable access tool page unable access tool page,0
4,skype error skype error,0
...,...,...
8449,emails coming mail good afternoon receiving em...,22
8450,telephony software issue telephony software issue,0
8451,vip windows password reset tifpdchb pedxruyf v...,0
8452,machine est funcionando unable access machine ...,44


# 十折驗證法

In [4]:
import torch
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}") 
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
print(f"Current GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9} GB")
print(f"Current GPU Memory Cached: {torch.cuda.memory_reserved() / 1e9} GB")

Using device: cuda
CUDA Available: True
CUDA Device: NVIDIA GeForce RTX 3050 6GB Laptop GPU
Current GPU Memory Allocated: 0.0 GB
Current GPU Memory Cached: 0.0 GB


In [5]:
from transformers import BertTokenizer
from sklearn.model_selection import KFold
# 設定參數
MAX_LENGTH = 128
BATCH_SIZE = 16
EPOCHS = 20

# 初始化 BERT 的 tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 初始化數據
messages = df['Inbound Message'].tolist()
labels = df['Label'].tolist()

# KFold 初始化
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [6]:
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, messages, labels, tokenizer, max_length):
        self.messages = messages
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.messages)

    def __getitem__(self, idx):
        message = str(self.messages[idx])
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            message,
            max_length=self.max_length,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [7]:
# 定義訓練與評估函數
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader):
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
    
    return total_loss / len(dataloader)

In [8]:
from sklearn.metrics import precision_score, recall_score, f1_score, balanced_accuracy_score, roc_auc_score
from sklearn.preprocessing import label_binarize
import numpy as np
import torch

def eval_model(model, dataloader, criterion, num_labels):
    model.eval()
    total_loss = 0
    all_labels = []
    all_preds = []
    all_probs = []
    
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = criterion(logits, labels)
            total_loss += loss.item()

            probs = torch.softmax(logits, dim=1)
            _, preds = torch.max(logits, dim=1)

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    val_loss = total_loss / len(dataloader)
    val_accuracy = (np.array(all_preds) == np.array(all_labels)).mean()
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    macro_f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    balanced_acc = balanced_accuracy_score(all_labels, all_preds)

    # **檢查驗證集是否包含兩個以上的類別**
    unique_labels = np.unique(all_labels)
    if len(unique_labels) > 1:
        all_labels_binarized = label_binarize(all_labels, classes=np.arange(num_labels))
        try:
            macro_auc = roc_auc_score(all_labels_binarized, all_probs, multi_class='ovo', average='macro')
            weighted_auc = roc_auc_score(all_labels_binarized, all_probs, multi_class='ovo', average='weighted')
        except ValueError as e:
            print(f"ROC AUC 計算失敗: {e}")
            macro_auc = None
            weighted_auc = None
    else:
        print(f"跳過 ROC AUC 計算，因為驗證集中只有一個類別: {unique_labels}")
        macro_auc = None
        weighted_auc = None

    return {
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "macro_f1": macro_f1,
        "balanced_accuracy": balanced_acc,
        "macro_auc": macro_auc,
        "weighted_auc": weighted_auc
    }


In [9]:
from sklearn.model_selection import KFold
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np

kf = KFold(n_splits=10, shuffle=True, random_state=42)

# 進行 10 折交叉驗證
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(messages)):
    print(f"Fold {fold + 1}")

    train_messages = [messages[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    val_messages = [messages[i] for i in val_idx]
    val_labels = [labels[i] for i in val_idx]

    train_dataset = CustomDataset(
        messages=train_messages,
        labels=train_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    val_dataset = CustomDataset(
        messages=val_messages,
        labels=val_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    NUM_LABELS = len(set(train_labels) | set(val_labels))  # 修正 NUM_LABELS 計算
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=NUM_LABELS)
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=1e-2)
    criterion = CrossEntropyLoss()

    patience = 3
    best_val_loss = float('inf')
    best_metrics = None
    epochs_without_improvement = 0
    
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch + 1}/{EPOCHS}")
        train_loss = train_epoch(model, train_dataloader, optimizer, criterion)
        metrics = eval_model(model, val_dataloader, criterion, num_labels=NUM_LABELS)
        print(metrics)
        
        if metrics['val_loss'] < best_val_loss:
            best_val_loss = metrics['val_loss']
            best_metrics = metrics
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
        
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}")
            break
    
    fold_results.append(best_metrics)

# 確保 fold_results 不為空
if len(fold_results) > 0:
    avg_metrics = {metric: np.mean([result[metric] for result in fold_results if result[metric] is not None]) for metric in fold_results[0].keys()}
    print("10-fold Cross-Validation Results:")
    for metric, value in avg_metrics.items():
        print(f"{metric}: {value}")
else:
    print("交叉驗證未成功執行，fold_results 為空")


Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:19<00:00,  2.38it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.6580788335710201, 'val_accuracy': 0.5910165484633569, 'precision': 0.42455818531370304, 'recall': 0.5910165484633569, 'f1': 0.4739585724757325, 'macro_f1': 0.09182643954932168, 'balanced_accuracy': 0.11180373872197664, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:20<00:00,  2.37it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4263526774802298, 'val_accuracy': 0.6016548463356974, 'precision': 0.608756355781152, 'recall': 0.6016548463356974, 'f1': 0.5702482862884894, 'macro_f1': 0.2266477213687894, 'balanced_accuracy': 0.2578334313205209, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.37it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3080203724357318, 'val_accuracy': 0.6666666666666666, 'precision': 0.6337799187285923, 'recall': 0.6666666666666666, 'f1': 0.6277707038412302, 'macro_f1': 0.3061860447780919, 'balanced_accuracy': 0.30729855985174087, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3161248216089212, 'val_accuracy': 0.66548463356974, 'precision': 0.6408620545773867, 'recall': 0.66548463356974, 'f1': 0.6405458032150138, 'macro_f1': 0.3268837052823727, 'balanced_accuracy': 0.3302539339540627, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3680898940788124, 'val_accuracy': 0.6666666666666666, 'precision': 0.6379254266222352, 'recall': 0.6666666666666666, 'f1': 0.6362885604564257, 'macro_f1': 0.3137214976840335, 'balanced_accuracy': 0.3133966599790023, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3838179954942667, 'val_accuracy': 0.6761229314420804, 'precision': 0.6533854915794034, 'recall': 0.6761229314420804, 'f1': 0.6490655828447717, 'macro_f1': 0.354567830908175, 'balanced_accuracy': 0.35282056476009466, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 6
Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.562162440902782, 'val_accuracy': 0.6146572104018913, 'precision': 0.4243845236476781, 'recall': 0.6146572104018913, 'f1': 0.4930528942394858, 'macro_f1': 0.10263549137786009, 'balanced_accuracy': 0.1180180763837967, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3103956844446794, 'val_accuracy': 0.6371158392434988, 'precision': 0.5801631660217138, 'recall': 0.6371158392434988, 'f1': 0.5816330984557815, 'macro_f1': 0.17999220417641332, 'balanced_accuracy': 0.18884503334151492, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2018907390675455, 'val_accuracy': 0.6773049645390071, 'precision': 0.6317207702262758, 'recall': 0.6773049645390071, 'f1': 0.6379441533170974, 'macro_f1': 0.2589144372064198, 'balanced_accuracy': 0.25124397117746217, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2892134529802035, 'val_accuracy': 0.6737588652482269, 'precision': 0.6609723324992962, 'recall': 0.6737588652482269, 'f1': 0.6465094323114133, 'macro_f1': 0.31069700541658213, 'balanced_accuracy': 0.32049314623327335, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.238195166835245, 'val_accuracy': 0.6737588652482269, 'precision': 0.6711778297335632, 'recall': 0.6737588652482269, 'f1': 0.6514780400659493, 'macro_f1': 0.28794782405745484, 'balanced_accuracy': 0.28889860153371033, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.428581637875089, 'val_accuracy': 0.6761229314420804, 'precision': 0.6741236267130988, 'recall': 0.6761229314420804, 'f1': 0.663741206751602, 'macro_f1': 0.32592921096036376, 'balanced_accuracy': 0.32683507138618356, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 6
Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4891270601524498, 'val_accuracy': 0.6513002364066194, 'precision': 0.49448656914660616, 'recall': 0.6513002364066194, 'f1': 0.5538149155394396, 'macro_f1': 0.10669011618528267, 'balanced_accuracy': 0.1342144790950125, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3302068243611533, 'val_accuracy': 0.6843971631205674, 'precision': 0.6378875926843864, 'recall': 0.6843971631205674, 'f1': 0.6287322696386559, 'macro_f1': 0.20029425154067573, 'balanced_accuracy': 0.21320389748637503, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3532310105719656, 'val_accuracy': 0.6796690307328606, 'precision': 0.6361327025474126, 'recall': 0.6796690307328606, 'f1': 0.6423852078489383, 'macro_f1': 0.240792365905237, 'balanced_accuracy': 0.252592862655352, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2572681313415743, 'val_accuracy': 0.6903073286052009, 'precision': 0.6696775898165531, 'recall': 0.6903073286052009, 'f1': 0.6685759735277139, 'macro_f1': 0.297353605304868, 'balanced_accuracy': 0.30713783067346867, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2934088032200652, 'val_accuracy': 0.6832151300236406, 'precision': 0.6827512036841938, 'recall': 0.6832151300236406, 'f1': 0.6708437938566458, 'macro_f1': 0.31323167238969596, 'balanced_accuracy': 0.33273549425427235, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3543047814998987, 'val_accuracy': 0.6855791962174941, 'precision': 0.7135009483977663, 'recall': 0.6855791962174941, 'f1': 0.6758575737271469, 'macro_f1': 0.31758497009431047, 'balanced_accuracy': 0.3358653181667338, 'macro_auc': None, 'weighted_auc': None}
Epoch 7/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3690216802763489, 'val_accuracy': 0.6867612293144209, 'precision': 0.7184329603241181, 'recall': 0.6867612293144209, 'f1': 0.6843230812839076, 'macro_f1': 0.3651212930994816, 'balanced_accuracy': 0.38885411455083535, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 7
Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4854989962757759, 'val_accuracy': 0.6394799054373522, 'precision': 0.5293005863086633, 'recall': 0.6394799054373522, 'f1': 0.5478723524538789, 'macro_f1': 0.11591374724752648, 'balanced_accuracy': 0.1293360347065538, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3275996165455513, 'val_accuracy': 0.6453900709219859, 'precision': 0.6035383745856322, 'recall': 0.6453900709219859, 'f1': 0.5978734577716717, 'macro_f1': 0.2108012202702423, 'balanced_accuracy': 0.21953788766876026, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2306355271699294, 'val_accuracy': 0.6867612293144209, 'precision': 0.6783612458764681, 'recall': 0.6867612293144209, 'f1': 0.654479365417763, 'macro_f1': 0.26993084054543265, 'balanced_accuracy': 0.25558382845289984, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2255917149332334, 'val_accuracy': 0.6891252955082743, 'precision': 0.6687569224288511, 'recall': 0.6891252955082743, 'f1': 0.6696613714935375, 'macro_f1': 0.3092920809796456, 'balanced_accuracy': 0.2998617730446696, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3040869539638735, 'val_accuracy': 0.7056737588652482, 'precision': 0.691882489753912, 'recall': 0.7056737588652482, 'f1': 0.6839404347592827, 'macro_f1': 0.34077937576223, 'balanced_accuracy': 0.32952866180968476, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3194194523231038, 'val_accuracy': 0.7092198581560284, 'precision': 0.715897212589624, 'recall': 0.7092198581560284, 'f1': 0.6969084038369187, 'macro_f1': 0.4150221997168141, 'balanced_accuracy': 0.4152790855576987, 'macro_auc': None, 'weighted_auc': None}
Epoch 7/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4724241467017047, 'val_accuracy': 0.6950354609929078, 'precision': 0.7106792274525345, 'recall': 0.6950354609929078, 'f1': 0.6894613941301169, 'macro_f1': 0.39193078517435387, 'balanced_accuracy': 0.41964350527993505, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 7
Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.5035894546868667, 'val_accuracy': 0.6082840236686391, 'precision': 0.4429051049821038, 'recall': 0.6082840236686391, 'f1': 0.5053699905708122, 'macro_f1': 0.10636221819707482, 'balanced_accuracy': 0.13281578749345288, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3488959850005384, 'val_accuracy': 0.642603550295858, 'precision': 0.5901900664771383, 'recall': 0.642603550295858, 'f1': 0.5891961526299245, 'macro_f1': 0.19524488381763053, 'balanced_accuracy': 0.2202311185280482, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2705351755304157, 'val_accuracy': 0.6698224852071006, 'precision': 0.6341271594907992, 'recall': 0.6698224852071006, 'f1': 0.6332010114710898, 'macro_f1': 0.25876087372827006, 'balanced_accuracy': 0.2645180917305897, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2675761990952041, 'val_accuracy': 0.6970414201183432, 'precision': 0.6514261466940993, 'recall': 0.6970414201183432, 'f1': 0.653924986055792, 'macro_f1': 0.29010032133966385, 'balanced_accuracy': 0.2809460584952409, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3283831927011598, 'val_accuracy': 0.6982248520710059, 'precision': 0.6690291287078797, 'recall': 0.6982248520710059, 'f1': 0.6660366198146727, 'macro_f1': 0.339788489961774, 'balanced_accuracy': 0.34298026173061663, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.298721285361164, 'val_accuracy': 0.6816568047337278, 'precision': 0.7263689974449555, 'recall': 0.6816568047337278, 'f1': 0.6798371682166515, 'macro_f1': 0.371075335069558, 'balanced_accuracy': 0.3808422375170966, 'macro_auc': None, 'weighted_auc': None}
Epoch 7/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3186408804272705, 'val_accuracy': 0.6958579881656805, 'precision': 0.6876453897569537, 'recall': 0.6958579881656805, 'f1': 0.6709685640153508, 'macro_f1': 0.3755784598276157, 'balanced_accuracy': 0.3731473970561964, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 7
Fold 6


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.5935835410963815, 'val_accuracy': 0.5964497041420118, 'precision': 0.4236738695381866, 'recall': 0.5964497041420118, 'f1': 0.4766780696936638, 'macro_f1': 0.10190188116138886, 'balanced_accuracy': 0.116995145407774, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3467951793715638, 'val_accuracy': 0.6284023668639053, 'precision': 0.541503974782779, 'recall': 0.6284023668639053, 'f1': 0.5587287959427448, 'macro_f1': 0.19654199163480826, 'balanced_accuracy': 0.19820229961813113, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2794485249609318, 'val_accuracy': 0.6497041420118344, 'precision': 0.6430023763615147, 'recall': 0.6497041420118344, 'f1': 0.6315896813844067, 'macro_f1': 0.340979162412008, 'balanced_accuracy': 0.35692134400018033, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3001602632257174, 'val_accuracy': 0.676923076923077, 'precision': 0.6525842910232049, 'recall': 0.676923076923077, 'f1': 0.6487871732291481, 'macro_f1': 0.34685007201817625, 'balanced_accuracy': 0.3712874170403987, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4394887421491012, 'val_accuracy': 0.650887573964497, 'precision': 0.6676716593077502, 'recall': 0.650887573964497, 'f1': 0.6445418362461705, 'macro_f1': 0.33736928757563617, 'balanced_accuracy': 0.3664320566289133, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3831289728295129, 'val_accuracy': 0.6816568047337278, 'precision': 0.6651441324446892, 'recall': 0.6816568047337278, 'f1': 0.6591643162491944, 'macro_f1': 0.3646166795898697, 'balanced_accuracy': 0.38373340096582687, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 6
Fold 7


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4321529471649315, 'val_accuracy': 0.6497041420118344, 'precision': 0.5088102991665892, 'recall': 0.6497041420118344, 'f1': 0.5532855797471727, 'macro_f1': 0.12979437075710565, 'balanced_accuracy': 0.1384006333829029, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2468176191707827, 'val_accuracy': 0.6603550295857988, 'precision': 0.5972698765316244, 'recall': 0.6603550295857988, 'f1': 0.6144462244040695, 'macro_f1': 0.21938227768578783, 'balanced_accuracy': 0.2320150528483862, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.1961804870164618, 'val_accuracy': 0.6863905325443787, 'precision': 0.668368395247207, 'recall': 0.6863905325443787, 'f1': 0.6573265669537093, 'macro_f1': 0.3019983985372971, 'balanced_accuracy': 0.31897617148993873, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2101147242312162, 'val_accuracy': 0.672189349112426, 'precision': 0.7028151358230015, 'recall': 0.672189349112426, 'f1': 0.6678201875290908, 'macro_f1': 0.32246884686023675, 'balanced_accuracy': 0.373661875285363, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2520786878072991, 'val_accuracy': 0.6781065088757396, 'precision': 0.6962602734006167, 'recall': 0.6781065088757396, 'f1': 0.6678782217754363, 'macro_f1': 0.3166153967124425, 'balanced_accuracy': 0.33735949012127575, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.245207038128151, 'val_accuracy': 0.7029585798816568, 'precision': 0.6973511347416619, 'recall': 0.7029585798816568, 'f1': 0.682647220152048, 'macro_f1': 0.37924134484917343, 'balanced_accuracy': 0.39687053696315283, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 6
Fold 8


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.6297770030093643, 'val_accuracy': 0.6130177514792899, 'precision': 0.44664096112829504, 'recall': 0.6130177514792899, 'f1': 0.5010679461063705, 'macro_f1': 0.12349752919283934, 'balanced_accuracy': 0.13241955699440863, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4294184823081177, 'val_accuracy': 0.6366863905325444, 'precision': 0.5812640521842435, 'recall': 0.6366863905325444, 'f1': 0.57790728439552, 'macro_f1': 0.18599630877170498, 'balanced_accuracy': 0.20392006724370199, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3874059724357892, 'val_accuracy': 0.6662721893491125, 'precision': 0.6457305151424159, 'recall': 0.6662721893491125, 'f1': 0.6310966872388274, 'macro_f1': 0.31868515857635643, 'balanced_accuracy': 0.31003338522214646, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.357492300699342, 'val_accuracy': 0.6674556213017752, 'precision': 0.6381324698001747, 'recall': 0.6674556213017752, 'f1': 0.6326557057232273, 'macro_f1': 0.2869981018591528, 'balanced_accuracy': 0.2982755953079982, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3811524926491503, 'val_accuracy': 0.6627218934911243, 'precision': 0.6778172687740419, 'recall': 0.6627218934911243, 'f1': 0.6564600457986345, 'macro_f1': 0.3672148951504222, 'balanced_accuracy': 0.37938630905105575, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.465057649702396, 'val_accuracy': 0.6733727810650888, 'precision': 0.6859075224494742, 'recall': 0.6733727810650888, 'f1': 0.666316711677255, 'macro_f1': 0.41223131780290095, 'balanced_accuracy': 0.4592668503671513, 'macro_auc': None, 'weighted_auc': None}
Epoch 7/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.5700558688280717, 'val_accuracy': 0.6733727810650888, 'precision': 0.6677811899982526, 'recall': 0.6733727810650888, 'f1': 0.6527021541357985, 'macro_f1': 0.3848039521321532, 'balanced_accuracy': 0.4073975249821727, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 7
Fold 9


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.6824230428011913, 'val_accuracy': 0.5834319526627219, 'precision': 0.41941093547214625, 'recall': 0.5834319526627219, 'f1': 0.45980168157381296, 'macro_f1': 0.09802269335200009, 'balanced_accuracy': 0.12461744541111235, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4811925837453805, 'val_accuracy': 0.6059171597633136, 'precision': 0.5114256690714123, 'recall': 0.6059171597633136, 'f1': 0.5138160234068853, 'macro_f1': 0.14948702271850076, 'balanced_accuracy': 0.1674023600011366, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2693900139826648, 'val_accuracy': 0.650887573964497, 'precision': 0.6288778484776801, 'recall': 0.650887573964497, 'f1': 0.6211523088406421, 'macro_f1': 0.28575525311420175, 'balanced_accuracy': 0.30990643007414326, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3245448742835026, 'val_accuracy': 0.6710059171597633, 'precision': 0.6294790990112209, 'recall': 0.6710059171597633, 'f1': 0.6294089011263491, 'macro_f1': 0.31583906895957775, 'balanced_accuracy': 0.3121750161586908, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3746779426088873, 'val_accuracy': 0.6520710059171597, 'precision': 0.6696979863808731, 'recall': 0.6520710059171597, 'f1': 0.6377370754847319, 'macro_f1': 0.33976550320396415, 'balanced_accuracy': 0.3660279400534616, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3805130146584421, 'val_accuracy': 0.6674556213017752, 'precision': 0.6817872098723256, 'recall': 0.6674556213017752, 'f1': 0.6613460092408325, 'macro_f1': 0.4166517997082136, 'balanced_accuracy': 0.4324801194591292, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 6
Fold 10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:21<00:00,  2.37it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.8178043230524603, 'val_accuracy': 0.5491124260355029, 'precision': 0.3471619042546274, 'recall': 0.5491124260355029, 'f1': 0.4136826510439275, 'macro_f1': 0.0665212876371024, 'balanced_accuracy': 0.07593366811700433, 'macro_auc': None, 'weighted_auc': None}
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.37it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4091702069876328, 'val_accuracy': 0.6236686390532544, 'precision': 0.558326100449589, 'recall': 0.6236686390532544, 'f1': 0.5624497271491525, 'macro_f1': 0.19646832715559692, 'balanced_accuracy': 0.2115555732784325, 'macro_auc': None, 'weighted_auc': None}
Epoch 3/20


100%|██████████| 476/476 [03:21<00:00,  2.37it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.2657367424020227, 'val_accuracy': 0.6781065088757396, 'precision': 0.626491543673788, 'recall': 0.6781065088757396, 'f1': 0.6327077900590798, 'macro_f1': 0.3017515213442337, 'balanced_accuracy': 0.311745729653998, 'macro_auc': None, 'weighted_auc': None}
Epoch 4/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.4228974249003068, 'val_accuracy': 0.6449704142011834, 'precision': 0.6177506841191052, 'recall': 0.6449704142011834, 'f1': 0.6005496280429132, 'macro_f1': 0.30071202333438757, 'balanced_accuracy': 0.2990325588733675, 'macro_auc': None, 'weighted_auc': None}
Epoch 5/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.3944833447348397, 'val_accuracy': 0.6615384615384615, 'precision': 0.6387406538539622, 'recall': 0.6615384615384615, 'f1': 0.6307347252642909, 'macro_f1': 0.31318845734551903, 'balanced_accuracy': 0.34304192284493235, 'macro_auc': None, 'weighted_auc': None}
Epoch 6/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


ROC AUC 計算失敗: Only one class present in y_true. ROC AUC score is not defined in that case.
{'val_loss': 1.405953770538546, 'val_accuracy': 0.6733727810650888, 'precision': 0.6627184330390584, 'recall': 0.6733727810650888, 'f1': 0.6532857885737835, 'macro_f1': 0.3610852726215593, 'balanced_accuracy': 0.3817911576662932, 'macro_auc': None, 'weighted_auc': None}
Early stopping triggered at epoch 6
10-fold Cross-Validation Results:
val_loss: 1.2628595225934713
val_accuracy: 0.6752990054135718
precision: 0.6460233981454737
recall: 0.6752990054135718
f1: 0.6433309241196437
macro_f1: 0.29793289268755824
balanced_accuracy: 0.30423134637688404
macro_auc: nan
weighted_auc: nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [10]:
fold_results

[{'val_loss': 1.3080203724357318,
  'val_accuracy': 0.6666666666666666,
  'precision': 0.6337799187285923,
  'recall': 0.6666666666666666,
  'f1': 0.6277707038412302,
  'macro_f1': 0.3061860447780919,
  'balanced_accuracy': 0.30729855985174087,
  'macro_auc': None,
  'weighted_auc': None},
 {'val_loss': 1.2018907390675455,
  'val_accuracy': 0.6773049645390071,
  'precision': 0.6317207702262758,
  'recall': 0.6773049645390071,
  'f1': 0.6379441533170974,
  'macro_f1': 0.2589144372064198,
  'balanced_accuracy': 0.25124397117746217,
  'macro_auc': None,
  'weighted_auc': None},
 {'val_loss': 1.2572681313415743,
  'val_accuracy': 0.6903073286052009,
  'precision': 0.6696775898165531,
  'recall': 0.6903073286052009,
  'f1': 0.6685759735277139,
  'macro_f1': 0.297353605304868,
  'balanced_accuracy': 0.30713783067346867,
  'macro_auc': None,
  'weighted_auc': None},
 {'val_loss': 1.2255917149332334,
  'val_accuracy': 0.6891252955082743,
  'precision': 0.6687569224288511,
  'recall': 0.6891252