In [1]:
import pandas as pd
data_path = "../data/open_source_8380.csv"  # 替換為你的檔案路徑
df = pd.read_csv(data_path)
df

Unnamed: 0,Inbound Message,Label
0,verified user details employee manager name ch...,0
1,hello team meetings skype meetings etc appeari...,0
2,cannot log vpn best,0
3,unable access tool page,0
4,skype error,0
...,...,...
8375,good afternoon receiving emails sent mail plea...,22
8376,telephony software issue,0
8377,vip windows password reset tifpdchb pedxruyf,0
8378,unable access machine utilities finish drawers...,44


# 十折驗證法

In [9]:
import torch
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}") 
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
print(f"Current GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9} GB")
print(f"Current GPU Memory Cached: {torch.cuda.memory_reserved() / 1e9} GB")

Using device: cuda
CUDA Available: True
CUDA Device: NVIDIA GeForce RTX 3050 6GB Laptop GPU
Current GPU Memory Allocated: 2.309442048 GB
Current GPU Memory Cached: 5.385486336 GB


In [3]:
from transformers import DebertaTokenizer
from sklearn.model_selection import KFold
# 設定參數
MAX_LENGTH = 128
BATCH_SIZE = 16
EPOCHS = 20

# 初始化 BERT 的 tokenizer
tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')

# 初始化數據
messages = df['Inbound Message'].tolist()
labels = df['Label'].tolist()

# KFold 初始化
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [4]:
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, messages, labels, tokenizer, max_length):
        self.messages = messages
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.messages)

    def __getitem__(self, idx):
        message = str(self.messages[idx])
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            message,
            max_length=self.max_length,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [5]:
# 定義訓練與評估函數
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader):
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
    
    return total_loss / len(dataloader)

In [6]:
def eval_model(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    correct_predictions = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = criterion(logits, labels)
            total_loss += loss.item()

            _, preds = torch.max(logits, dim=1)
            correct_predictions += torch.sum(preds == labels)
    
    accuracy = correct_predictions.double() / len(dataloader.dataset)
    return total_loss / len(dataloader), accuracy

In [7]:
from transformers import DebertaForSequenceClassification
from torch.utils.data import DataLoader
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np

In [8]:
# 進行 10 折交叉驗證
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(messages)):
    print(f"Fold {fold + 1}")

    # 折分數據
    train_messages = [messages[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    val_messages = [messages[i] for i in val_idx]
    val_labels = [labels[i] for i in val_idx]

    # 建立 Dataset 和 DataLoader
    train_dataset = CustomDataset(
        messages=train_messages,
        labels=train_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    val_dataset = CustomDataset(
        messages=val_messages,
        labels=val_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # 初始化模型
    NUM_LABELS = len(set(labels))
    model = DebertaForSequenceClassification.from_pretrained('microsoft/deberta-base', num_labels=NUM_LABELS)
    model.to(device)

    # 初始化優化器和損失函數
    optimizer = AdamW(model.parameters(), lr=5e-5)
    criterion = CrossEntropyLoss()

    # Early stopping 參數
    patience = 3  # 連續幾個 epoch 沒有提升就停止
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    
    # 訓練模型
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch + 1}/{EPOCHS}")
        train_loss = train_epoch(model, train_dataloader, optimizer, criterion)
        val_loss, val_accuracy = eval_model(model, val_dataloader, criterion)
        print(f"Train loss: {train_loss} | Val loss: {val_loss} | Val accuracy: {val_accuracy}")

        # Early Stopping 判斷
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0  # 重置計數
        else:
            epochs_without_improvement += 1  # 增加計數
        
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}")
            break  # 終止訓練

    # 保存每折的結果
    fold_results.append({"fold": fold + 1, "val_loss": val_loss, "val_accuracy": val_accuracy.item()})

# 計算總體平均結果
avg_val_loss = np.mean([result["val_loss"] for result in fold_results])
avg_val_accuracy = np.mean([result["val_accuracy"] for result in fold_results])

print(f"10-fold Cross-Validation Results:")
print(f"Average Validation Loss: {avg_val_loss}")
print(f"Average Validation Accuracy: {avg_val_accuracy}")

Fold 1


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.9885614679779036 | Val loss: 1.6822007883269832 | Val accuracy: 0.5894988066825776
Epoch 2/20


100%|██████████| 472/472 [04:40<00:00,  1.68it/s]


Train loss: 1.5072914554532302 | Val loss: 1.553063414288017 | Val accuracy: 0.6145584725536993
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.223350696833962 | Val loss: 1.4861818627928787 | Val accuracy: 0.6288782816229117
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.9834530279040337 | Val loss: 1.416076952556394 | Val accuracy: 0.6181384248210025
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.7666825778245673 | Val loss: 1.562157395594525 | Val accuracy: 0.626491646778043
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.5948934898064551 | Val loss: 1.611696996497658 | Val accuracy: 0.6276849642004774
Epoch 7/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.4754460053439489 | Val loss: 1.6691749284289918 | Val accuracy: 0.6467780429594272
Early stopping triggered at epoch 7
Fold 2


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.9984897008891833 | Val loss: 1.7430111912061583 | Val accuracy: 0.5835322195704058
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.5001921017291182 | Val loss: 1.4999748502137527 | Val accuracy: 0.616945107398568
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.2101029331906368 | Val loss: 1.4154850542545319 | Val accuracy: 0.6252983293556086
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.9672615162523116 | Val loss: 1.4643432104362633 | Val accuracy: 0.6241050119331742
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.7438960073957757 | Val loss: 1.5609365943467841 | Val accuracy: 0.6587112171837709
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.5672973725013435 | Val loss: 1.59744345916892 | Val accuracy: 0.6384248210023866
Early stopping triggered at epoch 6
Fold 3


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 2.058634368671199 | Val loss: 1.792810028454043 | Val accuracy: 0.5859188544152745
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.6596789741415088 | Val loss: 1.6513827814246125 | Val accuracy: 0.5966587112171838
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.417825551798283 | Val loss: 1.5667397852213878 | Val accuracy: 0.6085918854415274
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.198257873750339 | Val loss: 1.5180476707107615 | Val accuracy: 0.6252983293556086
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.982068452794673 | Val loss: 1.4994987395574462 | Val accuracy: 0.6312649164677805
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.7824407506071157 | Val loss: 1.532792811686138 | Val accuracy: 0.6503579952267303
Epoch 7/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.6495087053626776 | Val loss: 1.5729160601238035 | Val accuracy: 0.630071599045346
Epoch 8/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.5125761843617942 | Val loss: 1.7246006361718447 | Val accuracy: 0.6479713603818616
Early stopping triggered at epoch 8
Fold 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:42<00:00,  1.67it/s]


Train loss: 1.9972626688116688 | Val loss: 1.6632614877988707 | Val accuracy: 0.5859188544152745
Epoch 2/20


100%|██████████| 472/472 [04:42<00:00,  1.67it/s]


Train loss: 1.5345532087198759 | Val loss: 1.439625693379708 | Val accuracy: 0.6014319809069213
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.2334988300956913 | Val loss: 1.3584415474027958 | Val accuracy: 0.6241050119331742
Epoch 4/20


100%|██████████| 472/472 [04:42<00:00,  1.67it/s]


Train loss: 1.0041735174797348 | Val loss: 1.298585907468256 | Val accuracy: 0.6408114558472554
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 0.7816488448909279 | Val loss: 1.45433479716193 | Val accuracy: 0.6431980906921241
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 0.6021471143842249 | Val loss: 1.4402132444786575 | Val accuracy: 0.6551312649164678
Epoch 7/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 0.4808895410101671 | Val loss: 1.4681608210194785 | Val accuracy: 0.6694510739856803
Early stopping triggered at epoch 7
Fold 5


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 2.086793024282334 | Val loss: 1.7834377896110967 | Val accuracy: 0.5715990453460621
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.6321311635991274 | Val loss: 1.6033673100876358 | Val accuracy: 0.594272076372315
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.4285479386226605 | Val loss: 1.562267539073836 | Val accuracy: 0.60381861575179
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.236784958675251 | Val loss: 1.5674147774588387 | Val accuracy: 0.6014319809069213
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.0392455654376644 | Val loss: 1.5207667446361397 | Val accuracy: 0.630071599045346
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.8621159159278465 | Val loss: 1.525531605729517 | Val accuracy: 0.616945107398568
Epoch 7/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.7115369449543246 | Val loss: 1.7449139570290189 | Val accuracy: 0.6205250596658711
Epoch 8/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.5641920625200589 | Val loss: 1.6651170731715437 | Val accuracy: 0.6336515513126492
Early stopping triggered at epoch 8
Fold 6


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.9523478687062101 | Val loss: 1.762871096718986 | Val accuracy: 0.5465393794749404
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.5120113538104598 | Val loss: 1.5955248171428464 | Val accuracy: 0.568019093078759
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.254189908378205 | Val loss: 1.5342495328975174 | Val accuracy: 0.5596658711217184
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.0501735332911297 | Val loss: 1.5733180934528135 | Val accuracy: 0.5847255369928401
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.8391678113553483 | Val loss: 1.6159810884943548 | Val accuracy: 0.5835322195704058
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.6822454612386428 | Val loss: 1.581449492922369 | Val accuracy: 0.60381861575179
Early stopping triggered at epoch 6
Fold 7


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.9429707428661442 | Val loss: 1.5550868488707632 | Val accuracy: 0.594272076372315
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.5051210300397064 | Val loss: 1.35119362765888 | Val accuracy: 0.6575178997613366
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.2572052584866347 | Val loss: 2.4487808290517554 | Val accuracy: 0.6396181384248211
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.319345289936005 | Val loss: 1.3622849797302823 | Val accuracy: 0.6682577565632458
Epoch 5/20


100%|██████████| 472/472 [04:42<00:00,  1.67it/s]


Train loss: 0.9801538523791705 | Val loss: 1.372130589665107 | Val accuracy: 0.6443914081145585
Early stopping triggered at epoch 5
Fold 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.9497756228734882 | Val loss: 1.7869092540920906 | Val accuracy: 0.568019093078759
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.4979526310906572 | Val loss: 1.5127151765913334 | Val accuracy: 0.6145584725536993
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.2127579645080082 | Val loss: 1.4356118058258633 | Val accuracy: 0.6109785202863962
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.9648194558350212 | Val loss: 1.4865623791262788 | Val accuracy: 0.6527446300715991
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 0.8050599541530897 | Val loss: 1.6077294034777947 | Val accuracy: 0.6205250596658711
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.6377231063542225 | Val loss: 1.627506194249639 | Val accuracy: 0.6241050119331742
Early stopping triggered at epoch 6
Fold 9


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.9233439176075033 | Val loss: 1.6153233051300049 | Val accuracy: 0.594272076372315
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.4719566772928683 | Val loss: 1.460752714355037 | Val accuracy: 0.6050119331742244
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.2868367394400855 | Val loss: 1.3707598712084428 | Val accuracy: 0.636038186157518
Epoch 4/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.0009056447168527 | Val loss: 1.4057634427862347 | Val accuracy: 0.6455847255369929
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.7893250682075523 | Val loss: 1.4107375336143206 | Val accuracy: 0.6599045346062052
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 0.6172263075802791 | Val loss: 1.472796711719261 | Val accuracy: 0.6539379474940334
Early stopping triggered at epoch 6
Fold 10


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 2.19203003053948 | Val loss: 1.8008631647757765 | Val accuracy: 0.5847255369928401
Epoch 2/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.823643894266274 | Val loss: 1.6182346400224938 | Val accuracy: 0.6181384248210025
Epoch 3/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.592340607132952 | Val loss: 1.66537144780159 | Val accuracy: 0.6109785202863962
Epoch 4/20


100%|██████████| 472/472 [04:42<00:00,  1.67it/s]


Train loss: 1.3869655593850856 | Val loss: 1.5087267464061953 | Val accuracy: 0.6229116945107399
Epoch 5/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 1.1672083258123722 | Val loss: 1.4769325675267093 | Val accuracy: 0.6145584725536993
Epoch 6/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 1.5852778488296573 | Val loss: 2.427120213238698 | Val accuracy: 0.49642004773269693
Epoch 7/20


100%|██████████| 472/472 [04:41<00:00,  1.67it/s]


Train loss: 2.5137341492256877 | Val loss: 2.40699598924169 | Val accuracy: 0.49642004773269693
Epoch 8/20


100%|██████████| 472/472 [04:41<00:00,  1.68it/s]


Train loss: 2.5023574382066727 | Val loss: 2.4008536068898327 | Val accuracy: 0.49642004773269693
Early stopping triggered at epoch 8
10-fold Cross-Validation Results:
Average Validation Loss: 1.6579233513406986
Average Validation Accuracy: 0.6258949880668258
