In [1]:
import pandas as pd
data_path = "../data/open_source_8454_combine_short_description.csv"  # 替換為你的檔案路徑
df = pd.read_csv(data_path)
df

Unnamed: 0,Inbound Message,Label
0,login issue verified user details employee man...,0
1,outlook hello team meetings skype meetings etc...,0
2,cant log vpn cannot log vpn best,0
3,unable access tool page unable access tool page,0
4,skype error skype error,0
...,...,...
8449,emails coming mail good afternoon receiving em...,22
8450,telephony software issue telephony software issue,0
8451,vip windows password reset tifpdchb pedxruyf v...,0
8452,machine est funcionando unable access machine ...,44


# 十折驗證法

In [2]:
import torch
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}") 
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
print(f"Current GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9} GB")
print(f"Current GPU Memory Cached: {torch.cuda.memory_reserved() / 1e9} GB")

Using device: cuda
CUDA Available: True
CUDA Device: NVIDIA GeForce RTX 3050 6GB Laptop GPU
Current GPU Memory Allocated: 0.0 GB
Current GPU Memory Cached: 0.0 GB


In [3]:
from transformers import DebertaTokenizer
from sklearn.model_selection import KFold
# 設定參數
MAX_LENGTH = 128
BATCH_SIZE = 16
EPOCHS = 20

# 初始化 BERT 的 tokenizer
tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')

# 初始化數據
messages = df['Inbound Message'].tolist()
labels = df['Label'].tolist()

# KFold 初始化
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [4]:
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, messages, labels, tokenizer, max_length):
        self.messages = messages
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.messages)

    def __getitem__(self, idx):
        message = str(self.messages[idx])
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            message,
            max_length=self.max_length,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [5]:
# 定義訓練與評估函數
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader):
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
    
    return total_loss / len(dataloader)

In [6]:
def eval_model(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    correct_predictions = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = criterion(logits, labels)
            total_loss += loss.item()

            _, preds = torch.max(logits, dim=1)
            correct_predictions += torch.sum(preds == labels)
    
    accuracy = correct_predictions.double() / len(dataloader.dataset)
    return total_loss / len(dataloader), accuracy

In [7]:
from transformers import DebertaForSequenceClassification
from torch.utils.data import DataLoader
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np

In [9]:
# 進行 10 折交叉驗證
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(messages)):
    print(f"Fold {fold + 1}")

    # 折分數據
    train_messages = [messages[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    val_messages = [messages[i] for i in val_idx]
    val_labels = [labels[i] for i in val_idx]

    # 建立 Dataset 和 DataLoader
    train_dataset = CustomDataset(
        messages=train_messages,
        labels=train_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    val_dataset = CustomDataset(
        messages=val_messages,
        labels=val_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # 初始化模型
    NUM_LABELS = len(set(labels))
    model = DebertaForSequenceClassification.from_pretrained('microsoft/deberta-base', num_labels=NUM_LABELS)
    model.to(device)

    # 初始化優化器和損失函數
    optimizer = AdamW(model.parameters(), lr=5e-5)
    criterion = CrossEntropyLoss()

    # Early stopping 參數
    patience = 3  # 連續幾個 epoch 沒有提升就停止
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    
    # 訓練模型
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch + 1}/{EPOCHS}")
        train_loss = train_epoch(model, train_dataloader, optimizer, criterion)
        val_loss, val_accuracy = eval_model(model, val_dataloader, criterion)
        print(f"Train loss: {train_loss} | Val loss: {val_loss} | Val accuracy: {val_accuracy}")

        # Early Stopping 判斷
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0  # 重置計數
        else:
            epochs_without_improvement += 1  # 增加計數
        
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}")
            break  # 終止訓練

    # 保存每折的結果
    fold_results.append({"fold": fold + 1, "val_loss": val_loss, "val_accuracy": val_accuracy.item()})

# 計算總體平均結果
avg_val_loss = np.mean([result["val_loss"] for result in fold_results])
avg_val_accuracy = np.mean([result["val_accuracy"] for result in fold_results])

print(f"10-fold Cross-Validation Results:")
print(f"Average Validation Loss: {avg_val_loss}")
print(f"Average Validation Accuracy: {avg_val_accuracy}")

Fold 1


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.8653552874046213 | Val loss: 1.7109463484782093 | Val accuracy: 0.5803782505910166
Epoch 2/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.3318480672079975 | Val loss: 1.3846967135960202 | Val accuracy: 0.6193853427895981
Epoch 3/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.0345640858056164 | Val loss: 1.3397872262405899 | Val accuracy: 0.6513002364066194
Epoch 4/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.7996330928495702 | Val loss: 1.2612448750801806 | Val accuracy: 0.6501182033096926
Epoch 5/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.6270525293393421 | Val loss: 1.43003252767167 | Val accuracy: 0.652482269503546
Epoch 6/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.47373153394706424 | Val loss: 1.467323197508758 | Val accuracy: 0.6572104018912529
Epoch 7/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.380076784523157 | Val loss: 1.4804537734895382 | Val accuracy: 0.6477541371158392
Early stopping triggered at epoch 7
Fold 2


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.9949667023009612 | Val loss: 1.72453739508143 | Val accuracy: 0.5721040189125295
Epoch 2/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.5319597968409042 | Val loss: 1.4526040852069855 | Val accuracy: 0.5862884160756501
Epoch 3/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.3369506113231182 | Val loss: 1.3174021379002985 | Val accuracy: 0.6347517730496454
Epoch 4/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.1661249988970637 | Val loss: 1.2768035013720673 | Val accuracy: 0.640661938534279
Epoch 5/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 1.0501905401088611 | Val loss: 1.2281823796483706 | Val accuracy: 0.6536643026004728
Epoch 6/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.8776734398380548 | Val loss: 1.2421310932006475 | Val accuracy: 0.6560283687943262
Epoch 7/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.7118483597271833 | Val loss: 1.3122689875791658 | Val accuracy: 0.648936170212766
Epoch 8/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.563695935575495 | Val loss: 1.343620888748259 | Val accuracy: 0.6619385342789598
Early stopping triggered at epoch 8
Fold 3


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.9904550614477206 | Val loss: 1.5636755959042963 | Val accuracy: 0.6182033096926713
Epoch 2/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.4634702519959761 | Val loss: 1.3113483606644396 | Val accuracy: 0.6607565011820331
Epoch 3/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.1364429907388045 | Val loss: 1.2264929725876395 | Val accuracy: 0.6867612293144207
Epoch 4/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.9010151361091798 | Val loss: 1.1969191797499388 | Val accuracy: 0.7009456264775413
Epoch 5/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.6824426068102613 | Val loss: 1.219733958817878 | Val accuracy: 0.6903073286052009
Epoch 6/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.510739767689164 | Val loss: 1.3929968829143722 | Val accuracy: 0.7056737588652482
Epoch 7/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.5664146877316555 | Val loss: 1.4157708083050993 | Val accuracy: 0.6867612293144207
Early stopping triggered at epoch 7
Fold 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 1.92318154683634 | Val loss: 1.5196260489382833 | Val accuracy: 0.6229314420803782
Epoch 2/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.409718207263646 | Val loss: 1.3165484953601405 | Val accuracy: 0.6359338061465721
Epoch 3/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 1.098381249938442 | Val loss: 1.2825724122659214 | Val accuracy: 0.6560283687943262
Epoch 4/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 0.8806072630363853 | Val loss: 1.2656869922044143 | Val accuracy: 0.6808510638297872
Epoch 5/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.160417557827064 | Val loss: 1.4250667963950139 | Val accuracy: 0.648936170212766
Epoch 6/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.7493930776924396 | Val loss: 1.2659104795388456 | Val accuracy: 0.6784869976359338
Epoch 7/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.5306829747376072 | Val loss: 1.3995142177028477 | Val accuracy: 0.6796690307328606
Early stopping triggered at epoch 7
Fold 5


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.8862357874627875 | Val loss: 1.647756574288854 | Val accuracy: 0.6011834319526628
Epoch 2/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.364721967329999 | Val loss: 1.5223928853025976 | Val accuracy: 0.6366863905325444
Epoch 3/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.0579847119936423 | Val loss: 1.2541757799544424 | Val accuracy: 0.6840236686390533
Epoch 4/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.7779969420722302 | Val loss: 1.3231247832190316 | Val accuracy: 0.6828402366863905
Epoch 5/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.6055723577268234 | Val loss: 1.3099008989783953 | Val accuracy: 0.6662721893491125
Epoch 6/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.4572472311478077 | Val loss: 1.3714864623715293 | Val accuracy: 0.7041420118343196
Early stopping triggered at epoch 6
Fold 6


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 2.1171895440887 | Val loss: 1.859461913693626 | Val accuracy: 0.5562130177514792
Epoch 2/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.778795372538206 | Val loss: 1.7305743356920638 | Val accuracy: 0.5692307692307692
Epoch 3/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.6394243420673018 | Val loss: 1.7243174269514263 | Val accuracy: 0.5609467455621302
Epoch 4/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.4547646655505444 | Val loss: 1.57111105828915 | Val accuracy: 0.5739644970414202
Epoch 5/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.2572649222712557 | Val loss: 1.4727211875735589 | Val accuracy: 0.5775147928994083
Epoch 6/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.1014294993414098 | Val loss: 1.5382670748908565 | Val accuracy: 0.5905325443786983
Epoch 7/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.003320847301423 | Val loss: 1.4762387590588264 | Val accuracy: 0.6248520710059172
Epoch 8/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.8586933775609281 | Val loss: 1.4752513670696403 | Val accuracy: 0.6011834319526628
Early stopping triggered at epoch 8
Fold 7


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.9042330946491546 | Val loss: 1.4235232348711986 | Val accuracy: 0.6307692307692307
Epoch 2/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.347606899701998 | Val loss: 1.245882382370391 | Val accuracy: 0.6579881656804734
Epoch 3/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.0316536953719724 | Val loss: 1.177515390908943 | Val accuracy: 0.6662721893491125
Epoch 4/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.810704786706121 | Val loss: 1.212229849032636 | Val accuracy: 0.6674556213017752
Epoch 5/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.6184193714357474 | Val loss: 1.2158848305918135 | Val accuracy: 0.676923076923077
Epoch 6/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.46806671825714974 | Val loss: 1.3880522695352446 | Val accuracy: 0.6532544378698225
Early stopping triggered at epoch 6
Fold 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.9650476533575219 | Val loss: 1.7566704243983862 | Val accuracy: 0.5905325443786983
Epoch 2/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.4456724803738232 | Val loss: 1.4917556053062655 | Val accuracy: 0.6094674556213018
Epoch 3/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.129846598608654 | Val loss: 1.2908084853640143 | Val accuracy: 0.6437869822485207
Epoch 4/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.9385906303254496 | Val loss: 1.3555192801187623 | Val accuracy: 0.6627218934911243
Epoch 5/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.7237115700830933 | Val loss: 1.274376223672111 | Val accuracy: 0.6828402366863905
Epoch 6/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.5516212441481086 | Val loss: 1.382941577794417 | Val accuracy: 0.6591715976331362
Epoch 7/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.439551411474421 | Val loss: 1.4116903132987473 | Val accuracy: 0.6650887573964497
Epoch 8/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 0.3750127069691212 | Val loss: 1.4632670328302204 | Val accuracy: 0.6710059171597633
Early stopping triggered at epoch 8
Fold 9


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 2.595615477121177 | Val loss: 2.558087911245958 | Val accuracy: 0.4733727810650888
Epoch 2/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 2.503430057974423 | Val loss: 2.553018533958579 | Val accuracy: 0.4733727810650888
Epoch 3/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 2.494026333844962 | Val loss: 2.5545258971880065 | Val accuracy: 0.4733727810650888
Epoch 4/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 2.4561459199220192 | Val loss: 2.5754192595211967 | Val accuracy: 0.4733727810650888
Epoch 5/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 2.434401922616638 | Val loss: 2.6156182311615854 | Val accuracy: 0.4733727810650888
Early stopping triggered at epoch 5
Fold 10


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.9688776903042273 | Val loss: 1.8330117542788666 | Val accuracy: 0.5289940828402367
Epoch 2/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 1.5076275589335866 | Val loss: 1.5366779037241667 | Val accuracy: 0.5976331360946746
Epoch 3/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 1.2229755282902919 | Val loss: 1.353169062227573 | Val accuracy: 0.621301775147929
Epoch 4/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.9656639140443641 | Val loss: 1.3191915176949411 | Val accuracy: 0.6473372781065089
Epoch 5/20


100%|██████████| 476/476 [04:41<00:00,  1.69it/s]


Train loss: 0.7560641027893088 | Val loss: 1.3996065888764724 | Val accuracy: 0.6473372781065089
Epoch 6/20


100%|██████████| 476/476 [04:40<00:00,  1.70it/s]


Train loss: 0.5794685588320013 | Val loss: 1.6521785416693058 | Val accuracy: 0.6497041420118344
Epoch 7/20


100%|██████████| 476/476 [04:40<00:00,  1.69it/s]


Train loss: 0.4653110612269898 | Val loss: 1.4717388451099396 | Val accuracy: 0.6520710059171597
Early stopping triggered at epoch 7
10-fold Cross-Validation Results:
Average Validation Loss: 1.5424773896323904
Average Validation Accuracy: 0.6431152517240897
