In [1]:
import pandas as pd
data_path = "../data/open_source_8454_combine_short_description.csv"  # 替換為你的檔案路徑
df = pd.read_csv(data_path)
df

Unnamed: 0,Inbound Message,Label
0,login issue verified user details employee man...,0
1,outlook hello team meetings skype meetings etc...,0
2,cant log vpn cannot log vpn best,0
3,unable access tool page unable access tool page,0
4,skype error skype error,0
...,...,...
8449,emails coming mail good afternoon receiving em...,22
8450,telephony software issue telephony software issue,0
8451,vip windows password reset tifpdchb pedxruyf v...,0
8452,machine est funcionando unable access machine ...,44


# 十折驗證法

In [2]:
import torch
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}") 
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
print(f"Current GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9} GB")
print(f"Current GPU Memory Cached: {torch.cuda.memory_reserved() / 1e9} GB")

Using device: cuda
CUDA Available: True
CUDA Device: NVIDIA GeForce RTX 3050 6GB Laptop GPU
Current GPU Memory Allocated: 0.0 GB
Current GPU Memory Cached: 0.0 GB


In [3]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.model_selection import KFold
# 設定參數
MAX_LENGTH = 128
BATCH_SIZE = 16
EPOCHS = 20

# 初始化 BERT 的 tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# 初始化數據
messages = df['Inbound Message'].tolist()
labels = df['Label'].tolist()

# KFold 初始化
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [4]:
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, messages, labels, tokenizer, max_length):
        self.messages = messages
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.messages)

    def __getitem__(self, idx):
        message = str(self.messages[idx])
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            message,
            max_length=self.max_length,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [5]:
# 定義訓練與評估函數
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader):
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
    
    return total_loss / len(dataloader)

In [6]:
def eval_model(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    correct_predictions = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = criterion(logits, labels)
            total_loss += loss.item()

            _, preds = torch.max(logits, dim=1)
            correct_predictions += torch.sum(preds == labels)
    
    accuracy = correct_predictions.double() / len(dataloader.dataset)
    return total_loss / len(dataloader), accuracy

In [7]:
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np

In [8]:
# 進行 10 折交叉驗證
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(messages)):
    print(f"Fold {fold + 1}")

    # 折分數據
    train_messages = [messages[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    val_messages = [messages[i] for i in val_idx]
    val_labels = [labels[i] for i in val_idx]

    # 建立 Dataset 和 DataLoader
    train_dataset = CustomDataset(
        messages=train_messages,
        labels=train_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    val_dataset = CustomDataset(
        messages=val_messages,
        labels=val_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # 初始化模型
    NUM_LABELS = len(set(labels))
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=NUM_LABELS)
    model.to(device)

    # 初始化優化器和損失函數
    optimizer = AdamW(model.parameters(), lr=5e-5)
    criterion = CrossEntropyLoss()

    # Early stopping 參數
    patience = 3  # 連續幾個 epoch 沒有提升就停止
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    
    # 訓練模型
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch + 1}/{EPOCHS}")
        train_loss = train_epoch(model, train_dataloader, optimizer, criterion)
        val_loss, val_accuracy = eval_model(model, val_dataloader, criterion)
        print(f"Train loss: {train_loss} | Val loss: {val_loss} | Val accuracy: {val_accuracy}")

        # Early Stopping 判斷
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0  # 重置計數
        else:
            epochs_without_improvement += 1  # 增加計數
        
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}")
            break  # 終止訓練

    # 保存每折的結果
    fold_results.append({"fold": fold + 1, "val_loss": val_loss, "val_accuracy": val_accuracy.item()})

# 計算總體平均結果
avg_val_loss = np.mean([result["val_loss"] for result in fold_results])
avg_val_accuracy = np.mean([result["val_accuracy"] for result in fold_results])

print(f"10-fold Cross-Validation Results:")
print(f"Average Validation Loss: {avg_val_loss}")
print(f"Average Validation Accuracy: {avg_val_accuracy}")

Fold 1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:23<00:00,  2.34it/s]


Train loss: 1.959283966226738 | Val loss: 1.7457375728859093 | Val accuracy: 0.5697399527186761
Epoch 2/20


100%|██████████| 476/476 [03:21<00:00,  2.36it/s]


Train loss: 1.493193940580392 | Val loss: 1.545113871682365 | Val accuracy: 0.611111111111111
Epoch 3/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.2476121846179502 | Val loss: 1.3704478600115146 | Val accuracy: 0.6312056737588653
Epoch 4/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.0685551976080703 | Val loss: 1.3110008723330948 | Val accuracy: 0.6536643026004728
Epoch 5/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.8892504527163105 | Val loss: 1.2850929734841832 | Val accuracy: 0.6690307328605201
Epoch 6/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.7722475213319564 | Val loss: 1.463278373457351 | Val accuracy: 0.6229314420803782
Epoch 7/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.655396979990877 | Val loss: 1.364830511920857 | Val accuracy: 0.6761229314420804
Epoch 8/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.5623407782360661 | Val loss: 1.442901738972034 | Val accuracy: 0.6666666666666666
Early stopping triggered at epoch 8
Fold 2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.93666221865085 | Val loss: 1.5775683882101528 | Val accuracy: 0.5945626477541371
Epoch 2/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.4752072758283936 | Val loss: 1.3378039266703263 | Val accuracy: 0.6323877068557919
Epoch 3/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.2105113786934805 | Val loss: 1.251237579111783 | Val accuracy: 0.6631205673758865
Epoch 4/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.0012671730222822 | Val loss: 1.270612655945544 | Val accuracy: 0.6394799054373522
Epoch 5/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.8458274758475668 | Val loss: 1.288670519331716 | Val accuracy: 0.6678486997635934
Epoch 6/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.6946667589622886 | Val loss: 1.2960706623095386 | Val accuracy: 0.6619385342789598
Early stopping triggered at epoch 6
Fold 3


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 2.022965220405775 | Val loss: 1.6052656972183372 | Val accuracy: 0.6347517730496454
Epoch 2/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.5071125429968875 | Val loss: 1.3775720675036591 | Val accuracy: 0.6359338061465721
Epoch 3/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.2313138066532732 | Val loss: 1.3142870001073153 | Val accuracy: 0.6453900709219857
Epoch 4/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 1.028932406433991 | Val loss: 1.244603932466147 | Val accuracy: 0.6666666666666666
Epoch 5/20


100%|██████████| 476/476 [03:22<00:00,  2.34it/s]


Train loss: 0.848481238890095 | Val loss: 1.2358124261757113 | Val accuracy: 0.6690307328605201
Epoch 6/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.7148854154154283 | Val loss: 1.2251397666503798 | Val accuracy: 0.6820330969267139
Epoch 7/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.5851761886239553 | Val loss: 1.258602552256494 | Val accuracy: 0.6843971631205673
Epoch 8/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.5086803829900044 | Val loss: 1.2772188299107101 | Val accuracy: 0.7080378250591016
Epoch 9/20


100%|██████████| 476/476 [03:22<00:00,  2.35it/s]


Train loss: 0.4318050536945337 | Val loss: 1.351818155848755 | Val accuracy: 0.7009456264775413
Early stopping triggered at epoch 9
Fold 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 476/476 [03:23<00:00,  2.34it/s]


Train loss: 1.9364317450202813 | Val loss: 1.4746283152193393 | Val accuracy: 0.6335697399527187
Epoch 2/20


100%|██████████| 476/476 [03:23<00:00,  2.34it/s]


Train loss: 1.4279745136614608 | Val loss: 1.3487367028335355 | Val accuracy: 0.6300236406619385
Epoch 3/20


100%|██████████| 476/476 [03:23<00:00,  2.34it/s]


Train loss: 1.1710845131834013 | Val loss: 1.2638385953768245 | Val accuracy: 0.6820330969267139
Epoch 4/20


100%|██████████| 476/476 [03:24<00:00,  2.33it/s]


Train loss: 0.9687438344567263 | Val loss: 1.2752210402263786 | Val accuracy: 0.6631205673758865
Epoch 5/20


100%|██████████| 476/476 [03:23<00:00,  2.34it/s]


Train loss: 0.81229761252258 | Val loss: 1.2429697749749669 | Val accuracy: 0.6926713947990544
Epoch 6/20


100%|██████████| 476/476 [03:35<00:00,  2.21it/s]


Train loss: 0.6757817639491889 | Val loss: 1.3623859151354376 | Val accuracy: 0.6725768321513003
Epoch 7/20


100%|██████████| 476/476 [04:04<00:00,  1.95it/s]


Train loss: 0.5894340652377665 | Val loss: 1.3177604357588966 | Val accuracy: 0.6879432624113475
Epoch 8/20


100%|██████████| 476/476 [04:03<00:00,  1.95it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train loss: 0.49463100528673215 | Val loss: 1.4037310973090946 | Val accuracy: 0.6702127659574468
Early stopping triggered at epoch 8
Fold 5
Epoch 1/20


100%|██████████| 476/476 [04:03<00:00,  1.95it/s]


Train loss: 1.8774861685869073 | Val loss: 1.5297006379883245 | Val accuracy: 0.6248520710059172
Epoch 2/20


100%|██████████| 476/476 [04:03<00:00,  1.95it/s]


Train loss: 1.3902277537003285 | Val loss: 1.3639276972356833 | Val accuracy: 0.6366863905325444
Epoch 3/20


100%|██████████| 476/476 [04:03<00:00,  1.95it/s]


Train loss: 1.129034061673559 | Val loss: 1.223364381014176 | Val accuracy: 0.6698224852071006
Epoch 4/20


100%|██████████| 476/476 [04:03<00:00,  1.95it/s]


Train loss: 0.9455007480285248 | Val loss: 1.2968083336105887 | Val accuracy: 0.6627218934911243
Epoch 5/20


100%|██████████| 476/476 [04:03<00:00,  1.95it/s]


Train loss: 0.7669890522956848 | Val loss: 1.2358459627853249 | Val accuracy: 0.6745562130177515
Epoch 6/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train loss: 0.6299403309039459 | Val loss: 1.3117070518574625 | Val accuracy: 0.6792899408284023
Early stopping triggered at epoch 6
Fold 6
Epoch 1/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.9843883372279776 | Val loss: 1.6664233455118143 | Val accuracy: 0.5822485207100592
Epoch 2/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.5209599701421601 | Val loss: 1.4202259137945354 | Val accuracy: 0.6118343195266273
Epoch 3/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.2556606653858633 | Val loss: 1.441719629854526 | Val accuracy: 0.5846153846153846
Epoch 4/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.061759618024866 | Val loss: 1.2947529374428515 | Val accuracy: 0.6378698224852071
Epoch 5/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 0.8968452619654792 | Val loss: 1.3425317918354611 | Val accuracy: 0.6414201183431952
Epoch 6/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 0.7562367486681382 | Val loss: 1.3317303460723948 | Val accuracy: 0.676923076923077
Epoch 7/20


100%|██████████| 476/476 [04:04<00:00,  1.95it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train loss: 0.6396044489591062 | Val loss: 1.4020457768215324 | Val accuracy: 0.6615384615384615
Early stopping triggered at epoch 7
Fold 7
Epoch 1/20


100%|██████████| 476/476 [04:04<00:00,  1.94it/s]


Train loss: 1.9548715485745118 | Val loss: 1.4879005922461457 | Val accuracy: 0.6331360946745562
Epoch 2/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.4672432759416705 | Val loss: 1.3219022469700508 | Val accuracy: 0.6366863905325444
Epoch 3/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.2370490740452493 | Val loss: 1.2091502367325548 | Val accuracy: 0.6698224852071006
Epoch 4/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 1.0285694525898004 | Val loss: 1.2026307574420605 | Val accuracy: 0.6804733727810651
Epoch 5/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 0.8827875037560192 | Val loss: 1.2197525641828213 | Val accuracy: 0.6603550295857988
Epoch 6/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 0.7294328158423931 | Val loss: 1.2102674841880798 | Val accuracy: 0.6923076923076923
Epoch 7/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train loss: 0.6248349144960902 | Val loss: 1.2322167023172919 | Val accuracy: 0.6781065088757396
Early stopping triggered at epoch 7
Fold 8
Epoch 1/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.964671104395089 | Val loss: 1.6392722360368044 | Val accuracy: 0.5976331360946746
Epoch 2/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.4780536242643325 | Val loss: 1.441459206196497 | Val accuracy: 0.6224852071005917
Epoch 3/20


100%|██████████| 476/476 [04:03<00:00,  1.95it/s]


Train loss: 1.2290029105644267 | Val loss: 1.3303413863451976 | Val accuracy: 0.6591715976331362
Epoch 4/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.030145142837858 | Val loss: 1.2545264278942685 | Val accuracy: 0.6733727810650888
Epoch 5/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 0.8455214942766338 | Val loss: 1.349264477783779 | Val accuracy: 0.6757396449704142
Epoch 6/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 0.7038638483764244 | Val loss: 1.3239479644118615 | Val accuracy: 0.676923076923077
Epoch 7/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train loss: 0.5944639189685342 | Val loss: 1.299493023247089 | Val accuracy: 0.6816568047337278
Early stopping triggered at epoch 7
Fold 9
Epoch 1/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 1.9053640494577022 | Val loss: 1.580464445195108 | Val accuracy: 0.5940828402366864
Epoch 2/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 1.427016587129661 | Val loss: 1.4334972928155143 | Val accuracy: 0.6059171597633136
Epoch 3/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 1.1743135272580034 | Val loss: 1.2021988489717808 | Val accuracy: 0.6804733727810651
Epoch 4/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 0.9676788736040852 | Val loss: 1.2507708252600904 | Val accuracy: 0.6520710059171597
Epoch 5/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 0.808226100912615 | Val loss: 1.2614083694961835 | Val accuracy: 0.6603550295857988
Epoch 6/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train loss: 0.6712292597918701 | Val loss: 1.2994047293685518 | Val accuracy: 0.6828402366863905
Early stopping triggered at epoch 6
Fold 10
Epoch 1/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 1.8952664789281974 | Val loss: 1.7755534626402945 | Val accuracy: 0.5538461538461539
Epoch 2/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 1.422987279458707 | Val loss: 1.4224661419976432 | Val accuracy: 0.5940828402366864
Epoch 3/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 1.170208884575287 | Val loss: 1.3607004026197038 | Val accuracy: 0.6473372781065089
Epoch 4/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 0.9781504989359058 | Val loss: 1.303844321448848 | Val accuracy: 0.6627218934911243
Epoch 5/20


100%|██████████| 476/476 [04:02<00:00,  1.96it/s]


Train loss: 0.8000966706665373 | Val loss: 1.3257372446779936 | Val accuracy: 0.6591715976331362
Epoch 6/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 0.6831373906686526 | Val loss: 1.3555857893430963 | Val accuracy: 0.6603550295857988
Epoch 7/20


100%|██████████| 476/476 [04:03<00:00,  1.96it/s]


Train loss: 0.5692474883550606 | Val loss: 1.5215971036902014 | Val accuracy: 0.655621301775148
Early stopping triggered at epoch 7
10-fold Cross-Validation Results:
Average Validation Loss: 1.3560986041741552
Average Validation Accuracy: 0.6738816847818485
