In [1]:
import pandas as pd
data_path = "../data/open_source_8380.csv"  # 替換為你的檔案路徑
df = pd.read_csv(data_path)
df

Unnamed: 0,Inbound Message,Label
0,verified user details employee manager name ch...,0
1,hello team meetings skype meetings etc appeari...,0
2,cannot log vpn best,0
3,unable access tool page,0
4,skype error,0
...,...,...
8375,good afternoon receiving emails sent mail plea...,22
8376,telephony software issue,0
8377,vip windows password reset tifpdchb pedxruyf,0
8378,unable access machine utilities finish drawers...,44


# 十折驗證法

In [2]:
import torch
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}") 
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
print(f"Current GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9} GB")
print(f"Current GPU Memory Cached: {torch.cuda.memory_reserved() / 1e9} GB")

Using device: cuda
CUDA Available: True
CUDA Device: NVIDIA GeForce RTX 3050 6GB Laptop GPU
Current GPU Memory Allocated: 0.0 GB
Current GPU Memory Cached: 0.0 GB


In [4]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.model_selection import KFold
# 設定參數
MAX_LENGTH = 128
BATCH_SIZE = 16
EPOCHS = 20

# 初始化 BERT 的 tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# 初始化數據
messages = df['Inbound Message'].tolist()
labels = df['Label'].tolist()

# KFold 初始化
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [5]:
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    def __init__(self, messages, labels, tokenizer, max_length):
        self.messages = messages
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.messages)

    def __getitem__(self, idx):
        message = str(self.messages[idx])
        label = self.labels[idx]
        
        encoding = self.tokenizer.encode_plus(
            message,
            max_length=self.max_length,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [6]:
# 定義訓練與評估函數
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader):
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
    
    return total_loss / len(dataloader)

In [7]:
def eval_model(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    correct_predictions = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = criterion(logits, labels)
            total_loss += loss.item()

            _, preds = torch.max(logits, dim=1)
            correct_predictions += torch.sum(preds == labels)
    
    accuracy = correct_predictions.double() / len(dataloader.dataset)
    return total_loss / len(dataloader), accuracy

In [8]:
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np

# 進行 10 折交叉驗證
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(messages)):
    print(f"Fold {fold + 1}")

    # 折分數據
    train_messages = [messages[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    val_messages = [messages[i] for i in val_idx]
    val_labels = [labels[i] for i in val_idx]

    # 建立 Dataset 和 DataLoader
    train_dataset = CustomDataset(
        messages=train_messages,
        labels=train_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    val_dataset = CustomDataset(
        messages=val_messages,
        labels=val_labels,
        tokenizer=tokenizer,
        max_length=MAX_LENGTH
    )
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # 初始化模型
    NUM_LABELS = len(set(labels))
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=NUM_LABELS)
    model.to(device)

    # 初始化優化器和損失函數
    optimizer = AdamW(model.parameters(), lr=5e-5)
    criterion = CrossEntropyLoss()

    # Early stopping 參數
    patience = 3  # 連續幾個 epoch 沒有提升就停止
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    
    # 訓練模型
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch + 1}/{EPOCHS}")
        train_loss = train_epoch(model, train_dataloader, optimizer, criterion)
        val_loss, val_accuracy = eval_model(model, val_dataloader, criterion)
        print(f"Train loss: {train_loss} | Val loss: {val_loss} | Val accuracy: {val_accuracy}")

        # Early Stopping 判斷
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0  # 重置計數
        else:
            epochs_without_improvement += 1  # 增加計數
        
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}")
            break  # 終止訓練

    # 保存每折的結果
    fold_results.append({"fold": fold + 1, "val_loss": val_loss, "val_accuracy": val_accuracy.item()})

# 計算總體平均結果
avg_val_loss = np.mean([result["val_loss"] for result in fold_results])
avg_val_accuracy = np.mean([result["val_accuracy"] for result in fold_results])

print(f"10-fold Cross-Validation Results:")
print(f"Average Validation Loss: {avg_val_loss}")
print(f"Average Validation Accuracy: {avg_val_accuracy}")

Fold 1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.930966965727887 | Val loss: 1.6539252434136733 | Val accuracy: 0.5954653937947494
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.36it/s]


Train loss: 1.5395983408000005 | Val loss: 1.5447191258646407 | Val accuracy: 0.5918854415274464
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.36it/s]


Train loss: 1.3365808364824723 | Val loss: 1.4659984241116721 | Val accuracy: 0.6109785202863962
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.1666146750419826 | Val loss: 1.407498299232069 | Val accuracy: 0.6193317422434368
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.0105260230790256 | Val loss: 1.558355765241497 | Val accuracy: 0.626491646778043
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8904657345896555 | Val loss: 1.4789713623951066 | Val accuracy: 0.6312649164677805
Epoch 7/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.7405253309061972 | Val loss: 1.5058998336207192 | Val accuracy: 0.626491646778043
Early stopping triggered at epoch 7
Fold 2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.9618501884199806 | Val loss: 1.6392956769691323 | Val accuracy: 0.5966587112171838
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.5365696238764261 | Val loss: 1.5160947284608517 | Val accuracy: 0.6276849642004774
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.3139286857666606 | Val loss: 1.5033923241327394 | Val accuracy: 0.6181384248210025
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.1341055948602952 | Val loss: 1.4214689135551453 | Val accuracy: 0.6443914081145585
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.9756527894760593 | Val loss: 1.4847061544094446 | Val accuracy: 0.6324582338902148
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8256157717462314 | Val loss: 1.4117345585013337 | Val accuracy: 0.6551312649164678
Epoch 7/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.6927911569285443 | Val loss: 1.532249908964589 | Val accuracy: 0.6503579952267303
Epoch 8/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.5964411631921085 | Val loss: 1.5496673527753577 | Val accuracy: 0.6396181384248211
Epoch 9/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.5109674536812482 | Val loss: 1.6295400087563496 | Val accuracy: 0.6420047732696897
Early stopping triggered at epoch 9
Fold 3


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.9763753151489516 | Val loss: 1.712948260442266 | Val accuracy: 0.5883054892601433
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.5434198987244043 | Val loss: 1.6035105451098028 | Val accuracy: 0.5966587112171838
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.341518939867363 | Val loss: 1.426718166414297 | Val accuracy: 0.630071599045346
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.1752201300114393 | Val loss: 1.4725976833757364 | Val accuracy: 0.6217183770883055
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.001213413836845 | Val loss: 1.4695830120230622 | Val accuracy: 0.6408114558472554
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8525161661081395 | Val loss: 1.5462037454236228 | Val accuracy: 0.6503579952267303
Early stopping triggered at epoch 6
Fold 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 2.0460445438906296 | Val loss: 1.7299093131749135 | Val accuracy: 0.5763723150357996
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.6626729585735474 | Val loss: 1.5400616341042068 | Val accuracy: 0.5930787589498807
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.4482406377792358 | Val loss: 1.4651141267902446 | Val accuracy: 0.594272076372315
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.276368484406148 | Val loss: 1.42397877292813 | Val accuracy: 0.6145584725536993
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.1172330758013462 | Val loss: 1.3539559166386443 | Val accuracy: 0.6336515513126492
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.9572075522046978 | Val loss: 1.3838508095381394 | Val accuracy: 0.636038186157518
Epoch 7/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8312698256034973 | Val loss: 1.4091613528863438 | Val accuracy: 0.6276849642004774
Epoch 8/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.700353925408385 | Val loss: 1.4318823589468903 | Val accuracy: 0.6515513126491647
Early stopping triggered at epoch 8
Fold 5


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.9821439478862084 | Val loss: 1.660009244702897 | Val accuracy: 0.5906921241050119
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.5918535348975051 | Val loss: 1.514230245689176 | Val accuracy: 0.5990453460620525
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.3757418370726755 | Val loss: 1.457299403424533 | Val accuracy: 0.6312649164677805
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.2023072956224619 | Val loss: 1.3912335139400553 | Val accuracy: 0.6312649164677805
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.0561060658892838 | Val loss: 1.425899713669183 | Val accuracy: 0.616945107398568
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.9083291846248558 | Val loss: 1.4635680790217418 | Val accuracy: 0.6348448687350836
Epoch 7/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.7746197502049854 | Val loss: 1.5276763517901581 | Val accuracy: 0.6085918854415274
Early stopping triggered at epoch 7
Fold 6


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 2.0030068319480296 | Val loss: 1.8262224444803201 | Val accuracy: 0.5501193317422435
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.600488525073407 | Val loss: 1.5983203593290076 | Val accuracy: 0.5775656324582339
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.3948025253238314 | Val loss: 1.5741772246810626 | Val accuracy: 0.581145584725537
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.2252375586431916 | Val loss: 1.5339963919711563 | Val accuracy: 0.5906921241050119
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.0604767522190588 | Val loss: 1.5132566960352771 | Val accuracy: 0.6073985680190931
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.90637323714919 | Val loss: 1.5314758026374962 | Val accuracy: 0.6109785202863962
Epoch 7/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.7706902730515448 | Val loss: 1.6248486795515384 | Val accuracy: 0.6085918854415274
Epoch 8/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.6688250221343617 | Val loss: 1.6203688990395024 | Val accuracy: 0.6097852028639619
Early stopping triggered at epoch 8
Fold 7


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 2.0320960725263015 | Val loss: 1.6731502526211288 | Val accuracy: 0.6014319809069213
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.7089256502814212 | Val loss: 1.7692018972252899 | Val accuracy: 0.5990453460620525
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.563447024357521 | Val loss: 1.5239864970153232 | Val accuracy: 0.6121718377088305
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.4177580664597325 | Val loss: 1.4339282197772332 | Val accuracy: 0.626491646778043
Epoch 5/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.265796262265767 | Val loss: 1.5128698438968298 | Val accuracy: 0.5715990453460621
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.1097798342421903 | Val loss: 1.286689058789667 | Val accuracy: 0.6694510739856803
Epoch 7/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.9762121892960395 | Val loss: 1.3346978903941389 | Val accuracy: 0.6587112171837709
Epoch 8/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8542081476268122 | Val loss: 1.3604333243280087 | Val accuracy: 0.6587112171837709
Epoch 9/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.7376342825591564 | Val loss: 1.3622694487841625 | Val accuracy: 0.6658711217183771
Early stopping triggered at epoch 9
Fold 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.9849029174548085 | Val loss: 1.6597990483607885 | Val accuracy: 0.594272076372315
Epoch 2/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.5823095301955432 | Val loss: 1.502359262052572 | Val accuracy: 0.6073985680190931
Epoch 3/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.3449031602338715 | Val loss: 1.426278882431534 | Val accuracy: 0.6420047732696897
Epoch 4/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.1714259722601559 | Val loss: 1.4271951210948657 | Val accuracy: 0.6455847255369929
Epoch 5/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.0080568753182888 | Val loss: 1.527159162287442 | Val accuracy: 0.6229116945107399
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8711140706377515 | Val loss: 1.4781569073785026 | Val accuracy: 0.6420047732696897
Early stopping triggered at epoch 6
Fold 9


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.9927789687605229 | Val loss: 1.772713084265871 | Val accuracy: 0.5572792362768497
Epoch 2/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.6074111445854276 | Val loss: 1.5281329076245147 | Val accuracy: 0.5894988066825776
Epoch 3/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.3956103996073796 | Val loss: 1.4195473149137676 | Val accuracy: 0.6205250596658711
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.2302770849506734 | Val loss: 1.3860088024499282 | Val accuracy: 0.613365155131265
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.0387902225099377 | Val loss: 1.3602269190662313 | Val accuracy: 0.6372315035799523
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8887842972210404 | Val loss: 1.39022033844354 | Val accuracy: 0.662291169451074
Epoch 7/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.7801617106770055 | Val loss: 1.4664902287834096 | Val accuracy: 0.6396181384248211
Epoch 8/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.6494745379945231 | Val loss: 1.4292860660912856 | Val accuracy: 0.6527446300715991
Early stopping triggered at epoch 8
Fold 10


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.9754609764885094 | Val loss: 1.6328743465666502 | Val accuracy: 0.5930787589498807
Epoch 2/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.552428737409034 | Val loss: 1.494701237048743 | Val accuracy: 0.6157517899761337
Epoch 3/20


100%|██████████| 472/472 [03:21<00:00,  2.35it/s]


Train loss: 1.357939823009705 | Val loss: 1.3624698677152958 | Val accuracy: 0.6372315035799523
Epoch 4/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 1.1606744334922503 | Val loss: 1.4334847437885572 | Val accuracy: 0.636038186157518
Epoch 5/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.997349559187384 | Val loss: 1.405117300321471 | Val accuracy: 0.6443914081145585
Epoch 6/20


100%|██████████| 472/472 [03:20<00:00,  2.35it/s]


Train loss: 0.8587079691242868 | Val loss: 1.4726711775896684 | Val accuracy: 0.626491646778043
Early stopping triggered at epoch 6
10-fold Cross-Validation Results:
Average Validation Loss: 1.500395479742086
Average Validation Accuracy: 0.6375894988066826
