<a href="https://colab.research.google.com/github/rlaaudrb1104/Ai/blob/PJH/GraphCodeBERT_dacon2__random.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import os
import random
from itertools import combinations, product
import re
import sklearn
from sklearn.model_selection import train_test_split
import torch
torch.set_float32_matmul_precision('high')
from torch.utils.data import Dataset, DataLoader
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [6]:
# Seed 고정 함수
def seed_everything(seed: int = 42, contain_cuda: bool = False):
  os.environ['PYTHONHASHSEED'] = str(seed)
  random.seed(seed)
  np.random.seed(seed)

  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  print(f"Seed set as {seed}")

seed = 42
seed_everything(seed)

Seed set as 42


In [7]:
# CUDA 사용 가능 여부 확인 및 GPU 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [8]:
def preprocess_and_remove_extras(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    content = re.sub(re.compile("/\*.*?\*/", re.DOTALL), "", content) # 멀티 라인 주석 제거
    content = re.sub(re.compile("//.*?\n"), "", content) # 싱글 라인 주석 제거
    content = re.sub(re.compile("#include <.*?>\n"), "", content) # angle brackets를 사용하는 include 제거
    content = re.sub(re.compile("#include \".*?\"\n"), "", content) # double quotes를 사용하는 include 제거
    content = re.sub(re.compile("#define .*?\n"), "", content) # 매크로 정의 제거
    content = re.sub(re.compile("[\t ]+"), " ", content) # 공백 및 탭 정리
    content = re.sub(re.compile("\n\s*\n"), "\n", content)# 여러 줄바꿈을 하나로

    # 공백이 아닌 줄만 선택하여 리스트로 만든 후, 문자열로 결합
    processed_script = '\n'.join([line.strip() for line in content.splitlines() if line.strip()])

    return processed_script

In [9]:
#  전처리 적용
#  이미 create_train_data 만들 때 적용했지만, test.csv와, sample_train.csv에도 같은 전처리를 하기 위해 다시 정의
def remove_extras(code):
    code = re.sub(re.compile("/\*.*?\*/", re.DOTALL), "", code) # 멀티 라인 주석 제거
    code = re.sub(re.compile("//.*?\n"), "", code) # 싱글 라인 주석 제거
    code = re.sub(re.compile("#include <.*?>\n"), "", code)  # angle brackets를 사용하는 include 제거
    code = re.sub(re.compile("#include \".*?\"\n"), "", code)  # double quotes를 사용하는 include 제거
    code = re.sub(re.compile("#define .*?\n"), "", code)  # 매크로 정의 제거
    code = re.sub(re.compile("[\t ]+"), " ", code)  # 탭과 여러 공백을 하나의 공백으로
    code = re.sub(re.compile("\n\s*\n"), "\n", code)  # 여러 줄바꿈을 하나로

    return code.strip()

In [10]:
class CodeDataset(Dataset):
    def __init__(self, tokenizer, data, max_length=512, include_labels=True):
        self.tokenizer = tokenizer
        self.data = data
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        code = remove_extras(self.data.iloc[idx]['code'])  # 특정 인덱스의 코드를 가져와 추가 요소를 제거합니다.

        inputs = self.tokenizer(
            code,
            padding='max_length', truncation=True, max_length=self.max_length, return_tensors="pt"
        )
        inputs = {key: val.squeeze() for key, val in inputs.items()}

        label = torch.tensor(self.data.iloc[idx]['labels'], dtype=torch.long)  # 해당 코드의 정답 라벨을 가져옵니다.
        inputs['labels'] = label

        return inputs

In [12]:
filepath_train = '/content/drive/MyDrive/final_train5.csv'
filepath_val = '/content/drive/MyDrive/final_val2.csv'
train_df = pd.read_csv(filepath_train)
val_df = pd.read_csv(filepath_val)

In [13]:
train_df = train_df.drop(columns=['Unnamed: 0'])
val_df = val_df.drop(columns=['Unnamed: 0'])
train_df = train_df.drop(columns=['vul'])
val_df = val_df.drop(columns=['vul'])

In [14]:
# GraphCodeBERT 모델 및 토크나이저 로드
model_name = "microsoft/graphcodebert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.truncation_side = "left"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=11).to(device)

# 훈련 세트와 검증 세트에 대한 데이터셋 생성
train_dataset = CodeDataset(tokenizer, train_df, max_length=512)
val_dataset = CodeDataset(tokenizer, val_df, max_length=512, include_labels=True)

# 데이터 로더 준비
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# 파인 튜닝을 위한 옵티마이저 설정
optimizer = AdamW(model.parameters(), lr=2e-5)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/539 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/graphcodebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader
from transformers import AdamW, AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import f1_score, classification_report
from tqdm import tqdm
import pandas as pd

# 설정
k = 5  # K-Fold 교차 검증에서의 fold 수
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# 모델 및 토크나이저 설정
model_name = "microsoft/graphcodebert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 전체 데이터셋을 준비
total_df = pd.read_csv("/content/drive/MyDrive/Labels_Safe_MSR+Juliet_random.csv")
# 결과를 저장할 리스트
all_train_losses = []
all_val_losses = []
all_train_f1_scores = []
all_val_f1_scores = []

# 각 fold에 대한 훈련 시작
fold = 0
for train_index, val_index in kf.split(total_df):
    fold += 1
    train_data = total_df.iloc[train_index]
    val_data = total_df.iloc[val_index]

    train_dataset = CodeDataset(tokenizer, train_data, max_length=512)
    val_dataset = CodeDataset(tokenizer, val_data, max_length=512)

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

    # 모델 재설정 (각 fold에서 새로운 모델을 사용)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=11)
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=2e-5)

    train_losses = []
    val_losses = []
    train_f1_scores = []
    val_f1_scores = []

    # 얼리스탑 설정
    patience = 5
    best_val_loss = float('inf')
    early_stopping_counter = 0

    for epoch in range(100):  # 에폭 수 변경 가능
        model.train()
        total_train_loss = 0
        all_predictions_train = []
        all_labels_train = []
        for batch in tqdm(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            total_train_loss += loss.item()

            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)
            labels = batch["labels"]

            all_predictions_train.extend(predictions.cpu().numpy())
            all_labels_train.extend(labels.cpu().numpy())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_loss = total_train_loss / len(train_loader)
        train_f1 = f1_score(all_labels_train, all_predictions_train, average='weighted')

        model.eval()
        total_val_loss = 0
        all_predictions_val = []
        all_labels_val = []
        for batch in tqdm(val_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.no_grad():
                outputs = model(**batch)
            loss = outputs.loss
            total_val_loss += loss.item()

            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)
            labels = batch["labels"]

            all_predictions_val.extend(predictions.cpu().numpy())
            all_labels_val.extend(labels.cpu().numpy())

        val_loss = total_val_loss / len(val_loader)
        val_f1 = f1_score(all_labels_val, all_predictions_val, average='weighted')

        # 클래스별 F1-score 계산
        class_report = classification_report(all_labels_val, all_predictions_val, digits=4, output_dict=False)
        print(f"Fold {fold}, Epoch {epoch+1}, Training Loss: {train_loss}, Train F1 Score: {train_f1}, Validation Loss: {val_loss}, Validation F1 Score: {val_f1}")
        print("Classification Report:")
        print(class_report)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_f1_scores.append(train_f1)
        val_f1_scores.append(val_f1)

        # 얼리스탑 로직
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch+1} for fold {fold}")
                break

    # 폴드 결과를 저장
    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)
    all_train_f1_scores.append(train_f1_scores)
    all_val_f1_scores.append(val_f1_scores)

# 그래프로 나타내기
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
for i in range(k):
    plt.plot(range(1, len(all_train_losses[i]) + 1), all_train_losses[i], label=f'Train Loss Fold {i+1}')
    plt.plot(range(1, len(all_val_losses[i]) + 1), all_val_losses[i], label=f'Val Loss Fold {i+1}')
plt.title('Training and Validation Loss per Fold')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
for i in range(k):
    plt.plot(range(1, len(all_train_f1_scores[i]) + 1), all_train_f1_scores[i], label=f'Train F1 Score Fold {i+1}')
    plt.plot(range(1, len(all_val_f1_scores[i]) + 1), all_val_f1_scores[i], label=f'Val F1 Score Fold {i+1}')
plt.title('F1 Score per Fold')
plt.xlabel('Epochs')
plt.ylabel('F1 Score')
plt.legend()

plt.tight_layout()
plt.show()


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/graphcodebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 820/820 [07:18<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.31it/s]


Fold 1, Epoch 1, Training Loss: 0.8494475497250876, Train F1 Score: 0.7062850391256588, Validation Loss: 0.5825486937040482, Validation F1 Score: 0.7883649124308776
Classification Report:
              precision    recall  f1-score   support

           0     0.9048    0.8702    0.8872       601
           1     0.5501    0.8100    0.6552       400
           2     0.8549    0.9947    0.9195       379
           3     0.9448    0.8228    0.8796       333
           4     0.8456    0.9543    0.8966       350
           5     0.9823    0.9327    0.9568       297
           6     0.3955    0.5212    0.4497       236
           7     0.9741    0.8210    0.8910       229
           8     0.9796    0.5549    0.7085       173
           9     0.7674    0.2102    0.3300       157
          10     0.8197    0.3968    0.5348       126

    accuracy                         0.7921      3281
   macro avg     0.8199    0.7172    0.7372      3281
weighted avg     0.8229    0.7921    0.7884      3281


100%|██████████| 820/820 [07:19<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.31it/s]


Fold 1, Epoch 2, Training Loss: 0.4711221496301998, Train F1 Score: 0.8297271307442137, Validation Loss: 0.4956043277713425, Validation F1 Score: 0.8256538906830188
Classification Report:
              precision    recall  f1-score   support

           0     0.8406    0.8952    0.8670       601
           1     0.6820    0.7775    0.7266       400
           2     0.8549    0.9947    0.9195       379
           3     0.8680    0.8889    0.8783       333
           4     0.8902    0.8571    0.8734       350
           5     0.9720    0.9360    0.9537       297
           6     0.7191    0.5424    0.6184       236
           7     0.9785    0.7948    0.8771       229
           8     0.8592    0.7052    0.7746       173
           9     0.6727    0.7070    0.6894       157
          10     0.6697    0.5794    0.6213       126

    accuracy                         0.8278      3281
   macro avg     0.8188    0.7889    0.7999      3281
weighted avg     0.8302    0.8278    0.8257      3281


100%|██████████| 820/820 [07:19<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.32it/s]


Fold 1, Epoch 3, Training Loss: 0.3327229736440974, Train F1 Score: 0.881946492728307, Validation Loss: 0.4870942801020099, Validation F1 Score: 0.8419484325904157
Classification Report:
              precision    recall  f1-score   support

           0     0.9541    0.8652    0.9075       601
           1     0.6393    0.8775    0.7397       400
           2     0.8549    0.9947    0.9195       379
           3     0.9392    0.8348    0.8839       333
           4     0.8434    0.9543    0.8954       350
           5     0.9717    0.9259    0.9483       297
           6     0.6725    0.6525    0.6624       236
           7     0.9757    0.8777    0.9241       229
           8     0.9098    0.6994    0.7908       173
           9     0.7385    0.6115    0.6690       157
          10     0.7945    0.4603    0.5829       126

    accuracy                         0.8427      3281
   macro avg     0.8449    0.7958    0.8112      3281
weighted avg     0.8550    0.8427    0.8419      3281



100%|██████████| 820/820 [07:19<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.32it/s]


Fold 1, Epoch 4, Training Loss: 0.23869487702585301, Train F1 Score: 0.9120698271191369, Validation Loss: 0.45558194863865403, Validation F1 Score: 0.8513424728234986
Classification Report:
              precision    recall  f1-score   support

           0     0.9203    0.8835    0.9015       601
           1     0.7211    0.8275    0.7707       400
           2     0.8549    0.9947    0.9195       379
           3     0.8434    0.9219    0.8809       333
           4     0.9204    0.8257    0.8705       350
           5     0.9724    0.9495    0.9608       297
           6     0.7215    0.6695    0.6945       236
           7     0.9948    0.8428    0.9125       229
           8     0.9172    0.7688    0.8365       173
           9     0.6974    0.6752    0.6861       157
          10     0.6825    0.6825    0.6825       126

    accuracy                         0.8513      3281
   macro avg     0.8405    0.8220    0.8287      3281
weighted avg     0.8563    0.8513    0.8513      328

100%|██████████| 820/820 [07:19<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.32it/s]


Fold 1, Epoch 5, Training Loss: 0.18657157706528357, Train F1 Score: 0.9298630493915436, Validation Loss: 0.4654074679857834, Validation F1 Score: 0.8600159448253785
Classification Report:
              precision    recall  f1-score   support

           0     0.9350    0.8852    0.9094       601
           1     0.7829    0.8025    0.7926       400
           2     0.8549    0.9947    0.9195       379
           3     0.8407    0.9189    0.8780       333
           4     0.9099    0.8371    0.8720       350
           5     0.9856    0.9226    0.9530       297
           6     0.7634    0.7246    0.7435       236
           7     0.9756    0.8734    0.9217       229
           8     0.8305    0.8497    0.8400       173
           9     0.6556    0.7516    0.7003       157
          10     0.7387    0.6508    0.6920       126

    accuracy                         0.8598      3281
   macro avg     0.8430    0.8374    0.8384      3281
weighted avg     0.8636    0.8598    0.8600      3281

100%|██████████| 820/820 [07:19<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.30it/s]


Fold 1, Epoch 6, Training Loss: 0.16206603715900422, Train F1 Score: 0.937165840150146, Validation Loss: 0.479943415844865, Validation F1 Score: 0.8599336896275892
Classification Report:
              precision    recall  f1-score   support

           0     0.8954    0.9118    0.9035       601
           1     0.7558    0.8200    0.7866       400
           2     0.8529    0.9947    0.9184       379
           3     0.8750    0.8829    0.8789       333
           4     0.9099    0.8371    0.8720       350
           5     0.9823    0.9327    0.9568       297
           6     0.7674    0.6992    0.7317       236
           7     0.9398    0.8865    0.9124       229
           8     0.9301    0.7688    0.8418       173
           9     0.7312    0.7452    0.7382       157
          10     0.7395    0.6984    0.7184       126

    accuracy                         0.8604      3281
   macro avg     0.8527    0.8343    0.8417      3281
weighted avg     0.8627    0.8604    0.8599      3281



100%|██████████| 820/820 [07:19<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.32it/s]


Fold 1, Epoch 7, Training Loss: 0.13976866281534567, Train F1 Score: 0.9416131657176571, Validation Loss: 0.5591911187039991, Validation F1 Score: 0.8507877696233724
Classification Report:
              precision    recall  f1-score   support

           0     0.9577    0.8669    0.9100       601
           1     0.7794    0.7775    0.7785       400
           2     0.8510    0.9947    0.9173       379
           3     0.8555    0.8889    0.8719       333
           4     0.8824    0.8571    0.8696       350
           5     0.9721    0.9394    0.9555       297
           6     0.7524    0.6695    0.7085       236
           7     0.9755    0.8690    0.9192       229
           8     0.8980    0.7630    0.8250       173
           9     0.5369    0.8344    0.6534       157
          10     0.6923    0.6429    0.6667       126

    accuracy                         0.8488      3281
   macro avg     0.8321    0.8276    0.8250      3281
weighted avg     0.8595    0.8488    0.8508      3281

100%|██████████| 820/820 [07:19<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.30it/s]


Fold 1, Epoch 8, Training Loss: 0.13478146921786502, Train F1 Score: 0.9454087669701043, Validation Loss: 0.5114318522007557, Validation F1 Score: 0.8613763163371607
Classification Report:
              precision    recall  f1-score   support

           0     0.9586    0.8869    0.9213       601
           1     0.7443    0.8225    0.7815       400
           2     0.8529    0.9947    0.9184       379
           3     0.8709    0.8709    0.8709       333
           4     0.9468    0.8143    0.8756       350
           5     0.9073    0.9562    0.9311       297
           6     0.7240    0.7669    0.7449       236
           7     0.9757    0.8777    0.9241       229
           8     0.9627    0.7457    0.8404       173
           9     0.6789    0.8217    0.7435       157
          10     0.7456    0.6746    0.7083       126

    accuracy                         0.8604      3281
   macro avg     0.8516    0.8393    0.8418      3281
weighted avg     0.8685    0.8604    0.8614      3281

100%|██████████| 820/820 [07:18<00:00,  1.87it/s]
100%|██████████| 206/206 [00:38<00:00,  5.29it/s]


Fold 1, Epoch 9, Training Loss: 0.1275360457448675, Train F1 Score: 0.9466902144760074, Validation Loss: 0.5546515698666747, Validation F1 Score: 0.8649728268776405
Classification Report:
              precision    recall  f1-score   support

           0     0.9347    0.9052    0.9197       601
           1     0.7574    0.8350    0.7943       400
           2     0.8529    0.9947    0.9184       379
           3     0.9888    0.7988    0.8837       333
           4     0.8524    0.9571    0.9017       350
           5     0.9788    0.9327    0.9552       297
           6     0.6604    0.7500    0.7024       236
           7     0.9758    0.8821    0.9266       229
           8     0.8704    0.8150    0.8418       173
           9     0.7842    0.6943    0.7365       157
          10     0.8000    0.6032    0.6878       126

    accuracy                         0.8650      3281
   macro avg     0.8596    0.8335    0.8426      3281
weighted avg     0.8717    0.8650    0.8650      3281


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/graphcodebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.28it/s]


Fold 2, Epoch 1, Training Loss: 0.8519133973728307, Train F1 Score: 0.7082612972164848, Validation Loss: 0.5459344179255934, Validation F1 Score: 0.8056450402051439
Classification Report:
              precision    recall  f1-score   support

           0     0.8429    0.9148    0.8774       575
           1     0.6976    0.6709    0.6840       471
           2     0.8832    0.9918    0.9344       366
           3     0.9431    0.8103    0.8717       348
           4     0.8711    0.9228    0.8963       337
           5     0.9518    0.9338    0.9427       317
           6     0.5337    0.4826    0.5068       230
           7     0.9415    0.8655    0.9019       223
           8     0.9310    0.6279    0.7500       172
           9     0.4106    0.5041    0.4526       123
          10     0.5172    0.6356    0.5703       118

    accuracy                         0.8058      3280
   macro avg     0.7749    0.7600    0.7625      3280
weighted avg     0.8123    0.8058    0.8056      3280


100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.30it/s]


Fold 2, Epoch 2, Training Loss: 0.4591962486192421, Train F1 Score: 0.8404552919065931, Validation Loss: 0.44717678149435214, Validation F1 Score: 0.843055699564872
Classification Report:
              precision    recall  f1-score   support

           0     0.9072    0.9009    0.9040       575
           1     0.7326    0.8259    0.7764       471
           2     0.9647    0.8962    0.9292       366
           3     0.7500    0.9914    0.8540       348
           4     0.9791    0.8338    0.9006       337
           5     0.9522    0.9432    0.9477       317
           6     0.7418    0.5870    0.6553       230
           7     0.9474    0.8879    0.9167       223
           8     0.9912    0.6512    0.7860       172
           9     0.5500    0.6260    0.5856       123
          10     0.6090    0.6864    0.6454       118

    accuracy                         0.8424      3280
   macro avg     0.8296    0.8027    0.8092      3280
weighted avg     0.8550    0.8424    0.8431      3280


100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.33it/s]


Fold 2, Epoch 3, Training Loss: 0.3255231510572721, Train F1 Score: 0.883174010636217, Validation Loss: 0.43445151470283544, Validation F1 Score: 0.8566372170746874
Classification Report:
              precision    recall  f1-score   support

           0     0.9619    0.8783    0.9182       575
           1     0.7732    0.7962    0.7845       471
           2     0.8835    0.9945    0.9357       366
           3     0.8837    0.9167    0.8999       348
           4     0.9481    0.8665    0.9054       337
           5     0.9210    0.9558    0.9381       317
           6     0.7500    0.6391    0.6901       230
           7     0.9756    0.8969    0.9346       223
           8     0.7421    0.8198    0.7790       172
           9     0.5200    0.7398    0.6107       123
          10     0.7447    0.5932    0.6604       118

    accuracy                         0.8558      3280
   macro avg     0.8276    0.8270    0.8233      3280
weighted avg     0.8625    0.8558    0.8566      3280


100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.34it/s]


Fold 2, Epoch 4, Training Loss: 0.22735190511841086, Train F1 Score: 0.9155760937129688, Validation Loss: 0.4594534876029484, Validation F1 Score: 0.8519602727076416
Classification Report:
              precision    recall  f1-score   support

           0     0.8917    0.9165    0.9039       575
           1     0.7627    0.7643    0.7635       471
           2     0.8835    0.9945    0.9357       366
           3     0.8486    0.9339    0.8892       348
           4     0.9757    0.8338    0.8992       337
           5     0.9766    0.9211    0.9481       317
           6     0.6190    0.7913    0.6947       230
           7     0.9434    0.8969    0.9195       223
           8     0.9225    0.7616    0.8344       172
           9     0.7284    0.4797    0.5784       123
          10     0.7170    0.6441    0.6786       118

    accuracy                         0.8527      3280
   macro avg     0.8426    0.8125    0.8223      3280
weighted avg     0.8581    0.8527    0.8520      3280

100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.32it/s]


Fold 2, Epoch 5, Training Loss: 0.17572569067833263, Train F1 Score: 0.93280473117629, Validation Loss: 0.4886361360510175, Validation F1 Score: 0.8542296105679048
Classification Report:
              precision    recall  f1-score   support

           0     0.9022    0.9148    0.9085       575
           1     0.7960    0.7622    0.7787       471
           2     0.8835    0.9945    0.9357       366
           3     0.8715    0.8966    0.8839       348
           4     0.9522    0.8279    0.8857       337
           5     0.9734    0.9243    0.9482       317
           6     0.7431    0.7043    0.7232       230
           7     0.9393    0.9013    0.9199       223
           8     0.9058    0.7267    0.8065       172
           9     0.5535    0.7154    0.6241       123
          10     0.5752    0.7458    0.6494       118

    accuracy                         0.8527      3280
   macro avg     0.8269    0.8285    0.8240      3280
weighted avg     0.8603    0.8527    0.8542      3280



100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.33it/s]


Fold 2, Epoch 6, Training Loss: 0.16213562027817813, Train F1 Score: 0.9354103199628694, Validation Loss: 0.4780572531392752, Validation F1 Score: 0.8649766695561756
Classification Report:
              precision    recall  f1-score   support

           0     0.9348    0.8974    0.9157       575
           1     0.7762    0.8174    0.7963       471
           2     0.8835    0.9945    0.9357       366
           3     0.9412    0.8276    0.8807       348
           4     0.8534    0.9674    0.9068       337
           5     0.9608    0.9274    0.9438       317
           6     0.7321    0.7130    0.7225       230
           7     0.9573    0.9058    0.9309       223
           8     0.9205    0.8081    0.8607       172
           9     0.5887    0.6748    0.6288       123
          10     0.7677    0.6441    0.7005       118

    accuracy                         0.8649      3280
   macro avg     0.8469    0.8343    0.8384      3280
weighted avg     0.8687    0.8649    0.8650      3280

100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.31it/s]


Fold 2, Epoch 7, Training Loss: 0.14035190296008837, Train F1 Score: 0.9425778319302163, Validation Loss: 0.5103077234779825, Validation F1 Score: 0.8648692100114824
Classification Report:
              precision    recall  f1-score   support

           0     0.9264    0.8974    0.9117       575
           1     0.7996    0.8047    0.8021       471
           2     0.8835    0.9945    0.9357       366
           3     0.8465    0.9511    0.8958       348
           4     0.9752    0.8160    0.8885       337
           5     0.9705    0.9338    0.9518       317
           6     0.7467    0.7304    0.7385       230
           7     0.9751    0.8789    0.9245       223
           8     0.9844    0.7326    0.8400       172
           9     0.5641    0.7154    0.6308       123
          10     0.6174    0.7797    0.6891       118

    accuracy                         0.8631      3280
   macro avg     0.8445    0.8395    0.8371      3280
weighted avg     0.8732    0.8631    0.8649      3280

100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.33it/s]


Fold 2, Epoch 8, Training Loss: 0.13846697963362425, Train F1 Score: 0.9446267241720505, Validation Loss: 0.5349145428351376, Validation F1 Score: 0.8575759205891526
Classification Report:
              precision    recall  f1-score   support

           0     0.9277    0.8922    0.9096       575
           1     0.7719    0.8047    0.7879       471
           2     0.8835    0.9945    0.9357       366
           3     0.9583    0.7931    0.8679       348
           4     0.8541    0.9555    0.9020       337
           5     0.9371    0.9401    0.9386       317
           6     0.7143    0.6739    0.6935       230
           7     0.9486    0.9103    0.9291       223
           8     0.9013    0.7965    0.8457       172
           9     0.6000    0.6829    0.6388       123
          10     0.6949    0.6949    0.6949       118

    accuracy                         0.8576      3280
   macro avg     0.8356    0.8308    0.8312      3280
weighted avg     0.8614    0.8576    0.8576      3280

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/graphcodebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.32it/s]


Fold 3, Epoch 1, Training Loss: 0.8726886275397947, Train F1 Score: 0.6998979257801746, Validation Loss: 0.5513172718855303, Validation F1 Score: 0.8050283820769452
Classification Report:
              precision    recall  f1-score   support

           0     0.9384    0.8562    0.8954       640
           1     0.6504    0.7277    0.6869       404
           2     0.8865    1.0000    0.9398       328
           3     0.8443    0.9115    0.8766       339
           4     0.8964    0.8886    0.8925       341
           5     0.9631    0.9099    0.9357       344
           6     0.5145    0.3184    0.3934       223
           7     0.8675    0.9062    0.8865       224
           8     0.8357    0.7178    0.7723       163
           9     0.4129    0.7171    0.5240       152
          10     0.7536    0.4262    0.5445       122

    accuracy                         0.8070      3280
   macro avg     0.7785    0.7618    0.7589      3280
weighted avg     0.8162    0.8070    0.8050      3280


100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.34it/s]


Fold 3, Epoch 2, Training Loss: 0.48341076263486343, Train F1 Score: 0.8227592189457311, Validation Loss: 0.4609246354129892, Validation F1 Score: 0.83665047249368
Classification Report:
              precision    recall  f1-score   support

           0     0.8998    0.8984    0.8991       640
           1     0.7381    0.7673    0.7524       404
           2     0.8865    1.0000    0.9398       328
           3     0.8243    0.9410    0.8788       339
           4     0.9154    0.8563    0.8848       341
           5     0.9750    0.9070    0.9398       344
           6     0.6899    0.4888    0.5722       223
           7     0.8443    0.9196    0.8803       224
           8     0.8472    0.7485    0.7948       163
           9     0.6402    0.6908    0.6646       152
          10     0.6609    0.6230    0.6414       122

    accuracy                         0.8396      3280
   macro avg     0.8110    0.8037    0.8044      3280
weighted avg     0.8387    0.8396    0.8367      3280



100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.33it/s]


Fold 3, Epoch 3, Training Loss: 0.3446307443876396, Train F1 Score: 0.8724778538831522, Validation Loss: 0.41294306650439777, Validation F1 Score: 0.8519607397403344
Classification Report:
              precision    recall  f1-score   support

           0     0.9213    0.8969    0.9089       640
           1     0.7279    0.8342    0.7774       404
           2     0.8859    0.9939    0.9368       328
           3     0.9408    0.8437    0.8896       339
           4     0.8320    0.9443    0.8846       341
           5     0.9544    0.9128    0.9331       344
           6     0.6457    0.6457    0.6457       223
           7     0.9266    0.9018    0.9140       224
           8     0.8313    0.8160    0.8235       163
           9     0.7913    0.5987    0.6816       152
          10     0.7778    0.5738    0.6604       122

    accuracy                         0.8534      3280
   macro avg     0.8395    0.8147    0.8233      3280
weighted avg     0.8559    0.8534    0.8520      3280

100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.33it/s]


Fold 3, Epoch 4, Training Loss: 0.2548132999828505, Train F1 Score: 0.9050089384947961, Validation Loss: 0.4360869914861169, Validation F1 Score: 0.8635188464692448
Classification Report:
              precision    recall  f1-score   support

           0     0.9465    0.8844    0.9144       640
           1     0.7275    0.8787    0.7960       404
           2     0.8865    1.0000    0.9398       328
           3     0.8902    0.9086    0.8993       339
           4     0.8714    0.8944    0.8828       341
           5     0.9669    0.9331    0.9497       344
           6     0.7259    0.6413    0.6810       223
           7     0.9484    0.9018    0.9245       224
           8     0.8408    0.8098    0.8250       163
           9     0.7152    0.7105    0.7129       152
          10     0.8718    0.5574    0.6800       122

    accuracy                         0.8646      3280
   macro avg     0.8537    0.8291    0.8368      3280
weighted avg     0.8684    0.8646    0.8635      3280


100%|██████████| 821/821 [07:18<00:00,  1.87it/s]
100%|██████████| 205/205 [00:38<00:00,  5.34it/s]


Fold 3, Epoch 5, Training Loss: 0.19556705185581436, Train F1 Score: 0.9239765516048397, Validation Loss: 0.451563662215613, Validation F1 Score: 0.8657184637927846
Classification Report:
              precision    recall  f1-score   support

           0     0.9384    0.8812    0.9089       640
           1     0.7168    0.8960    0.7965       404
           2     0.8839    0.9512    0.9163       328
           3     0.9792    0.8348    0.9013       339
           4     0.8346    0.9619    0.8937       341
           5     0.9695    0.9244    0.9464       344
           6     0.7525    0.6816    0.7153       223
           7     0.9364    0.9196    0.9279       224
           8     0.8986    0.8160    0.8553       163
           9     0.7081    0.7500    0.7284       152
          10     0.8500    0.5574    0.6733       122

    accuracy                         0.8659      3280
   macro avg     0.8607    0.8340    0.8421      3280
weighted avg     0.8736    0.8659    0.8657      3280


 25%|██▌       | 207/821 [01:50<05:28,  1.87it/s]


KeyboardInterrupt: 

In [None]:
# 검증 세트를 이용한 모델 평가
model.eval()
total_eval_accuracy = 0
for batch in tqdm(val_loader):
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(**batch)

    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)
    labels = batch["labels"]

    # 정확도 계산
    accuracy = (predictions == labels).cpu().numpy().mean() * 100
    total_eval_accuracy += accuracy

# 에폭당 평균 검증 정확도 계산
avg_val_accuracy = total_eval_accuracy / len(val_loader)
print(f"Validation Accuracy: {avg_val_accuracy:.2f}%")