In [1]:
from scipy.stats import loguniform
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import itertools
from itertools import permutations
import torch
import torch.nn as nn
from transformers import (
    AutoTokenizer,
    AutoModel,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
import torch
import torch.nn as nn

In [2]:
import pandas as pd
import re

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# %cd /content/drive/MyDrive/강의/AI를 위한 딥러닝/AI_DL_Project/code

In [4]:
# 데이터 로드
train_path = pd.read_csv('/content/drive/MyDrive/AI를 위한 딥러닝/team_project/train_extended.csv')
test_df = pd.read_csv('/content/drive/MyDrive/AI를 위한 딥러닝/team_project/test.csv')
submission_path = pd.read_csv('/content/drive/MyDrive/AI를 위한 딥러닝/team_project/sample_submission.csv')

In [None]:
# submission_path = pd.read_csv(BASE_DIR + "sample_submission.csv")

In [5]:
submission_path.info()
submission_path.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1780 entries, 0 to 1779
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   ID        1780 non-null   object
 1   answer_0  1780 non-null   int64 
 2   answer_1  1780 non-null   int64 
 3   answer_2  1780 non-null   int64 
 4   answer_3  1780 non-null   int64 
dtypes: int64(4), object(1)
memory usage: 69.7+ KB


Unnamed: 0,ID,answer_0,answer_1,answer_2,answer_3
0,TEST_0000,0,1,2,3
1,TEST_0001,0,1,2,3
2,TEST_0002,0,1,2,3
3,TEST_0003,0,1,2,3
4,TEST_0004,0,1,2,3


In [6]:
# 텍스트 정제
def clean_text(text):
  # 특수문자 제거
  text = re.sub(r'[^\w\s]', '', text)
  # 소문자 변환: 한글에는 무의미
  text = text.lower()
  # 불필요한 공백 제거
  text = ' '.join(text.split())
  return text

In [7]:
# 텍스트 정제
for i in range(4):
    train_path[f'sentence_{i}'] = train_path[f'sentence_{i}'].apply(clean_text)
    test_df[f'sentence_{i}'] = test_df[f'sentence_{i}'].apply(clean_text)

In [8]:
# data_utils.py
import pandas as pd
import numpy as np

def load_data(train_path, test_path):
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    return train, test

def make_labels(df):
    # answer_0 ~ answer_3 → [문장0은 몇 번째, 문장1은 몇 번째, ...]
    answers = df[[f'answer_{i}' for i in range(4)]].values
    labels = []
    for row in answers:
        label = [0]*4
        for pos, sent_idx in enumerate(row):
            label[sent_idx] = pos
        labels.append(label)
    return np.array(labels)

# Dataset 클래스
 4개의 문장을 [SEP]로 묶어서 BERT에 넣을 수 있게 바꿔줌

In [9]:
# ✅ 1. 데이터셋 클래스
from torch.utils.data import Dataset

class GlobalOrderDataset(Dataset):
    def __init__(self, df, tokenizer, labels=None, max_length=256):
        self.sentences = df[[f'sentence_{i}' for i in range(4)]].values
        self.tokenizer = tokenizer
        self.labels = labels
        self.max_length = max_length

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        sents = self.sentences[idx]
        text = '[CLS] ' + ' [SEP] '.join(sents) + ' [SEP]'
        encoding = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        item = {k: v.squeeze(0) for k, v in encoding.items()}

        # labels가 None이 아닐 때만 labels 추가
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)

        return item

# Model 클래스
- AutoModel (예: Roberta) 사용
- 문장 4개를 넣었을 때 그 순서를 예측
- 출력은 [batch, 4, 4] 크기의 행렬 → 각 문장이 어떤 위치에 있어야 하는지 예측

In [10]:
class GlobalOrderModel(nn.Module):
    def __init__(self, model_name='klue/roberta-large'):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        hidden_size = self.bert.config.hidden_size

        # 더 깊은 분류기 구조
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, 1024),
            nn.LayerNorm(1024),  # LayerNorm 추가
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024, 1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024, 4 * 4)  # 4문장 * 4 클래스
        )

    def forward(self, input_ids, attention_mask, labels=None):
        # BERT 인코딩
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # [CLS] 토큰과 마지막 hidden state의 평균을 사용
        cls_token = outputs.last_hidden_state[:, 0]  # [CLS] 토큰
        last_hidden = outputs.last_hidden_state[:, 1:].mean(dim=1)  # 나머지 토큰들의 평균
        pooled = (cls_token + last_hidden) / 2  # 두 특징의 평균

        # 분류
        logits = self.classifier(pooled)

        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, 4), labels.view(-1))
            return {"loss": loss, "logits": logits.view(-1, 4, 4)}
        else:
            return {"logits": logits.view(-1, 4, 4)}

In [11]:
def compute_metrics(eval_pred):
    try:
        logits, labels = eval_pred

        preds = np.argmax(logits, axis=2)
        sentence_accuracy = (preds == labels).mean()
        full_order_accuracy = (preds == labels).all(axis=1).mean()

        return {
            "sentence_accuracy": sentence_accuracy,
            "full_order_accuracy": full_order_accuracy
        }
    except Exception as e:
        print(f"❌ compute_metrics 내부 오류: {e}")
        return {}


In [12]:
model = GlobalOrderModel(model_name="klue/roberta-large")  # ✅ RoBERTa로 교체
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # ✅ GPU 사용 여부 확인
model.to(device)  # ✅ 모델을 해당 디바이스로 이동

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.35G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GlobalOrderModel(
  (bert): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(32000, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (L

In [13]:
from transformers import AutoTokenizer

# ✅ RoBERTa 전용 tokenizer 사용
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")

tokenizer_config.json:   0%|          | 0.00/375 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/248k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/752k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

In [14]:
# ✅ train/val 분리 (20% → 검증에 사용)
train_df_split, val_df = train_test_split(train_path, test_size=0.2, random_state=42)

# ✅ 라벨 생성
train_labels = make_labels(train_df_split)
val_labels = make_labels(val_df)

In [15]:
# 🔹 학습 데이터셋 생성
train_dataset = GlobalOrderDataset(train_df_split, tokenizer, labels=train_labels)
val_dataset = GlobalOrderDataset(val_df, tokenizer, labels=val_labels)

In [16]:
train_dataset[0]

{'input_ids': tensor([    0,     0,  3857,  7285,  4206,  3794,  3747,  2170,  3844,  2530,
          6233,  2525,  3773,  2079,  4901,  2047,  2145,  5068, 11604,  7594,
          2897,  2062,     2,  7655,  7246,  2079, 18309,  2145,  3828,  2125,
          3979,  2069,  4146,  2085,   904,  3892, 31221,  4253, 25052,  2116,
          5588,  2125, 28674,     2,  3983,  7655,  4253, 25052,  2259,  3857,
          2145,  2079,  4203,  2069,  3644,  3940,  4021,  2651,  1295,  1513,
          2062,     2,  1504,  3747, 27135,  3857,  3844,  2259,  5767,  2047,
          2069,  4392,  2088,  3634,  3647,  2079,  4301,  2138,  4523,  2205,
          2259,  3748,  2470,  4008,  2069,  3605,     2,     2,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,   

In [17]:
val_dataset[0]

{'input_ids': tensor([    0,     0, 13025,  4331,  4119,  4525,  2170,  2259,  4262, 31221,
          4254,  6087,  2116,  5588,  2125, 28674,     2,  4331,  5189,  2079,
          4040,  2470, 10291,  2069,  3627, 13149,  2267,  3726,  2052,  3677,
          2205,  2062,     2,  4178,  6233,  4115, 31221,  4119,  4036,  2178,
          2113,  2522, 13197,  2116,  3838,  2079,  5550,  2069,  4651,  4538,
             2, 15259, 10455, 31221, 23731,  2259,  4295,  2522, 12263,   545,
          2079,  8262,  2470,  4740,  2069,  3691,  4538,     2,     2,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,   

In [16]:
import os
os.environ["WANDB_DISABLED"] = "true"

from transformers import AutoTokenizer, Trainer, TrainingArguments

# ✅ RoBERTa tokenizer
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")

# 🔹 전체 라벨 생성
labels = make_labels(train_path)

# ✅ TrainingArguments
training_args = TrainingArguments(
    output_dir="./global_results",
    num_train_epochs=3,
    learning_rate=5e-5,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=256,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=100,
    warmup_steps=500,
    weight_decay=0.01,

    load_best_model_at_end=True,
    metric_for_best_model='full_order_accuracy',
    greater_is_better=True,
    seed=42,
    fp16=True
)

# ✅ Trainer 설정 (전체 학습용)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2, early_stopping_threshold=0.001)]
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


# 학습 실행

In [17]:
trainer.train()

Epoch,Training Loss,Validation Loss,Sentence Accuracy,Full Order Accuracy
1,0.0227,0.013649,0.997818,0.995494
2,0.0039,0.002317,0.999787,0.999575
3,0.0013,0.00079,1.0,1.0


TrainOutput(global_step=3309, training_loss=0.12037540766266708, metrics={'train_runtime': 2167.978, 'train_samples_per_second': 195.305, 'train_steps_per_second': 1.526, 'total_flos': 0.0, 'train_loss': 0.12037540766266708, 'epoch': 3.0})

In [18]:
import shutil

checkpoints = ["checkpoint-1103", "checkpoint-2206", "checkpoint-3309"]
for ckpt in checkpoints:
    shutil.rmtree(f"/content/global_results/{ckpt}", ignore_errors=True)


In [19]:
# ✅ best checkpoint 기준으로 모델 저장
save_path = "/content/global_results/best_model"

trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)

('/content/global_results/best_model/tokenizer_config.json',
 '/content/global_results/best_model/special_tokens_map.json',
 '/content/global_results/best_model/vocab.txt',
 '/content/global_results/best_model/added_tokens.json',
 '/content/global_results/best_model/tokenizer.json')

# 튜닝

In [42]:
# ✅ 튜닝용 데이터 분리
train_split_df, val_df = train_test_split(train_path, test_size=0.2, random_state=42)
train_split_labels = make_labels(train_split_df)
val_labels = make_labels(val_df)

train_split_dataset = GlobalOrderDataset(train_split_df, tokenizer, labels=train_split_labels)
val_dataset = GlobalOrderDataset(val_df, tokenizer, labels=val_labels)

In [43]:
from transformers import TrainingArguments, Trainer
from scipy.stats import loguniform
import numpy as np
import pandas as pd

In [44]:
import os
import numpy as np
import pandas as pd
import torch
import shutil
from scipy.stats import loguniform
from transformers import AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback

def run_global_tuning(train_split_dataset, val_dataset, tokenizer, n_trials=2):
    results_path = './global_results/tuning_log.csv'
    if os.path.exists(results_path):
        results = pd.read_csv(results_path).to_dict(orient='records')
        start_trial = len(results)
    else:
        results = []
        start_trial = 0

    for trial in range(start_trial, n_trials):
        print(f"\n🎯 Trial {trial + 1} 시작")
        lr = float(loguniform.rvs(1.5e-5, 5.5e-5))
        wd = float(loguniform.rvs(0.01, 0.07))
        epochs = 10
        batch_size = int(np.random.randint(16, 32))
        total_steps = (len(train_split_dataset) // batch_size) * epochs
        warmup = int(total_steps * 0.02)

        args = TrainingArguments(
            output_dir=f'./global_results/trial_{trial+1}',
            learning_rate=lr,
            weight_decay=wd,
            warmup_steps=warmup,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=64,
            num_train_epochs=epochs,
            gradient_accumulation_steps=2,
            lr_scheduler_type='linear',
            logging_dir='./roberta_logs',
            logging_steps=100,
            save_strategy="no",
            eval_strategy='epoch',  # ✅ 평가 활성화
            seed=42,
            load_best_model_at_end=False,
            metric_for_best_model='full_order_accuracy',  # ✅ 이 기준으로 best 선택
            greater_is_better=True,
            report_to='none',
            fp16=True,
            optim='adamw_torch_fused'
        )

        trainer = Trainer(
            model=GlobalOrderModel("klue/roberta-large"),
            args=args,
            train_dataset=train_split_dataset,
            eval_dataset=val_dataset,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics,  # ✅ 사용자 정의 메트릭
            callbacks=[EarlyStoppingCallback(early_stopping_patience=2, early_stopping_threshold=0.001)],
        )

        try:
            trainer.train()

            # 최종 평가
            eval_result = trainer.evaluate()
            sentence_acc = eval_result.get("eval_sentence_accuracy", None)
            full_order_acc = eval_result.get("eval_full_order_accuracy", None)
            eval_loss = eval_result.get("eval_loss", None)

            # 결과만 저장
            results.append({
                'trial': trial + 1,
                'learning_rate': lr,
                'weight_decay': wd,
                'warmup_steps': warmup,
                'epochs': epochs,
                'batch_size' : batch_size,
                'sentence_accuracy': sentence_acc,
                'full_order_accuracy': full_order_acc,
                'eval_loss': eval_loss
            })
            pd.DataFrame(results).to_csv(results_path, index=False)

            print(f"✅ Trial {trial+1} 완료 | learning rate: {lr}, weight decay: {wd}, epochs: {epochs}, batch size: {batch_size}, warmup steps: {warmup}")

        except Exception as e:
            print(f"⛔ Trial {trial+1} 중 오류 발생: {e}")
            continue

    print("\n🏆 상위 Trial:")
    if results:  # results가 비어있지 않은 경우에만 실행
        top_trials = pd.DataFrame(results).sort_values(by="full_order_accuracy", ascending=False).head(1)
        print(top_trials)
        return top_trials
    else:
        print("❌ 성공적으로 완료된 trial이 없습니다.")
        return None

In [45]:
top_trials = run_global_tuning(train_split_dataset, val_dataset, tokenizer, n_trials=5)


🎯 Trial 1 시작


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Sentence Accuracy,Full Order Accuracy
1,1.3355,1.019901,0.515976,0.046227
2,0.8614,0.514628,0.782801,0.503739
3,0.5223,0.408592,0.845513,0.683889
4,0.3644,0.331658,0.887661,0.777022
5,0.2517,0.29878,0.906526,0.813052
6,0.1729,0.322052,0.903807,0.811693
7,0.126,0.308406,0.911455,0.827328
8,0.0681,0.330429,0.911115,0.825969
9,0.0562,0.328819,0.914174,0.834126
10,0.0489,0.323291,0.915704,0.838205


✅ Trial 1 완료 | learning rate: 2.4402456522781148e-05, weight decay: 0.06359848890377333, epochs: 10, batch size: 26, warmup steps: 45

🎯 Trial 2 시작


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Sentence Accuracy,Full Order Accuracy
1,1.3149,1.093676,0.442726,0.031951
2,0.9933,0.509672,0.781101,0.475867
3,0.5324,0.398215,0.85792,0.704283
4,0.356,0.336277,0.89446,0.791298
5,0.2496,0.308498,0.906866,0.817811
6,0.1709,0.327656,0.903127,0.809653
7,0.1206,0.342179,0.906866,0.81985
8,0.0707,0.347477,0.910775,0.825289
9,0.0633,0.338824,0.915704,0.834126
10,0.0518,0.344539,0.914004,0.832087


✅ Trial 2 완료 | learning rate: 2.4402456522781148e-05, weight decay: 0.06359848890377333, epochs: 10, batch size: 26, warmup steps: 45

🎯 Trial 3 시작


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
import pandas as pd
import os

# ✅ 튜닝 로그 불러오기
df = pd.read_csv("./global_results/tuning_log.csv")

# ✅ 모델 저장된 trial만 남기기
# df = df[df["model_saved"] == True]

# ✅ 평가 결과가 있는 경우: full_order_accuracy 기준 선택, 없으면 trial 번호 기준
if "full_order_accuracy" in df.columns and df["full_order_accuracy"].notna().any():
    top_trial = df.sort_values("full_order_accuracy", ascending=False).iloc[0]
else:
    top_trial = df.sort_values("trial", ascending=True).iloc[0]  # fallback

# # ✅ 경로 확인
# best_model_path = top_trial["save_path"]
# print(f"🏆 선택된 Best Model 경로: {best_model_path}")

top_trial.head()

In [None]:
# ✅ 모델 로드
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GlobalOrderModel("klue/roberta-large")

# safetensors 파일 로드
state_dict = load_file(f"{best_model_path}/model.safetensors")
model.load_state_dict(state_dict)
model.to(device)
model.eval()

In [None]:
import shutil

final_model_dir = "./global_results/best_model_custom"
shutil.copytree(best_model_path, final_model_dir, dirs_exist_ok=True)
print(f"📦 최종 best model 저장됨: {final_model_dir}")


# 추론

In [17]:
# 🔹 전체 학습 데이터셋 구성
train_labels = make_labels(train_path)
train_dataset = GlobalOrderDataset(train_path, tokenizer, labels=train_labels)

# 🔹 튜닝 결과에서 best 설정 추출
# (예: top_trial에서 learning_rate, weight_decay 등 가져오기)
args = TrainingArguments(
    output_dir="./global_results/best_model_custom",
    learning_rate=5e-05,
    weight_decay=0.01,
    warmup_steps=500,
    per_device_train_batch_size=128,
    num_train_epochs=3,
    logging_dir='./retrain_logs',
    save_strategy="no",  # ❌ 저장은 수동으로
    eval_strategy="no",
    report_to='none',
    fp16=True
)

trainer = Trainer(
    model=GlobalOrderModel("klue/roberta-large"),
    args=args,
    train_dataset=train_dataset,
    tokenizer=tokenizer
)

# 🔹 재학습
trainer.train()

# 🔹 최종 모델 저장
trainer.save_model("./global_results/final_model")
tokenizer.save_pretrained("./global_results/final_model")

Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Step,Training Loss
500,0.5618
1000,0.0642
1500,0.0246
2000,0.0107
2500,0.0064
3000,0.0027
3500,0.0015
4000,0.0008


('./global_results/final_model/tokenizer_config.json',
 './global_results/final_model/special_tokens_map.json',
 './global_results/final_model/vocab.txt',
 './global_results/final_model/added_tokens.json',
 './global_results/final_model/tokenizer.json')

In [18]:
from safetensors.torch import load_file
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

best_model_path = "./global_results/final_model"  # ← 경로 꼭 이걸로 맞춰주세요

model = GlobalOrderModel("klue/roberta-large")
state_dict = load_file(f"{best_model_path}/model.safetensors")
model.load_state_dict(state_dict)
model.to(device)
model.eval()

Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GlobalOrderModel(
  (bert): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(32000, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (L

# 예측 및 저장

In [19]:
# inference.py
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

In [20]:
def predict(model, test_df, tokenizer, device, batch_size=32):
    test_dataset = GlobalOrderDataset(test_df, tokenizer, labels=None)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    model.eval()
    all_preds = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # 모델 출력이 [batch, 4, 4] 형태로 나옴
            # 각 문장별로 4개의 위치에 대한 확률
            logits = model(input_ids, attention_mask)['logits']

            # 각 문장이 어떤 위치에 가야하는지 예측
            # [batch, 4, 4] -> [batch, 4] (각 문장의 최적 위치)
            preds = logits.argmax(-1).cpu().numpy()
            all_preds.append(preds)

    all_preds = np.concatenate(all_preds, axis=0)

    # [문장0은 몇 번째, ...] → [answer_0, answer_1, ...]로 역변환
    answers = []
    for row in all_preds:
        answer = [0] * 4
        for sent_idx, pos in enumerate(row):
            answer[pos] = sent_idx
        answers.append(answer)

    return np.array(answers)

def save_submission(test_df, answers, submission_path, output_path):
    sub = pd.read_csv(submission_path)
    for i in range(4):
        sub[f'answer_{i}'] = answers[:, i]
    sub.to_csv(output_path, index=False)

In [21]:
# 예측 수행
answers = predict(
    model=model,
    test_df=test_df,
    tokenizer=tokenizer,
    device=device,
    batch_size=32
)

# 제출 파일 저장
save_submission(
    test_df=test_df,
    answers=answers,
    submission_path="/content/drive/MyDrive/AI를 위한 딥러닝/team_project/sample_submission.csv",
    output_path="/content/submission.csv"
)

print("✅ submission.csv 저장 완료")

✅ submission.csv 저장 완료
