In [None]:
# Colab-only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd


# Base paths: project root on the mounted Drive, and the train-embedding file beneath it
BASE_DIR = '/content/drive/MyDrive/Dacon_FakeText/'
SAVE_PATH = f'{BASE_DIR}data/embeddings/train_concat.npy'

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import os
from tqdm import tqdm

In [None]:
# Load the precomputed test embeddings and the matching sample IDs
X_test = np.load(BASE_DIR + 'data/embeddings/test_concat.npy')
test_df = pd.read_csv(BASE_DIR + 'data/test.csv')
test_ids = test_df['ID'].values

# Fail fast if embeddings and IDs are misaligned; otherwise the mismatch only
# surfaces when the submission frame is built, after every fold has been run.
if len(X_test) != len(test_ids):
    raise ValueError(
        f"test_concat.npy has {len(X_test)} rows but test.csv has {len(test_ids)} IDs"
    )

In [None]:
# Model definition (the BCE and Focal checkpoints must share this architecture)
class MLP(nn.Module):
    """Feed-forward network that maps an embedding vector to a single logit.

    The stack is five Linear -> ReLU -> Dropout stages followed by a final
    Linear(64, 1). No sigmoid is applied here; ``forward`` returns raw logits.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()

        # (output width, dropout probability) for each hidden stage, in order.
        hidden_spec = [(1024, 0.4), (512, 0.4), (256, 0.3), (128, 0.3), (64, 0.2)]

        modules = []
        in_features = input_dim
        for out_features, drop_p in hidden_spec:
            modules.append(nn.Linear(in_features, out_features))
            modules.append(nn.ReLU())
            modules.append(nn.Dropout(drop_p))
            in_features = out_features
        modules.append(nn.Linear(in_features, 1))

        # Attribute name and module order determine the state_dict keys
        # ("layers.0.weight", "layers.3.weight", ...), so both are kept as-is.
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Return the logits produced by the layer stack for ``x``."""
        return self.layers(x)

# Use the GPU when torch can see one, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
n_folds = 5

# Accumulators for the per-fold test predictions of each model family
bce_fold_preds, focal_fold_preds = [], []

print("\nFold별 Test 예측 시작...")



✅ Fold별 Test 예측 시작...


In [None]:
# Per-fold test inference for the BCE-trained and Focal-trained MLPs
BATCH_SIZE = 1024


def _load_fold_model(weights_path):
    """Build an MLP sized to X_test, load the checkpoint at ``weights_path``, and set eval mode."""
    model = MLP(input_dim=X_test.shape[1]).to(device)
    # NOTE(review): depending on the installed torch version's defaults,
    # torch.load may unpickle arbitrary objects; only load trusted checkpoints.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()
    return model


def _predict_proba(model, batch_x):
    """Return the sigmoid of the model's logits for one batch as a 1-D NumPy array."""
    # squeeze(-1) removes only the trailing output dimension. A bare squeeze()
    # would also drop the batch dimension for a one-row batch, yielding a 0-d
    # array that cannot be passed to list.extend().
    logits = model(batch_x).squeeze(-1)
    return torch.sigmoid(logits).cpu().numpy()


# Start from empty accumulators so re-running this cell does not stack
# predictions from an earlier run on top of the new ones.
bce_fold_preds = []
focal_fold_preds = []

for fold in range(1, n_folds + 1):
    print(f"\n=== Fold {fold} 예측 중 ===")

    # Load the BCE model for this fold
    bce_path = BASE_DIR + f'model/mlp_bce/mlp_fold{fold}.pt'
    model_bce = _load_fold_model(bce_path)

    # Load the Focal model for this fold
    focal_path = BASE_DIR + f'model/mlp_focal/mlp_fold{fold}.pt'
    model_focal = _load_fold_model(focal_path)

    bce_preds = []
    focal_preds = []

    with torch.no_grad():
        for i in range(0, len(X_test), BATCH_SIZE):
            batch_x = torch.tensor(X_test[i:i + BATCH_SIZE], dtype=torch.float32).to(device)

            # Both models score the same batch; each list collects per-sample probabilities
            bce_preds.extend(_predict_proba(model_bce, batch_x))
            focal_preds.extend(_predict_proba(model_focal, batch_x))

    bce_fold_preds.append(np.array(bce_preds))
    focal_fold_preds.append(np.array(focal_preds))

    print(f"Fold {fold} BCE & Focal 예측 완료")


=== Fold 1 예측 중 ===
✅ Fold 1 BCE & Focal 예측 완료

=== Fold 2 예측 중 ===
✅ Fold 2 BCE & Focal 예측 완료

=== Fold 3 예측 중 ===
✅ Fold 3 BCE & Focal 예측 완료

=== Fold 4 예측 중 ===
✅ Fold 4 BCE & Focal 예측 완료

=== Fold 5 예측 중 ===
✅ Fold 5 BCE & Focal 예측 완료


In [None]:
# Average the per-fold predictions within each model family
bce_mean = np.mean(bce_fold_preds, axis=0)
focal_mean = np.mean(focal_fold_preds, axis=0)

# Weighted blend of the two model families: 0.7 for BCE, 0.3 for Focal.
# (The earlier comment described an equal 0.5 split, which the code never used.)
BCE_WEIGHT = 0.7
FOCAL_WEIGHT = 0.3
final_preds = BCE_WEIGHT * bce_mean + FOCAL_WEIGHT * focal_mean
print(f"\n✅ 최종 앙상블 예측 완료! Shape: {final_preds.shape}")

# Build the submission frame: one row per test ID with its blended probability
submission = pd.DataFrame({
    'ID': test_ids,
    'generated': final_preds
})

output_path = BASE_DIR + 'output/concat_deep_mlp_4.csv'
# Create the output directory if needed so to_csv does not fail on a fresh Drive layout
os.makedirs(os.path.dirname(output_path), exist_ok=True)
submission.to_csv(output_path, index=False, encoding='utf-8')
print(f"\n제출파일 저장 완료: {output_path}")


✅ 최종 앙상블 예측 완료! Shape: (1962,)

✅ 제출파일 저장 완료: /content/drive/MyDrive/Dacon_FakeText/output/concat_deep_mlp_4.csv
