# Baseline 추천 모델
1. Most Popular - 인기도 기반 추천
2. BPR-MF - Matrix Factorization with BPR Loss (No Graph)

In [1]:
import os
import pickle
import random
from collections import defaultdict, Counter

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm.notebook import tqdm

# 시각화 설정
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

print("=" * 60)
print("환경 설정")
print("=" * 60)
print(f"PyTorch 버전: {torch.__version__}")
print(f"CUDA 사용 가능: {torch.cuda.is_available()}")
print(f"MPS 사용 가능: {torch.backends.mps.is_available()}")
print("=" * 60)

환경 설정
PyTorch 버전: 2.9.0
CUDA 사용 가능: False
MPS 사용 가능: True


In [2]:
# Configuration
def _select_device():
    """Return the name of the best available torch device.

    Preference order is 'cuda', then 'mps', then 'cpu'.
    """
    if torch.cuda.is_available():
        return 'cuda'
    # torch.backends.mps is missing on older PyTorch builds, so look it up defensively.
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return 'mps'
    return 'cpu'


CONFIG = {
    # System
    'device': _select_device(),
    'seed': 42,

    # BPR-MF hyperparameters (kept identical to V5)
    'embedding_dim': 64,
    'learning_rate': 0.0005,
    'weight_decay': 1e-4,
    'batch_size': 512,
    'epochs': 100,
    'patience': 15,
    'neg_ratio': 4,
    'top_k': 10,

    # Paths
    'data_dir': '../data',
    'processed_dir': '../data/processed',
    'model_dir': '../models',
    'result_dir': '../results',
}


# Fix the random seeds
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch (including CUDA/MPS when present).

    Args:
        seed: Integer seed applied to every random number generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # Seed every visible GPU, not only the current one.
        torch.cuda.manual_seed_all(seed)
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        torch.mps.manual_seed(seed)


set_seed(CONFIG['seed'])

print("Baseline 설정 완료!")

Baseline 설정 완료!


In [3]:
# Load the preprocessed data produced by V3
print("=" * 60)
print("전처리된 데이터 로드")
print("=" * 60)

# Load the ID mappings.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that were produced by this project's own preprocessing step.
with open(os.path.join(CONFIG['processed_dir'], 'id_mappings.pkl'), 'rb') as f:
    mappings = pickle.load(f)

n_users = len(mappings['user_id_map'])
n_items = len(mappings['item_id_map'])

print(f"Users: {n_users}")
print(f"Items: {n_items}")

# Load the train / valid / test splits
train_df = pd.read_csv(os.path.join(CONFIG['processed_dir'], 'train_split_v3.csv'))
valid_df = pd.read_csv(os.path.join(CONFIG['processed_dir'], 'valid_split_v3.csv'))
test_df = pd.read_csv(os.path.join(CONFIG['processed_dir'], 'test_split_v3.csv'))

# The ids are used directly as embedding-row indices and as the sampling range for
# negatives, so every id must lie in [0, n_users) / [0, n_items). Fail here with a
# clear message rather than later with an out-of-range index.
for _split_name, _split_df in (('train', train_df), ('valid', valid_df), ('test', test_df)):
    assert _split_df['user_id'].between(0, n_users - 1).all(), \
        f"{_split_name}: user_id outside [0, {n_users})"
    assert _split_df['item_id'].between(0, n_items - 1).all(), \
        f"{_split_name}: item_id outside [0, {n_items})"

print(f"\nTrain: {len(train_df):,}")
print(f"Valid: {len(valid_df):,}")
print(f"Test:  {len(test_df):,}")
print("\n✅ 데이터 로드 완료!")

전처리된 데이터 로드
Users: 668
Items: 10321

Train: 44,542
Valid: 6,123
Test:  13,389

✅ 데이터 로드 완료!


In [4]:
# Build the user -> item-set dictionary
def create_user_item_dict(df):
    """Map every user to the set of items they interacted with.

    Args:
        df: DataFrame with 'user_id' and 'item_id' columns; other columns are ignored.

    Returns:
        A defaultdict(set) keyed by user id, whose values are sets of item ids.
        Ids are native Python scalars taken straight from the two columns.
    """
    user_items = defaultdict(set)
    # Walk the two id columns directly. DataFrame.iterrows() is slow and converts each
    # row to a single dtype, so integer ids become floats when the frame also holds a
    # float column (for example a rating).
    for user_id, item_id in zip(df['user_id'].tolist(), df['item_id'].tolist()):
        user_items[user_id].add(item_id)
    return user_items

# Interaction history per user from the training split (used for validation-time filtering)
train_user_items = create_user_item_dict(train_df)
# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of repeating
# the row labels of both splits, which would make label-based lookups ambiguous.
train_valid_df = pd.concat([train_df, valid_df], ignore_index=True)
# Interaction history from train + valid (used for test-time filtering)
train_valid_user_items = create_user_item_dict(train_valid_df)

print("User-item dictionary 생성 완료!")

User-item dictionary 생성 완료!


## 1. Most Popular Baseline

In [5]:
# Most Popular model
class MostPopular:
    """Popularity-based recommender: ranks items by how often they occur in the fitted data."""
    def __init__(self):
        # Item ids ordered from most to least frequent; populated by fit().
        self.popular_items = None

    def fit(self, train_df):
        """Compute the popularity ranking from the 'item_id' column of `train_df`."""
        item_counts = Counter(train_df['item_id'].values)
        # most_common() returns (item, count) pairs sorted by descending count
        self.popular_items = [item for item, _ in item_counts.most_common()]
        print(f"Most Popular 모델 학습 완료!")
        print(f"총 {len(self.popular_items)}개 아이템의 인기도 순위 저장")

    def recommend(self, user_id, user_items_dict, k=10):
        """Return up to `k` popular items the user has not interacted with yet.

        Args:
            user_id: Id of the user to recommend for.
            user_items_dict: Mapping from user id to the collection of items to exclude.
                A user missing from the mapping is treated as having no history.
            k: Maximum number of items to return; k <= 0 yields an empty list.

        Returns:
            List of item ids in descending popularity order.

        Raises:
            RuntimeError: If called before fit().
        """
        if self.popular_items is None:
            raise RuntimeError("MostPopular.recommend() called before fit()")

        # .get() avoids a KeyError on plain dicts and avoids inserting an empty
        # entry into a defaultdict for users that are not in it.
        exclude_items = user_items_dict.get(user_id, ())
        recommendations = []

        for item in self.popular_items:
            # Check the budget before appending so that k == 0 returns nothing.
            if len(recommendations) >= k:
                break
            if item not in exclude_items:
                recommendations.append(item)

        return recommendations

# Fit Most Popular on the training split
mp_model = MostPopular()
mp_model.fit(train_df)

# Smoke test: top-5 recommendations for user 0, filtered by their training history
sample_recs = mp_model.recommend(0, train_user_items, k=5)
print(f"\n테스트: User 0에게 Top-5 추천", f"추천 아이템: {sample_recs}", sep="\n")

Most Popular 모델 학습 완료!
총 6398개 아이템의 인기도 순위 저장

테스트: User 0에게 Top-5 추천
추천 아이템: [np.int64(525), np.int64(316), np.int64(230), np.int64(0), np.int64(471)]


In [6]:
# Evaluation routine for the Most Popular model
def evaluate_most_popular(model, eval_df, user_items_dict, k=10):
    """Compute mean Precision@k, Recall@k and NDCG@k over the users in `eval_df`.

    Args:
        model: Object exposing recommend(user_id, user_items_dict, k=...).
        eval_df: DataFrame with 'user_id' and 'item_id' columns holding the ground truth.
        user_items_dict: Passed through to model.recommend for filtering.
        k: Cut-off of the recommendation list.

    Returns:
        Dict with keys 'precision@k', 'recall@k' and 'ndcg@k' (k substituted),
        each averaged over the users present in `eval_df`.
    """
    precision_scores = []
    recall_scores = []
    ndcg_scores = []

    for uid, rows in eval_df.groupby('user_id'):
        relevant = set(rows['item_id'].values)
        n_relevant = len(relevant)

        # Ranked top-k list for this user
        ranked = model.recommend(uid, user_items_dict, k=k)
        n_hits = len(relevant.intersection(ranked))

        precision_scores.append(n_hits / k)
        if n_relevant > 0:
            recall_scores.append(n_hits / n_relevant)
        else:
            recall_scores.append(0)

        # DCG: each hit is discounted by log2(rank + 1), ranks starting at 1
        dcg = 0
        for pos, item in enumerate(ranked):
            if item in relevant:
                dcg += 1 / np.log2(pos + 2)
        # Ideal DCG: all relevant items (at most k) placed at the top of the list
        idcg = sum(1 / np.log2(pos + 2) for pos in range(min(n_relevant, k)))
        ndcg_scores.append(dcg / idcg if idcg > 0 else 0)

    return {
        f'precision@{k}': np.mean(precision_scores),
        f'recall@{k}': np.mean(recall_scores),
        f'ndcg@{k}': np.mean(ndcg_scores),
    }

print("Most Popular 평가 함수 정의 완료!")

Most Popular 평가 함수 정의 완료!


In [7]:
# Most Popular - validation metrics (history filter: training interactions)
print("=" * 60, "Most Popular - Validation 평가", "=" * 60, sep="\n")

val_metrics = evaluate_most_popular(mp_model, valid_df, train_user_items, k=10)
print(
    f"\nValidation Recall@10: {val_metrics['recall@10']:.4f}",
    f"Validation Precision@10: {val_metrics['precision@10']:.4f}",
    f"Validation NDCG@10: {val_metrics['ndcg@10']:.4f}",
    sep="\n",
)

Most Popular - Validation 평가

Validation Recall@10: 0.0650
Validation Precision@10: 0.0425
Validation NDCG@10: 0.0663


In [8]:
# Most Popular - test metrics (model refit on train + valid)
print("=" * 60, "Most Popular - Test 평가", "=" * 60, sep="\n")

# Refit on train + valid so the popularity ranking uses all data available before test
mp_model_test = MostPopular()
mp_model_test.fit(train_valid_df)

for k in (5, 10, 20):
    test_metrics = evaluate_most_popular(mp_model_test, test_df, train_valid_user_items, k=k)

    print(
        f"\nTop-{k} 추천:",
        f"  Precision@{k}: {test_metrics[f'precision@{k}']:.4f}",
        f"  Recall@{k}:    {test_metrics[f'recall@{k}']:.4f}",
        f"  NDCG@{k}:      {test_metrics[f'ndcg@{k}']:.4f}",
        sep="\n",
    )

print("=" * 60)

Most Popular - Test 평가
Most Popular 모델 학습 완료!
총 6736개 아이템의 인기도 순위 저장

Top-5 추천:
  Precision@5: 0.1210
  Recall@5:    0.0490
  NDCG@5:      0.1361

Top-10 추천:
  Precision@10: 0.1037
  Recall@10:    0.0734
  NDCG@10:      0.1324

Top-20 추천:
  Precision@20: 0.0912
  Recall@20:    0.1245
  NDCG@20:      0.1417


## 2. BPR-MF (Matrix Factorization with BPR Loss)

In [9]:
# BPR-MF model (plain matrix factorization, no graph)
class BPRMF(nn.Module):
    """Matrix factorization scored by a user/item embedding dot product (no graph propagation)."""
    def __init__(self, n_users, n_items, embedding_dim=64):
        """Create the user and item embedding tables.

        Args:
            n_users: Number of rows in the user embedding table.
            n_items: Number of rows in the item embedding table.
            embedding_dim: Size of each embedding vector.
        """
        super().__init__()

        self.n_users, self.n_items, self.embedding_dim = n_users, n_items, embedding_dim

        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)

        # Re-initialise both tables from N(0, 0.1^2), users first and then items.
        for table in (self.user_embedding, self.item_embedding):
            nn.init.normal_(table.weight, std=0.1)

    def forward(self, users, items):
        """Return the dot-product score of each (user, item) index pair."""
        pairwise = self.user_embedding(users) * self.item_embedding(items)
        return pairwise.sum(dim=1)

    def get_all_embeddings(self):
        """Return the full (user, item) embedding weight matrices, used for evaluation."""
        return self.user_embedding.weight, self.item_embedding.weight

print("BPR-MF 모델 정의 완료!")

BPR-MF 모델 정의 완료!


In [10]:
# Negative sampling
def negative_sampling(df, user_items_dict, n_items, neg_ratio=1):
    """Draw `neg_ratio` random negative items for every row of `df`.

    Negatives are drawn uniformly from 0..n_items-1 with Python's `random` module and
    re-drawn until they fall outside the user's known items.

    Args:
        df: DataFrame with a 'user_id' column; one group of negatives is drawn per row.
        user_items_dict: Mapping from user id to the collection of items to avoid.
            A user missing from the mapping is treated as having no known items.
        n_items: Size of the item id range to sample from.
        neg_ratio: Number of negatives drawn per row.

    Returns:
        Tuple (neg_users, neg_items) of NumPy arrays of length len(df) * neg_ratio.
        The negatives belonging to row i occupy positions
        [i * neg_ratio, (i + 1) * neg_ratio).

    Raises:
        ValueError: If a user has already interacted with every item in the range,
            so that no negative exists (rejection sampling would never terminate).
    """
    pos_users = df['user_id'].values

    neg_users = []
    neg_items = []
    checked_users = set()

    for user_id in pos_users:
        user_pos_items = user_items_dict.get(user_id, ())

        # Verify once per user that at least one valid negative exists.
        if neg_ratio > 0 and user_id not in checked_users:
            if len(user_pos_items) >= n_items and all(
                item in user_pos_items for item in range(n_items)
            ):
                raise ValueError(
                    f"user {user_id} has interacted with all {n_items} items; "
                    "no negative item can be sampled"
                )
            checked_users.add(user_id)

        for _ in range(neg_ratio):
            # Rejection sampling: redraw until the item is not a known positive.
            neg_item = random.randint(0, n_items - 1)
            while neg_item in user_pos_items:
                neg_item = random.randint(0, n_items - 1)

            neg_users.append(user_id)
            neg_items.append(neg_item)

    return np.array(neg_users), np.array(neg_items)

print("Negative sampling 함수 정의 완료!")

Negative sampling 함수 정의 완료!


In [11]:
# BPR loss
def bpr_loss(pos_scores, neg_scores, neg_ratio=1):
    """Bayesian Personalized Ranking loss: mean of -log(sigmoid(pos - neg)).

    Args:
        pos_scores: Tensor of shape (batch,) with the scores of the positive items.
        neg_scores: Tensor with the scores of the negative items. For neg_ratio > 1 it
            holds batch * neg_ratio values, with the negatives of positive i stored
            contiguously at [i * neg_ratio, (i + 1) * neg_ratio).
        neg_ratio: Number of negatives per positive.

    Returns:
        Scalar tensor: the loss averaged over all (positive, negative) pairs.
    """
    if neg_ratio > 1:
        # Arrange the negatives as (batch, neg_ratio) and broadcast each positive over its row.
        neg_scores = neg_scores.reshape(pos_scores.size(0), neg_ratio)
        pos_scores = pos_scores.unsqueeze(1)

    # logsigmoid is numerically stable; log(sigmoid(x) + eps) saturates at log(eps)
    # with zero gradient once sigmoid(x) underflows, i.e. for the worst-ranked pairs.
    return -F.logsigmoid(pos_scores - neg_scores).mean()

print("BPR Loss 함수 정의 완료!")

BPR Loss 함수 정의 완료!


In [12]:
# Evaluation routine for BPR-MF
def evaluate_bprmf(model, eval_df, user_items_dict, n_items, k=10, device='cpu'):
    """Compute mean Precision@k, Recall@k and NDCG@k of the model over `eval_df`.

    Args:
        model: Module exposing eval() and get_all_embeddings() -> (user_emb, item_emb).
        eval_df: DataFrame with 'user_id' and 'item_id' columns holding the ground truth.
        user_items_dict: Mapping from user id to already-seen items, which are never
            recommended. A user missing from the mapping has no exclusions.
        n_items: Unused; kept so existing callers keep working.
        k: Cut-off of the recommendation list.
        device: Unused; kept so existing callers keep working.

    Returns:
        Dict with keys 'precision@k', 'recall@k' and 'ndcg@k' (k substituted),
        each averaged over the users present in `eval_df`.
    """
    model.eval()

    precisions, recalls, ndcgs = [], [], []

    with torch.no_grad():
        user_emb, item_emb = model.get_all_embeddings()

        for user_id, group in eval_df.groupby('user_id'):
            true_items = set(group['item_id'].values)
            # .get() avoids a KeyError on plain dicts and avoids inserting empty
            # entries into a defaultdict for users without history.
            seen_items = {int(item_id) for item_id in user_items_dict.get(user_id, ())}

            # Scores of this user against every item: (n_items, d) @ (d,) -> (n_items,)
            scores_np = torch.matmul(item_emb, user_emb[int(user_id)]).cpu().numpy()
            if seen_items:
                scores_np[list(seen_items)] = -np.inf

            # Highest-scoring k indices, best first.
            ranked = np.argsort(scores_np)[::-1][:k]
            # When fewer than k unseen items exist, masked items would fill the list
            # and could be counted as hits, so drop them explicitly.
            top_k_items = [int(idx) for idx in ranked if int(idx) not in seen_items]

            hits = len(set(top_k_items) & true_items)

            precision = hits / k
            recall = hits / len(true_items) if len(true_items) > 0 else 0

            # DCG discounts each hit by log2(rank + 1); IDCG is the best achievable DCG.
            dcg = sum([1 / np.log2(i + 2) for i, item in enumerate(top_k_items) if item in true_items])
            idcg = sum([1 / np.log2(i + 2) for i in range(min(len(true_items), k))])
            ndcg = dcg / idcg if idcg > 0 else 0

            precisions.append(precision)
            recalls.append(recall)
            ndcgs.append(ndcg)

    return {
        f'precision@{k}': np.mean(precisions),
        f'recall@{k}': np.mean(recalls),
        f'ndcg@{k}': np.mean(ndcgs),
    }

print("BPR-MF 평가 함수 정의 완료!")

BPR-MF 평가 함수 정의 완료!


In [13]:
# One-epoch training routine for BPR-MF
def train_bprmf_one_epoch(model, train_df, user_items_dict, n_items, 
                          optimizer, batch_size, neg_ratio, device):
    """Run one pass over `train_df` and return the mean batch loss.

    Args:
        model: Scoring module called as model(users, items).
        train_df: DataFrame with 'user_id' and 'item_id' columns (positive pairs).
        user_items_dict: Passed to negative_sampling to avoid known positives.
        n_items: Size of the item id range used for negative sampling.
        optimizer: Optimizer stepping the model parameters.
        batch_size: Number of positive pairs per batch.
        neg_ratio: Negatives drawn per positive pair.
        device: Device the index tensors are moved to.

    Returns:
        Average of the per-batch loss values.
    """
    model.train()

    def _as_index(values):
        # Build a LongTensor of ids on the training device.
        return torch.tensor(values, dtype=torch.long).to(device)

    # Reshuffle the positives every epoch.
    shuffled = train_df.sample(frac=1).reset_index(drop=True)

    batch_losses = []
    for lo in range(0, len(shuffled), batch_size):
        # iloc slicing clamps at the end of the frame, so the last batch may be shorter.
        batch = shuffled.iloc[lo:lo + batch_size]

        users_pos = _as_index(batch['user_id'].values)
        items_pos = _as_index(batch['item_id'].values)

        sampled_users, sampled_items = negative_sampling(
            batch, user_items_dict, n_items, neg_ratio
        )
        users_neg = _as_index(sampled_users)
        items_neg = _as_index(sampled_items)

        loss = bpr_loss(
            model(users_pos, items_pos),
            model(users_neg, items_neg),
            neg_ratio,
        )

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(batch_losses)

print("BPR-MF Training 함수 정의 완료!")

BPR-MF Training 함수 정의 완료!


In [14]:
# Train BPR-MF
bprmf_model = BPRMF(
    n_users=n_users,
    n_items=n_items,
    embedding_dim=CONFIG['embedding_dim']
).to(CONFIG['device'])

optimizer = torch.optim.Adam(
    bprmf_model.parameters(), 
    lr=CONFIG['learning_rate'],
    weight_decay=CONFIG['weight_decay']
)

history = {
    'train_loss': [],
    'valid_recall': [],
}

# Metric used for model selection. Built once so that the log line below does not
# nest same-type quotes inside an f-string, which is only valid on Python 3.12+.
recall_key = f"recall@{CONFIG['top_k']}"

# torch.save does not create missing directories; make sure the target exists.
os.makedirs(CONFIG['model_dir'], exist_ok=True)
checkpoint_path = os.path.join(CONFIG['model_dir'], 'bprmf_best.pth')

# Start below any reachable recall so the first epoch always writes a checkpoint;
# the test cell loads this file unconditionally.
best_recall = float('-inf')
patience_counter = 0

print("=" * 60)
print("BPR-MF Training 시작")
print("=" * 60)
print(f"Model: BPR-MF (Matrix Factorization)")
print(f"  - Users: {n_users}, Items: {n_items}")
print(f"  - Embedding dim: {CONFIG['embedding_dim']}")
total_params = sum(p.numel() for p in bprmf_model.parameters())
print(f"  - Total params: {total_params:,}")
print(f"\nDevice: {CONFIG['device']}")
print(f"Batch size: {CONFIG['batch_size']}")
print(f"Learning rate: {CONFIG['learning_rate']}")
print(f"Negative ratio: {CONFIG['neg_ratio']}")
print("=" * 60)

for epoch in range(CONFIG['epochs']):
    train_loss = train_bprmf_one_epoch(
        bprmf_model, train_df, train_user_items,
        n_items, optimizer, CONFIG['batch_size'], 
        CONFIG['neg_ratio'], CONFIG['device']
    )

    val_metrics = evaluate_bprmf(
        bprmf_model, valid_df, train_user_items,
        n_items, k=CONFIG['top_k'], device=CONFIG['device']
    )
    current_recall = val_metrics[recall_key]

    history['train_loss'].append(train_loss)
    history['valid_recall'].append(current_recall)

    # Log the first epoch and then every fifth epoch.
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"Epoch {epoch+1:3d}/{CONFIG['epochs']} | "
              f"Loss: {train_loss:.4f} | "
              f"R@{CONFIG['top_k']}: {current_recall:.4f}")

    # Early stopping on validation recall: checkpoint on improvement, otherwise
    # count the epochs without improvement.
    if current_recall > best_recall:
        best_recall = current_recall
        patience_counter = 0
        torch.save(bprmf_model.state_dict(), checkpoint_path)
    else:
        patience_counter += 1

    if patience_counter >= CONFIG['patience']:
        print(f"\nEarly stopping at epoch {epoch+1}")
        break

print("=" * 60)
print(f"BPR-MF Training 완료!")
print(f"Best Recall@{CONFIG['top_k']}: {best_recall:.4f}")
print("=" * 60)

BPR-MF Training 시작
Model: BPR-MF (Matrix Factorization)
  - Users: 668, Items: 10321
  - Embedding dim: 64
  - Total params: 703,296

Device: mps
Batch size: 512
Learning rate: 0.0005
Negative ratio: 4
Epoch   1/100 | Loss: 0.6944 | R@10: 0.0021
Epoch   5/100 | Loss: 0.6790 | R@10: 0.0148
Epoch  10/100 | Loss: 0.5954 | R@10: 0.0581
Epoch  15/100 | Loss: 0.3897 | R@10: 0.0686
Epoch  20/100 | Loss: 0.3181 | R@10: 0.0662
Epoch  25/100 | Loss: 0.2935 | R@10: 0.0647
Epoch  30/100 | Loss: 0.2803 | R@10: 0.0672

Early stopping at epoch 30
BPR-MF Training 완료!
Best Recall@10: 0.0686


In [15]:
# BPR-MF test evaluation: restore the checkpoint with the best validation recall
# NOTE(review): the Most Popular baseline is refit on train + valid before testing,
# while these weights were trained on the train split only - confirm this asymmetry
# is intended when comparing the two baselines.
bprmf_model.load_state_dict(
    torch.load(os.path.join(CONFIG['model_dir'], 'bprmf_best.pth'))
)

print("=" * 60, "BPR-MF - Test 평가", "=" * 60, sep="\n")

for k in (5, 10, 20):
    test_metrics = evaluate_bprmf(
        bprmf_model, test_df, train_valid_user_items,
        n_items, k=k, device=CONFIG['device']
    )

    print(
        f"\nTop-{k} 추천:",
        f"  Precision@{k}: {test_metrics[f'precision@{k}']:.4f}",
        f"  Recall@{k}:    {test_metrics[f'recall@{k}']:.4f}",
        f"  NDCG@{k}:      {test_metrics[f'ndcg@{k}']:.4f}",
        sep="\n",
    )

print("=" * 60)

BPR-MF - Test 평가

Top-5 추천:
  Precision@5: 0.1210
  Recall@5:    0.0484
  NDCG@5:      0.1373

Top-10 추천:
  Precision@10: 0.1102
  Recall@10:    0.0800
  NDCG@10:      0.1389

Top-20 추천:
  Precision@20: 0.0926
  Recall@20:    0.1248
  NDCG@20:      0.1448


## Baseline 결과 요약

### Most Popular
- 가장 단순한 베이스라인
- 모든 사용자에게 동일한 인기 아이템 추천
- 개인화 없음

### BPR-MF
- Graph 없는 Matrix Factorization
- User/Item embedding만 학습
- LightGCN과의 차이: Graph propagation 없음

### 다음 단계
- V3/V4/V5 모델의 결과를 위 두 베이스라인과 비교하여 Graph의 효과 확인
- Graph 기반 모델(LightGCN)의 성능이 Most Popular보다 낮으면 학습·평가 과정에 문제가 있는지 점검
- LightGCN의 성능이 BPR-MF보다 높으면 Graph propagation의 효과가 입증됨