# LightGCN V4 - 하이퍼파라미터 최적화

## 목표
- V3 구조 유지 (덧붙이기 금지)
- 하이퍼파라미터 그리드 서치로 최적 조합 탐색
- 실험 결과 분석 및 최종 모델 선정

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
from itertools import product
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Device: {device}')

Device: mps


## 1. 데이터 전처리 (V3와 동일)

In [2]:
df = pd.read_csv('data/train.csv')

# ID 매핑
user2idx = {u: i for i, u in enumerate(sorted(df['user'].unique()))}
item2idx = {it: i for i, it in enumerate(sorted(df['item'].unique()))}
n_users, n_items = len(user2idx), len(item2idx)

df['user_idx'] = df['user'].map(user2idx)
df['item_idx'] = df['item'].map(item2idx)
df['label'] = (df['rating'] >= 4.0).astype(int)

# Positive만 사용
positive_df = df[df['label'] == 1].copy()

# Train/Val Split
train_data, val_data = [], []
for user_idx in range(n_users):
    user_pos = positive_df[positive_df['user_idx'] == user_idx]
    if len(user_pos) >= 2:
        user_pos = user_pos.sample(frac=1, random_state=SEED).reset_index(drop=True)
        split_idx = max(1, int(0.8 * len(user_pos)))
        train_data.append(user_pos.iloc[:split_idx])
        val_data.append(user_pos.iloc[split_idx:])
    elif len(user_pos) == 1:
        train_data.append(user_pos)

train_df = pd.concat(train_data, ignore_index=True)
val_df = pd.concat(val_data, ignore_index=True)

print(f"Users: {n_users}, Items: {n_items}")
print(f"Train: {len(train_df):,}, Val: {len(val_df):,}")

Users: 668, Items: 10321
Train: 41,214, Val: 10,616


In [3]:
# User positive items
user_pos_items = defaultdict(set)
for _, row in train_df.iterrows():
    user_pos_items[int(row['user_idx'])].add(int(row['item_idx']))

def sample_negatives(user_idx, num_neg=4):
    pos = user_pos_items[user_idx]
    neg_cands = list(set(range(n_items)) - pos)
    if len(neg_cands) < num_neg:
        return neg_cands
    return np.random.choice(neg_cands, size=num_neg, replace=False).tolist()

## 2. 모델 & 학습 함수 (V3와 동일)

In [4]:
def build_graph(train_df, n_users, n_items):
    users = train_df['user_idx'].values
    items = train_df['item_idx'].values
    edge_u2i = np.array([users, items + n_users])
    edge_i2u = np.array([items + n_users, users])
    edge_index = torch.LongTensor(np.concatenate([edge_u2i, edge_i2u], axis=1))
    
    num_nodes = n_users + n_items
    deg = torch.zeros(num_nodes).scatter_add(0, edge_index[0], torch.ones(edge_index.shape[1]))
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    edge_weight = deg_inv_sqrt[edge_index[0]] * deg_inv_sqrt[edge_index[1]]
    
    return edge_index.to(device), edge_weight.to(device)

edge_index, edge_weight = build_graph(train_df, n_users, n_items)

In [5]:
class LightGCN(nn.Module):
    def __init__(self, n_users, n_items, emb_dim=32, n_layers=2):
        super().__init__()
        self.n_users = n_users
        self.n_items = n_items
        self.emb_dim = emb_dim
        self.n_layers = n_layers
        
        self.user_emb = nn.Embedding(n_users, emb_dim)
        self.item_emb = nn.Embedding(n_items, emb_dim)
        nn.init.xavier_uniform_(self.user_emb.weight)
        nn.init.xavier_uniform_(self.item_emb.weight)
    
    def forward(self, edge_index, edge_weight):
        all_emb = torch.cat([self.user_emb.weight, self.item_emb.weight], dim=0)
        embs = [all_emb]
        
        for _ in range(self.n_layers):
            row, col = edge_index
            messages = all_emb[col] * edge_weight.unsqueeze(1)
            all_emb = torch.zeros_like(all_emb).scatter_add(0, row.unsqueeze(1).expand(-1, self.emb_dim), messages)
            embs.append(all_emb)
        
        final_emb = torch.mean(torch.stack(embs), dim=0)
        return final_emb[:self.n_users], final_emb[self.n_users:]
    
    def predict(self, u_idx, i_idx, u_emb, i_emb):
        return (u_emb[u_idx] * i_emb[i_idx]).sum(dim=1)

def bpr_loss(pos_scores, neg_scores):
    diff = pos_scores.unsqueeze(1) - neg_scores
    return -torch.log(torch.sigmoid(diff) + 1e-8).mean()

In [6]:
@torch.no_grad()
def evaluate(model, edge_index, edge_weight, val_df, k=10, n_neg=99):
    model.eval()
    u_emb, i_emb = model(edge_index, edge_weight)
    
    hits, ndcgs = [], []
    for user_idx in val_df['user_idx'].unique():
        for _, row in val_df[val_df['user_idx'] == user_idx].iterrows():
            pos_item = int(row['item_idx'])
            neg_items = sample_negatives(user_idx, n_neg)
            if len(neg_items) < n_neg:
                continue
            
            candidates = [pos_item] + neg_items
            u_t = torch.full((len(candidates),), user_idx, dtype=torch.long).to(device)
            i_t = torch.LongTensor(candidates).to(device)
            scores = model.predict(u_t, i_t, u_emb, i_emb).cpu().numpy()
            
            rank = (scores > scores[0]).sum() + 1
            hits.append(1.0 if rank <= k else 0.0)
            ndcgs.append(1.0 / np.log2(rank + 1) if rank <= k else 0.0)
    
    return np.mean(hits), np.mean(ndcgs)

In [7]:
def train_model(emb_dim, n_layers, lr, weight_decay, epochs=30, batch_size=512, num_neg=4, verbose=False):
    """하나의 하이퍼파라미터 조합으로 모델 학습"""
    model = LightGCN(n_users, n_items, emb_dim, n_layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    best_hit = 0
    patience_cnt = 0
    
    for epoch in range(epochs):
        model.train()
        train_shuffled = train_df.sample(frac=1, random_state=SEED+epoch).reset_index(drop=True)
        
        epoch_loss = 0
        n_batches = 0
        
        for i in range(0, len(train_shuffled), batch_size):
            batch = train_shuffled.iloc[i:i+batch_size]
            pos_u = torch.LongTensor(batch['user_idx'].values).to(device)
            pos_i = torch.LongTensor(batch['item_idx'].values).to(device)
            neg_items = [sample_negatives(int(u), num_neg) for u in batch['user_idx'].values]
            neg_i = torch.LongTensor(neg_items).to(device)
            
            u_emb, i_emb = model(edge_index, edge_weight)
            pos_scores = model.predict(pos_u, pos_i, u_emb, i_emb)
            neg_u = pos_u.unsqueeze(1).expand(-1, num_neg).reshape(-1)
            neg_scores = model.predict(neg_u, neg_i.reshape(-1), u_emb, i_emb).reshape(-1, num_neg)
            
            loss = bpr_loss(pos_scores, neg_scores)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
            n_batches += 1
        
        # Evaluate every 10 epochs
        if (epoch + 1) % 10 == 0:
            hit, ndcg = evaluate(model, edge_index, edge_weight, val_df)
            if verbose:
                print(f"  Epoch {epoch+1}: Loss={epoch_loss/n_batches:.4f}, Hit@10={hit:.4f}")
            
            if hit > best_hit:
                best_hit = hit
                best_ndcg = ndcg
                patience_cnt = 0
            else:
                patience_cnt += 1
            
            if patience_cnt >= 3:
                break
    
    return best_hit, best_ndcg, model

## 3. 하이퍼파라미터 실험

In [8]:
# 실험할 하이퍼파라미터
param_grid = {
    'emb_dim': [16, 32, 64],
    'n_layers': [1, 2, 3],
    'lr': [1e-3, 5e-3, 1e-2],
    'weight_decay': [1e-6, 1e-5, 1e-4]
}

# 전체 조합 수
total_experiments = np.prod([len(v) for v in param_grid.values()])
print(f"Total experiments: {total_experiments}")
print("This would take too long, so we'll do a smarter search...")

Total experiments: 81
This would take too long, so we'll do a smarter search...


In [9]:
# 단계별 탐색 (효율적)
results = []

# Step 1: Embedding dim & Layers (lr=5e-3, wd=1e-5 고정)
print("=" * 50)
print("Step 1: Finding optimal emb_dim & n_layers")
print("=" * 50)

for emb_dim in [16, 32, 64]:
    for n_layers in [1, 2, 3]:
        print(f"Testing emb_dim={emb_dim}, n_layers={n_layers}...", end=" ")
        hit, ndcg, _ = train_model(emb_dim, n_layers, lr=5e-3, weight_decay=1e-5, epochs=30)
        results.append({
            'emb_dim': emb_dim, 'n_layers': n_layers, 
            'lr': 5e-3, 'weight_decay': 1e-5,
            'hit@10': hit, 'ndcg@10': ndcg
        })
        print(f"Hit@10={hit:.4f}, NDCG@10={ndcg:.4f}")

# 최고 조합 찾기
best_arch = max(results, key=lambda x: x['hit@10'])
print(f"\nBest architecture: emb_dim={best_arch['emb_dim']}, n_layers={best_arch['n_layers']}")

Step 1: Finding optimal emb_dim & n_layers
Testing emb_dim=16, n_layers=1... Hit@10=0.7861, NDCG@10=0.5282
Testing emb_dim=16, n_layers=2... Hit@10=0.7814, NDCG@10=0.5208
Testing emb_dim=16, n_layers=3... Hit@10=0.7689, NDCG@10=0.5024
Testing emb_dim=32, n_layers=1... Hit@10=0.7923, NDCG@10=0.5383
Testing emb_dim=32, n_layers=2... Hit@10=0.7865, NDCG@10=0.5284
Testing emb_dim=32, n_layers=3... 

KeyboardInterrupt: 

In [None]:
# Step 2: Learning rate 탐색
print("\n" + "=" * 50)
print("Step 2: Finding optimal learning rate")
print("=" * 50)

lr_results = []
for lr in [1e-3, 5e-3, 1e-2]:
    print(f"Testing lr={lr}...", end=" ")
    hit, ndcg, _ = train_model(
        best_arch['emb_dim'], best_arch['n_layers'], 
        lr=lr, weight_decay=1e-5, epochs=30
    )
    lr_results.append({'lr': lr, 'hit@10': hit, 'ndcg@10': ndcg})
    print(f"Hit@10={hit:.4f}")

best_lr = max(lr_results, key=lambda x: x['hit@10'])['lr']
print(f"\nBest LR: {best_lr}")

In [None]:
# Step 3: Weight decay 탐색
print("\n" + "=" * 50)
print("Step 3: Finding optimal weight decay")
print("=" * 50)

wd_results = []
for wd in [1e-6, 1e-5, 1e-4]:
    print(f"Testing weight_decay={wd}...", end=" ")
    hit, ndcg, _ = train_model(
        best_arch['emb_dim'], best_arch['n_layers'], 
        lr=best_lr, weight_decay=wd, epochs=30
    )
    wd_results.append({'weight_decay': wd, 'hit@10': hit, 'ndcg@10': ndcg})
    print(f"Hit@10={hit:.4f}")

best_wd = max(wd_results, key=lambda x: x['hit@10'])['weight_decay']
print(f"\nBest Weight Decay: {best_wd}")

In [None]:
# 모든 결과 정리
print("\n" + "=" * 50)
print("Experiment Summary")
print("=" * 50)

results_df = pd.DataFrame(results)
print("\nArchitecture Search Results:")
print(results_df.sort_values('hit@10', ascending=False).to_string(index=False))

print(f"\n\nFinal Best Configuration:")
print(f"  emb_dim: {best_arch['emb_dim']}")
print(f"  n_layers: {best_arch['n_layers']}")
print(f"  lr: {best_lr}")
print(f"  weight_decay: {best_wd}")

## 4. 최종 모델 학습

In [None]:
# 최적 하이퍼파라미터로 더 오래 학습
print("=" * 50)
print("Training Final Model with Best Hyperparameters")
print("=" * 50)

FINAL_EMB_DIM = best_arch['emb_dim']
FINAL_N_LAYERS = best_arch['n_layers']
FINAL_LR = best_lr
FINAL_WD = best_wd
FINAL_EPOCHS = 50

print(f"Config: emb={FINAL_EMB_DIM}, layers={FINAL_N_LAYERS}, lr={FINAL_LR}, wd={FINAL_WD}")
print("")

final_model = LightGCN(n_users, n_items, FINAL_EMB_DIM, FINAL_N_LAYERS).to(device)
optimizer = torch.optim.Adam(final_model.parameters(), lr=FINAL_LR, weight_decay=FINAL_WD)

history = {'loss': [], 'hit@10': [], 'ndcg@10': []}
best_hit = 0

for epoch in range(FINAL_EPOCHS):
    final_model.train()
    train_shuffled = train_df.sample(frac=1, random_state=SEED+epoch).reset_index(drop=True)
    
    epoch_loss = 0
    n_batches = 0
    
    for i in range(0, len(train_shuffled), 512):
        batch = train_shuffled.iloc[i:i+512]
        pos_u = torch.LongTensor(batch['user_idx'].values).to(device)
        pos_i = torch.LongTensor(batch['item_idx'].values).to(device)
        neg_items = [sample_negatives(int(u), 4) for u in batch['user_idx'].values]
        neg_i = torch.LongTensor(neg_items).to(device)
        
        u_emb, i_emb = final_model(edge_index, edge_weight)
        pos_scores = final_model.predict(pos_u, pos_i, u_emb, i_emb)
        neg_u = pos_u.unsqueeze(1).expand(-1, 4).reshape(-1)
        neg_scores = final_model.predict(neg_u, neg_i.reshape(-1), u_emb, i_emb).reshape(-1, 4)
        
        loss = bpr_loss(pos_scores, neg_scores)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        n_batches += 1
    
    history['loss'].append(epoch_loss / n_batches)
    
    if (epoch + 1) % 5 == 0:
        hit, ndcg = evaluate(final_model, edge_index, edge_weight, val_df)
        history['hit@10'].append(hit)
        history['ndcg@10'].append(ndcg)
        print(f"Epoch {epoch+1:3d} | Loss: {epoch_loss/n_batches:.4f} | Hit@10: {hit:.4f} | NDCG@10: {ndcg:.4f}")
        
        if hit > best_hit:
            best_hit = hit
            torch.save(final_model.state_dict(), 'best_lightgcn_v4.pt')

print(f"\nFinal Best Hit@10: {best_hit:.4f}")

In [None]:
# 결과 시각화
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

axes[0].plot(history['loss'], 'b-', linewidth=2)
axes[0].set_title('Training Loss')
axes[0].set_xlabel('Epoch')
axes[0].grid(alpha=0.3)

epochs_val = np.arange(5, len(history['loss'])+1, 5)[:len(history['hit@10'])]
axes[1].plot(epochs_val, history['hit@10'], 'g-o', linewidth=2)
axes[1].set_title('Hit@10')
axes[1].set_xlabel('Epoch')
axes[1].grid(alpha=0.3)

axes[2].plot(epochs_val, history['ndcg@10'], 'r-o', linewidth=2)
axes[2].set_title('NDCG@10')
axes[2].set_xlabel('Epoch')
axes[2].grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Threshold 튜닝 & O/X 추론

In [None]:
# Load best model
final_model.load_state_dict(torch.load('best_lightgcn_v4.pt'))
final_model.eval()

with torch.no_grad():
    u_emb, i_emb = final_model(edge_index, edge_weight)

# Positive scores
val_scores, val_labels = [], []
with torch.no_grad():
    for _, row in val_df.iterrows():
        u_idx = torch.LongTensor([int(row['user_idx'])]).to(device)
        i_idx = torch.LongTensor([int(row['item_idx'])]).to(device)
        score = final_model.predict(u_idx, i_idx, u_emb, i_emb).item()
        val_scores.append(score)
        val_labels.append(1)

# Negative scores
with torch.no_grad():
    for user_idx in val_df['user_idx'].unique():
        n_pos = (val_df['user_idx'] == user_idx).sum()
        neg_items = sample_negatives(int(user_idx), n_pos)
        for neg_item in neg_items:
            u_idx = torch.LongTensor([int(user_idx)]).to(device)
            i_idx = torch.LongTensor([neg_item]).to(device)
            score = final_model.predict(u_idx, i_idx, u_emb, i_emb).item()
            val_scores.append(score)
            val_labels.append(0)

val_scores = np.array(val_scores)
val_labels = np.array(val_labels)

print(f"Validation: {len(val_scores)} samples (Pos: {val_labels.sum()}, Neg: {len(val_labels)-val_labels.sum()})")

In [None]:
# Threshold search
thresholds = np.percentile(val_scores, [30, 40, 50, 60, 70, 80, 90])

print("Threshold Tuning:")
print(f"{'Threshold':<12} {'Precision':<12} {'Recall':<12} {'F1':<12}")
print("-" * 50)

best_prec, best_th, best_f1 = 0, 0, 0
for th in thresholds:
    preds = (val_scores >= th).astype(int)
    tp = ((preds == 1) & (val_labels == 1)).sum()
    fp = ((preds == 1) & (val_labels == 0)).sum()
    fn = ((preds == 0) & (val_labels == 1)).sum()
    
    prec = tp / (tp + fp) if (tp + fp) > 0 else 0
    rec = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0
    
    print(f"{th:<12.4f} {prec:<12.4f} {rec:<12.4f} {f1:<12.4f}")
    
    if prec >= 0.6 and prec > best_prec:
        best_prec, best_th, best_f1 = prec, th, f1
    elif best_prec < 0.6 and f1 > best_f1:
        best_f1, best_th, best_prec = f1, th, prec

print("-" * 50)
print(f"Selected: {best_th:.4f} (Precision: {best_prec:.4f})")

In [None]:
def predict_ox(test_df):
    results = []
    with torch.no_grad():
        for _, row in test_df.iterrows():
            user, item = row['user'], row['item']
            if user not in user2idx or item not in item2idx:
                recommend = 'X'
            else:
                u_idx = torch.LongTensor([user2idx[user]]).to(device)
                i_idx = torch.LongTensor([item2idx[item]]).to(device)
                score = final_model.predict(u_idx, i_idx, u_emb, i_emb).item()
                recommend = 'O' if score >= best_th else 'X'
            results.append({'user': user, 'item': item, 'recommend': recommend})
    return pd.DataFrame(results)

# Test on validation
preds = predict_ox(val_df[['user', 'item']])
o_ratio = (preds['recommend'] == 'O').mean()
print(f"\nO ratio: {100*o_ratio:.1f}%")
print(preds.head(10).to_string(index=False))

## 6. 최종 결과 요약

In [None]:
print("=" * 50)
print("LightGCN V4 Final Summary")
print("=" * 50)
print(f"\nOptimal Hyperparameters:")
print(f"  Embedding dim: {FINAL_EMB_DIM}")
print(f"  GCN layers: {FINAL_N_LAYERS}")
print(f"  Learning rate: {FINAL_LR}")
print(f"  Weight decay: {FINAL_WD}")
print(f"  Parameters: {sum(p.numel() for p in final_model.parameters()):,}")

print(f"\nPerformance:")
print(f"  Best Hit@10: {best_hit:.4f}")
print(f"  Best NDCG@10: {max(history['ndcg@10']):.4f}")
print(f"  Precision: {best_prec:.4f}")
print(f"  O ratio: {100*o_ratio:.1f}%")

print(f"\nV3 vs V4 Comparison:")
print(f"  V3 Hit@10: 0.7894")
print(f"  V4 Hit@10: {best_hit:.4f}")
print(f"  Improvement: {100*(best_hit - 0.7894)/0.7894:.2f}%")

In [None]:
# Test 파일 추론
# test_df = pd.read_csv('data/test.csv')
# final_preds = predict_ox(test_df)
# final_preds.to_csv('predictions_gnn_v4.csv', index=False)

print("Test inference code ready.")