In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# 1) Load the model and encoder (example)
#    - Load the state_dict and item_encoder that were saved at training time
from your_project.training.trainer import TwoTowerModel
import joblib

# Load model parameters.
# NOTE(review): the `...` arguments are unfilled placeholders and must be
# replaced with the sizes used at training time before this cell can run.
# num_items presumably equals len(item_encoder.classes_) -- confirm against
# the training code.
model = TwoTowerModel(num_users=..., num_items=..., embed_dim=32)
# map_location="cpu" lets a checkpoint that was saved from a GPU run be
# deserialized on a CPU-only host instead of raising during torch.load.
model.load_state_dict(torch.load("src/models/two_tower.pt", map_location="cpu"))
model.eval()  # switch the module to evaluation mode

# Load item_encoder (LabelEncoder).
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
item_encoder = joblib.load("src/models/item_encoder.pkl")
num_items = len(item_encoder.classes_)

# 2) Extract embeddings for every item
# Build the index tensor on the same device as the model's weights so the
# lookup also works when the model has been moved off the CPU.
_model_device = next(model.parameters()).device
with torch.no_grad():  # no autograd graph is needed for a pure lookup
    item_indices = torch.arange(num_items, device=_model_device)  # 0 .. num_items-1
    # expected shape: (num_items, embed_dim) -- assumes item_embedding is an
    # embedding lookup keyed by the encoder's integer ids; confirm in the model
    item_emb = model.item_embedding(item_indices)
# Tensor -> NumPy array (copied to host memory first)
item_emb_np = item_emb.cpu().numpy()

# 3) Dimensionality reduction (PCA example)
pca = PCA(n_components=2)
item_emb_2d = pca.fit_transform(item_emb_np)  # expected shape: (num_items, 2)

# 4) Clustering (KMeans example)
# NOTE: the clusters are fit on the 2-D PCA projection, not on the full
# embedding vectors, so they describe the structure visible in the plot.
# n_init is pinned explicitly: its default differs between scikit-learn
# releases (10 historically, 'auto' in newer versions), which would otherwise
# make the clustering version-dependent despite the fixed random_state.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(item_emb_2d)

# 5) Visualization
# Scatter the 2-D item coordinates, colouring each point by its cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    item_emb_2d[:, 0],
    item_emb_2d[:, 1],
    c=cluster_labels,
    cmap="rainbow",
    alpha=0.7,
)
fig.colorbar(points, ax=ax)  # colour scale for the cluster labels
ax.set_title("Item Embeddings Clustering (PCA + KMeans)")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
plt.show()


In [None]:
from sklearn.manifold import TSNE

# Alternative 2-D projection of the item embeddings using t-SNE.
# scikit-learn requires perplexity to be smaller than the number of samples,
# so cap the usual value of 30 for small item catalogs.
tsne_perplexity = min(30, max(1, item_emb_np.shape[0] - 1))
tsne = TSNE(n_components=2, perplexity=tsne_perplexity, random_state=42)
# NOTE: this rebinds item_emb_2d, replacing any earlier 2-D projection held
# under that name; re-run the earlier projection step to get it back.
item_emb_2d = tsne.fit_transform(item_emb_np)
