In [7]:
import json
import numpy as np
from collections import defaultdict
from sklearn.preprocessing import normalize
from tqdm import tqdm

### JSONL 파일에서 리뷰 로드

In [8]:
file_path = "absa_mock_reviews_1000.json"
all_reviews = []

# The file is parsed as JSON Lines (one JSON object per line), even though
# its extension is ".json".
with open(file_path, "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank / whitespace-only lines: json.loads would otherwise raise
        # JSONDecodeError on a stray empty line in the file.
        if not line.strip():
            continue
        obj = json.loads(line)
        # Replace the JSON list under "vector" with an ndarray so later cells
        # can do numeric work on it directly.
        obj["vector"] = np.array(obj["vector"])
        all_reviews.append(obj)

### 유저 / 식당 벡터 누적

In [9]:

# Per-user and per-business lists of review vectors, keyed by id.
user_vecs = defaultdict(list)
biz_vecs = defaultdict(list)

for review in all_reviews:
    # Read all three fields up front, then file the same vector under both
    # the reviewing user and the reviewed business.
    reviewer, place, embedding = review["user_id"], review["business_id"], review["vector"]
    user_vecs[reviewer].append(embedding)
    biz_vecs[place].append(embedding)

### 평균 벡터 계산

In [10]:
# One embedding per id: the element-wise mean (axis 0) of all vectors
# collected for that user / business.
user_embed = {
    uid: np.asarray(vecs).mean(axis=0)
    for uid, vecs in user_vecs.items()
}
biz_embed = {
    bid: np.asarray(vecs).mean(axis=0)
    for bid, vecs in biz_vecs.items()
}

### 벡터 정규화

In [11]:
# Fix the id order first (dict insertion order) so that row i of each matrix
# belongs to user_ids[i] / biz_ids[i].
user_ids = list(user_embed)
biz_ids  = list(biz_embed)

# Stack the embeddings row-wise and pass them through sklearn's normalize
# with its default arguments.
user_matrix = normalize(np.stack([user_embed[key] for key in user_ids]))
biz_matrix  = normalize(np.stack([biz_embed[key] for key in biz_ids]))

### 코사인 유사도 계산 (dot product)

In [12]:
# scores[i, j] is the dot product of user row i with business row j
# (rows of both matrices were normalised in the previous cell).
scores = user_matrix @ biz_matrix.T

### 유저별 방문한 식당 기록

In [13]:
# Map each user id to the set of business ids that user has reviewed.
user2biz_seen = defaultdict(set)
for review in all_reviews:
    visited = user2biz_seen[review["user_id"]]
    visited.add(review["business_id"])

### 추천 생성 (미방문 식당 Top-N)

In [14]:
TOP_N = 10
recommendations = {}

for row, uid in zip(scores, user_ids):
    already_seen = user2biz_seen[uid]
    # Business column indices ordered from highest to lowest score.
    descending = np.argsort(row)[::-1]
    # Keep ranking order, drop businesses the user has already reviewed,
    # then keep at most the first TOP_N of what remains.
    unseen_ranked = [biz_ids[col] for col in descending if biz_ids[col] not in already_seen]
    recommendations[uid] = unseen_ranked[:TOP_N]

### 처음 5명 유저의 추천 결과 출력

In [15]:
# Print the recommendation list for the first five users, in the dict's
# insertion order (no ranking of users is applied).
first_five = list(recommendations)[:5]
for uid in first_five:
    print(f"{uid} → {recommendations[uid]}")

U0033 → ['B0005', 'B0028', 'B0002', 'B0033', 'B0031', 'B0035', 'B0042', 'B0034', 'B0020', 'B0045']
U0063 → ['B0042', 'B0005', 'B0047', 'B0044', 'B0016', 'B0009', 'B0034', 'B0020', 'B0018', 'B0015']
U0072 → ['B0049', 'B0002', 'B0005', 'B0003', 'B0042', 'B0039', 'B0032', 'B0044', 'B0015', 'B0034']
U0099 → ['B0050', 'B0039', 'B0015', 'B0046', 'B0003', 'B0029', 'B0043', 'B0026', 'B0035', 'B0007']
U0076 → ['B0032', 'B0041', 'B0047', 'B0042', 'B0044', 'B0016', 'B0023', 'B0039', 'B0029', 'B0009']
