In [1]:
from pymongo import MongoClient
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

# 1. MongoDB에서 데이터 불러오기
def load_user_data(uri, db_name, user_collection, song_collection):
    client = MongoClient(uri)
    db = client[db_name]
    user_col = db[user_collection]
    song_col = db[song_collection]
    
    users = list(user_col.find({}, {"_id": 0}))
    songs = list(song_col.find({}, {"_id": 0}))
    return users, songs

# 2. 장르 기반 벡터 생성 함수
def build_song_genre_vectors(songs):
    all_genres = sorted({genre.strip().lower() for song in songs for genre in song.get("genres", [])})
    genre_index = {genre: idx for idx, genre in enumerate(all_genres)}
    
    song_vectors = {}
    for song in songs:
        vec = np.zeros(len(all_genres))
        for genre in song.get("genres", []):
            genre = genre.strip().lower()
            if genre in genre_index:
                vec[genre_index[genre]] = 1
        song_vectors[song["title"]] = vec
    return song_vectors, all_genres

# 3. 사용자별 평균 벡터 생성
def build_user_vectors(users, song_vectors):
    user_vectors = {}
    for user in users:
        vectors = [song_vectors[title] for title in user["liked_songs"] if title in song_vectors]
        if vectors:
            user_vectors[user["user_id"]] = np.mean(vectors, axis=0)
    return user_vectors

# 4. 유저-유저 유사도 기반 추천
def recommend_by_user_similarity(target_user_id, users, user_vectors, top_k=2, top_n=5):
    if target_user_id not in user_vectors:
        raise ValueError(f"{target_user_id}에 대한 벡터가 없습니다.")
    
    target_vec = user_vectors[target_user_id].reshape(1, -1)
    similarities = []
    
    for user_id, vec in user_vectors.items():
        if user_id == target_user_id:
            continue
        sim = cosine_similarity(target_vec, vec.reshape(1, -1))[0][0]
        similarities.append((user_id, sim))
    
    similarities.sort(key=lambda x: x[1], reverse=True)
    top_users = [u for u, _ in similarities[:top_k]]

    target_likes = set(next(user["liked_songs"] for user in users if user["user_id"] == target_user_id))
    candidates = set()
    
    for user in users:
        if user["user_id"] in top_users:
            candidates.update(set(user["liked_songs"]))
    
    recommendations = list(candidates - target_likes)
    return recommendations[:top_n]

# ------------------ 실행 ------------------
if __name__ == "__main__":
    uri = "mongodb://localhost:27017/"
    db_name = "music"
    user_collection = "user_likes"
    song_collection = "music"
    
    users, songs = load_user_data(uri, db_name, user_collection, song_collection)
    song_vectors, genre_list = build_song_genre_vectors(songs)
    user_vectors = build_user_vectors(users, song_vectors)

    target_user = "user1"
    recs = recommend_by_user_similarity(target_user, users, user_vectors, top_k=2, top_n=5)

    print(f"\n 사용자 '{target_user}'에게 추천하는 곡:")
    for r in recs:
        print(f" - {r}")


KeyboardInterrupt: 

일단 가사는 제외하고 장르만 계산하는 거라서 word2vec을 제외한거야

In [None]:
from pymongo import MongoClient
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 1. MongoDB 데이터 불러오기
def load_data(uri, db_name, user_collection, song_collection):
    client = MongoClient(uri)
    db = client[db_name]
    users = list(db[user_collection].find({}, {"_id": 0}))
    songs = list(db[song_collection].find({}, {"_id": 0}))
    return users, songs

# 2. 장르 리스트 정리
def make_genres_list(genres):
    if isinstance(genres, list):
        return [g.strip().lower() for g in genres]
    elif isinstance(genres, str):
        return [genres.strip().lower()]
    else:
        return ["unknown"]

# 3. 장르 기반 벡터 생성 (Soft One-Hot)
def build_song_genre_vectors(songs):
    all_genres = sorted({g for song in songs for g in make_genres_list(song.get("genres", []))})
    genre_idx = {genre: idx for idx, genre in enumerate(all_genres)}

    similar_genres = {
        "k-pop": ["k-ballad", "soundtrack", "pop"],
        "k-ballad": ["k-pop", "soundtrack"],
        "k-hiphop": ["k-r&b", "hip-hop"],
        "k-r&b": ["k-hiphop", "k-pop"],
        "rock": ["britpop", "pop"],
        "pop": ["k-pop", "k-ballad"],
        "soundtrack": ["k-pop", "k-ballad"],
        "hip-hop": ["lo-fi", "k-hiphop"],
        "lo-fi": ["hip-hop"],
        "indie, modern rock": ["rock"],
        "britpop": ["rock"]
    }

    song_vectors = {}
    for song in songs:
        title = song.get("title", "")
        if not title:
            continue

        vec = np.zeros(len(all_genres))
        genres = make_genres_list(song.get("genres", []))
        for g in genres:
            if g in genre_idx:
                vec[genre_idx[g]] = 0.4
            if g in similar_genres:
                for sim_g in similar_genres[g]:
                    if sim_g in genre_idx:
                        vec[genre_idx[sim_g]] = max(vec[genre_idx[sim_g]], 0.2)
        
        song_vectors[title] = vec

    return song_vectors, all_genres

# 4. 사용자 벡터 만들기 (좋아요 곡 평균)
def build_user_vectors(users, song_vectors):
    user_vectors = {}
    for user in users:
        liked = user.get("liked_songs", [])
        vectors = [song_vectors[title] for title in liked if title in song_vectors]
        if vectors:
            user_vectors[user["user_id"]] = np.mean(vectors, axis=0)
    return user_vectors

# 5. 유저-유저 기반 추천
def recommend_user_based(target_user_id, users, user_vectors, top_k=2, top_n=5):
    if target_user_id not in user_vectors:
        raise ValueError(f"{target_user_id}에 대한 벡터가 없습니다.")

    target_vec = user_vectors[target_user_id].reshape(1, -1)
    similarities = []

    for user_id, vec in user_vectors.items():
        if user_id == target_user_id:
            continue
        sim = cosine_similarity(target_vec, vec.reshape(1, -1))[0][0]
        similarities.append((user_id, sim))

    similarities.sort(key=lambda x: x[1], reverse=True)
    top_users = [u for u, _ in similarities[:top_k]]

    target_likes = set(next(user["liked_songs"] for user in users if user["user_id"] == target_user_id))
    candidates = set()
    for user in users:
        if user["user_id"] in top_users:
            candidates.update(set(user["liked_songs"]))

    recommendations = list(candidates - target_likes)
    return recommendations[:top_n]

# 6. 실행
if __name__ == "__main__":
    uri = "mongodb://localhost:27017/"
    db_name = "music"
    user_collection = "user_likes"
    song_collection = "music"

    users, songs = load_data(uri, db_name, user_collection, song_collection)
    print(f"불러온 사용자 수: {len(users)}")
    print(f"불러온 곡 수: {len(songs)}")

    song_vectors, all_genres = build_song_genre_vectors(songs)
    print(f"장르 종류 수: {len(all_genres)}")

    user_vectors = build_user_vectors(users, song_vectors)
    print(f"사용자 벡터 수: {len(user_vectors)}")

    target_user = "user1"
    recommendations = recommend_user_based(target_user, users, user_vectors, top_k=2, top_n=5)

    print(f"\n'{target_user}'에게 추천하는 곡:")
    for title in recommendations:
        print(f" - {title}")


In [None]:
from pymongo import MongoClient
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# 1. 사용자별 좋아요 곡 리스트 불러오기
def load_all_user_likes(uri):
    client = MongoClient(uri)
    db = client["user"]
    user_collections = db.list_collection_names()

    users = []
    for name in user_collections:
        liked_docs = list(db[name].find({}, {"_id": 0}))
        liked_titles = [doc["title"] for doc in liked_docs if "title" in doc]
        users.append({"user_id": name, "liked_songs": liked_titles})
    return users

# 2. 전체 곡 정보 불러오기
def load_all_songs(uri, db_name, collection_name):
    client = MongoClient(uri)
    db = client[db_name]
    col = db[collection_name]
    return list(col.find({}, {"_id": 0}))

# 3. 곡별 장르 벡터 생성
def build_song_genre_vectors(songs):
    all_genres = sorted({g.strip().lower() for s in songs for g in s.get("genres", [])})
    genre_index = {g: i for i, g in enumerate(all_genres)}

    song_vectors = {}
    for s in songs:
        vec = np.zeros(len(all_genres))
        for g in s.get("genres", []):
            g = g.strip().lower()
            if g in genre_index:
                vec[genre_index[g]] = 1
        song_vectors[s["title"]] = vec
    return song_vectors, all_genres

# 4. 사용자 평균 장르 벡터 생성
def build_user_vectors(users, song_vectors):
    user_vectors = {}
    for u in users:
        vectors = [song_vectors[t] for t in u["liked_songs"] if t in song_vectors]
        if vectors:
            avg_vec = np.mean(vectors, axis=0)
            print(f"\n[유저 '{u['user_id']}' 평균 벡터]: {avg_vec}")
            user_vectors[u["user_id"]] = avg_vec
    return user_vectors

# 5. 협업 필터링 기반 추천
def recommend_user_based(target_user_id, users, user_vectors, top_k=2, top_n=5):
    if target_user_id not in user_vectors:
        raise ValueError(f"{target_user_id} 벡터 없음")

    target_vec = user_vectors[target_user_id].reshape(1, -1)
    print(f"\n[기준 유저 '{target_user_id}' 벡터] (길이: {target_vec.shape[1]})")
    print(target_vec)

    sims = []

    for uid, vec in user_vectors.items():
        if uid == target_user_id:
            continue
        sim = cosine_similarity(target_vec, vec.reshape(1, -1))[0][0]
        sims.append((uid, sim))
        print(f"\n - 유저 '{uid}' 와의 유사도: {sim:.4f}")

    sims.sort(key=lambda x: x[1], reverse=True)
    top_users = [u for u, _ in sims[:top_k]]

    print(f"\n[상위 {top_k} 유사 사용자]: {top_users}")

    target_likes = set(next(u["liked_songs"] for u in users if u["user_id"] == target_user_id))
    print(f"\n[사용자 '{target_user_id}'가 이미 좋아한 곡들]: {target_likes}")

    candidate_songs = set()

    for u in users:
        if u["user_id"] in top_users:
            print(f"\n- 유사 유저 '{u['user_id']}'의 좋아요 곡들: {u['liked_songs']}")
            candidate_songs.update(set(u["liked_songs"]))

    recommendations = list(candidate_songs - target_likes)
    print(f"\n[최종 추천 후보 (좋아요 곡 제외)]: {recommendations}")

    return recommendations[:top_n]


In [None]:
uri = "mongodb://localhost:27017/"
users = load_all_user_likes(uri)
songs = load_all_songs(uri, db_name="user", collection_name="김태현")

song_vectors, genre_list = build_song_genre_vectors(songs)
user_vectors = build_user_vectors(users, song_vectors)

target_user = "최민호"
recommendations = recommend_user_based(target_user, users, user_vectors, top_k=2, top_n=5)

print(f"\n 사용자 '{target_user}'에게 추천:")
for title in recommendations:
    print(f" - {title}")


In [None]:
from pymongo import MongoClient
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# 1. 사용자별 좋아요 곡 리스트 불러오기
def load_all_user_likes(uri):
    client = MongoClient(uri)
    db = client["user"]
    user_collections = db.list_collection_names()

    users = []
    for name in user_collections:
        liked_docs = list(db[name].find({}, {"_id": 0}))
        liked_titles = [doc["title"] for doc in liked_docs if "title" in doc]
        users.append({"user_id": name, "liked_songs": liked_titles})
    return users

# 2. 전체 곡 정보 불러오기
def load_all_songs(uri, db_name, collection_name):
    client = MongoClient(uri)
    db = client[db_name]
    col = db[collection_name]
    return list(col.find({}, {"_id": 0}))

# 3. 곡별 장르 벡터 생성
def build_song_genre_vectors(songs):
    all_genres = sorted({g.strip().lower() for s in songs for g in s.get("genres", [])})
    genre_index = {g: i for i, g in enumerate(all_genres)}

    song_vectors = {}
    for s in songs:
        vec = np.zeros(len(all_genres))
        for g in s.get("genres", []):
            g = g.strip().lower()
            if g in genre_index:
                vec[genre_index[g]] = 1
        song_vectors[s["title"]] = vec
    return song_vectors, all_genres

# 4. 사용자 평균 장르 벡터 생성
def build_user_vectors(users, song_vectors):
    user_vectors = {}
    for u in users:
        song_vecs = [song_vectors[t] for t in u["liked_songs"] if t in song_vectors]
        if song_vecs:
            user_vectors[u["user_id"]] = np.mean(song_vecs, axis=0)
    return user_vectors

# 5. 협업 필터링 기반 추천
def recommend_user_based(target_user_id, users, user_vectors, top_k=2, top_n=5):
    if target_user_id not in user_vectors:
        raise ValueError(f"{target_user_id} 벡터 없음")

    target_vec = user_vectors[target_user_id].reshape(1, -1)
    sims = []

    for uid, vec in user_vectors.items():
        if uid == target_user_id:
            continue
        sim = cosine_similarity(target_vec, vec.reshape(1, -1))[0][0]
        print(f" - 유저 '{uid}' 와의 유사도: {sim:.4f}")
        sims.append((uid, sim))

    sims.sort(key=lambda x: x[1], reverse=True)
    top_users = [u for u, _ in sims[:top_k]]
    print(f"\n[상위 {top_k} 유사 사용자]: {top_users}")

    target_likes = set(next(u["liked_songs"] for u in users if u["user_id"] == target_user_id))
    print(f"\n[사용자 '{target_user_id}'가 이미 좋아한 곡들]: {target_likes}")

    candidate_songs = set()
    for u in users:
        if u["user_id"] in top_users:
            print(f"\n- 유사 유저 '{u['user_id']}'의 좋아요 곡들: {u['liked_songs']}")
            candidate_songs.update(set(u["liked_songs"]))

    final_candidates = list(candidate_songs - target_likes)
    print(f"\n[최종 추천 후보 (좋아요 곡 제외)]: {final_candidates}")

    return final_candidates[:top_n]

# 6. 유저별 TOP3 장르 출력
def print_user_top_genres(user_vectors, genre_list):
    for user_id, vec in user_vectors.items():
        print(f"\n[유저 '{user_id}' 평균 장르 벡터]:")
        print(f" - 벡터: {vec}")
        print(f" - 장르 순서: {genre_list}")

        top_indices = vec.argsort()[::-1][:3]
        top_genres = [(genre_list[i], vec[i]) for i in top_indices if vec[i] > 0]

        print(" - 선호 장르 Top 3:")
        for rank, (genre, value) in enumerate(top_genres, start=1):
            print(f"   {rank}. {genre} (비율: {value:.4f})")

# ------------------ 실행 ------------------
if __name__ == "__main__":
    uri = "mongodb://localhost:27017/"
    users = load_all_user_likes(uri)
    songs = load_all_songs(uri, db_name="user", collection_name="김태현")

    song_vectors, genre_list = build_song_genre_vectors(songs)
    user_vectors = build_user_vectors(users, song_vectors)

    print_user_top_genres(user_vectors, genre_list)  # 추가된 분석 출력

    target_user = "최민호"
    recommendations = recommend_user_based(target_user, users, user_vectors, top_k=2, top_n=5)

    print(f"\n 사용자 '{target_user}'에게 추천:")
    for title in recommendations:
        print(f" - {title}")


In [None]:
from pymongo import MongoClient
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 1. 모든 유저 데이터 불러오기
def load_users(uri):
    client = MongoClient(uri)
    db = client["user"]
    users = []
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        liked_songs = [doc["title"] for doc in collection.find({}, {"_id": 0}) if "title" in doc]
        users.append({"user_id": collection_name, "liked_songs": liked_songs})
    return users

# 2. 모든 곡 데이터 불러오기
def load_songs(uri, db_name, collection_name):
    client = MongoClient(uri)
    db = client[db_name]
    return list(db[collection_name].find({}, {"_id": 0}))

# 3. 곡별 장르 벡터 만들기
def make_song_vectors(songs):
    genres = sorted({genre.strip().lower() for song in songs for genre in song.get("genres", [])})
    genre_idx = {genre: idx for idx, genre in enumerate(genres)}
    song_vectors = {}
    for song in songs:
        vec = np.zeros(len(genres))
        for genre in song.get("genres", []):
            genre = genre.strip().lower()
            if genre in genre_idx:
                vec[genre_idx[genre]] = 1
        song_vectors[song["title"]] = vec
    return song_vectors, genres

# 4. 유저별 평균 장르 벡터 만들기
def make_user_vectors(users, song_vectors):
    user_vectors = {}
    for user in users:
        genre_sum = np.zeros(len(next(iter(song_vectors.values()))))
        count = 0
        for title in user["liked_songs"]:
            if title in song_vectors:
                genre_sum += song_vectors[title]
                count += 1
        if count > 0:
            user_vectors[user["user_id"]] = genre_sum / count
    return user_vectors

# 5. 유저별 장르 TOP3 출력
def print_user_top_genres(user_vectors, genres):
    for user_id, vec in user_vectors.items():
        print(f"\n[유저: {user_id}]")
        top_idx = vec.argsort()[::-1][:3]
        for i, idx in enumerate(top_idx, start=1):
            print(f"{i}위: {genres[idx]} (점수: {vec[idx]:.4f})")

# 6. 유사한 유저 기반 추천
def recommend_songs(target_user_id, users, user_vectors, top_k=2, top_n=5):
    if target_user_id not in user_vectors:
        raise ValueError(f"{target_user_id} 벡터가 없습니다.")

    target_vec = user_vectors[target_user_id].reshape(1, -1)

    similarities = []
    for user_id, vec in user_vectors.items():
        if user_id == target_user_id:
            continue
        sim = cosine_similarity(target_vec, vec.reshape(1, -1))[0][0]
        similarities.append((user_id, sim))
        print(f"- '{user_id}'와 유사도: {sim:.4f}")

    similarities.sort(key=lambda x: x[1], reverse=True)
    top_users = [uid for uid, _ in similarities[:top_k]]
    print(f"\n[상위 {top_k}명]: {top_users}")

    target_likes = set(next(u["liked_songs"] for u in users if u["user_id"] == target_user_id))
    candidates = set()
    for user in users:
        if user["user_id"] in top_users:
            candidates.update(user["liked_songs"])

    recommendations = list(candidates - target_likes)
    print(f"\n[추천 후보 (기존 좋아요 제외)]: {recommendations}")

    return recommendations[:top_n]

# 7. 메인 실행부
if __name__ == "__main__":
    uri = "mongodb://localhost:27017/"

    # 데이터 불러오기
    users = load_users(uri)
    songs = load_songs(uri, db_name="user", collection_name="김태현")

    # 벡터 생성
    song_vectors, genres = make_song_vectors(songs)
    user_vectors = make_user_vectors(users, song_vectors)

    # 장르 TOP3 출력
    print_user_top_genres(user_vectors, genres)

    # 추천
    target_user = "최민호"
    recommendations = recommend_songs(target_user, users, user_vectors, top_k=2, top_n=5)

    # 결과 출력
    print(f"\n'{target_user}'님께 추천하는 곡:")
    for title in recommendations:
        print(f"- {title}")


In [5]:
from pymongo import MongoClient
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 1. 모든 유저 데이터 불러오기
def load_users(uri):
    client = MongoClient(uri)
    db = client["user"]
    users = []
    
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        liked_songs = [doc["title"] for doc in collection.find({}, {"_id": 0}) if "title" in doc]
        users.append({"user_id": collection_name, "liked_songs": liked_songs})
    
    return users

# 2. 모든 곡 데이터 불러오기 (한 명 기준: 김태현 컬렉션)
def load_songs(uri, db_name, collection_name):
    client = MongoClient(uri)
    db = client[db_name]
    return list(db[collection_name].find({}, {"_id": 0}))

# 3. 곡별 장르 벡터 만들기
def make_song_vectors(songs):
    genres = sorted({genre.strip().lower() for song in songs for genre in song.get("genres", [])})
    genre_idx = {genre: idx for idx, genre in enumerate(genres)}
    
    song_vectors = {}
    for song in songs:
        vec = np.zeros(len(genres))
        for genre in song.get("genres", []):
            genre = genre.strip().lower()
            if genre in genre_idx:
                vec[genre_idx[genre]] = 1
        song_vectors[song["title"]] = vec
    
    return song_vectors, genres

# 4. 유저별 평균 장르 벡터 만들기
def make_user_vectors(users, song_vectors):
    user_vectors = {}
    for user in users:
        genre_sum = np.zeros(len(next(iter(song_vectors.values()))))
        count = 0
        for title in user["liked_songs"]:
            if title in song_vectors:
                genre_sum += song_vectors[title]
                count += 1
        if count > 0:
            user_vectors[user["user_id"]] = genre_sum / count
    return user_vectors

# 5. 유저별 장르 TOP3 출력
def print_user_top_genres(user_vectors, genres):
    for user_id, vec in user_vectors.items():
        print(f"\n[유저: {user_id}]")
        top_idx = vec.argsort()[::-1][:3]
        for i, idx in enumerate(top_idx, start=1):
            print(f"{i}위: {genres[idx]} (점수: {vec[idx]:.4f})")

# 6. 유사한 유저 기반 추천
def recommend_songs(target_user_id, users, user_vectors, top_k=2, top_n=5):
    if target_user_id not in user_vectors:
        raise ValueError(f"{target_user_id} 벡터가 없습니다.")
    
    target_vec = user_vectors[target_user_id].reshape(1, -1)
    similarities = []

    for user_id, vec in user_vectors.items():
        if user_id == target_user_id:
            continue
        sim = cosine_similarity(target_vec, vec.reshape(1, -1))[0][0]
        similarities.append((user_id, sim))
        print(f"- '{user_id}'와 유사도: {sim:.4f}")

    similarities.sort(key=lambda x: x[1], reverse=True)
    top_users = [uid for uid, _ in similarities[:top_k]]
    print(f"\n[상위 {top_k}명]: {top_users}")

    target_likes = set(next(u["liked_songs"] for u in users if u["user_id"] == target_user_id))
    candidates = set()
    for user in users:
        if user["user_id"] in top_users:
            candidates.update(user["liked_songs"])

    recommendations = list(candidates - target_likes)
    print(f"\n[추천 후보 (기존 좋아요 제외)]: {recommendations}")

    return recommendations[:top_n]

def load_all_songs(uri, db_name):
    client = MongoClient(uri)
    db = client[db_name]
    all_songs = []
    
    for collection_name in db.list_collection_names():
        collection = db[collection_name]
        songs = list(collection.find({}, {"_id": 0}))
        all_songs.extend(songs)
    
    return all_songs

# (선택) 두 사람의 유사도만 직접 비교하는 함수
def compare_users(user_vectors, user1, user2):
    vec1 = user_vectors[user1].reshape(1, -1)
    vec2 = user_vectors[user2].reshape(1, -1)
    sim = cosine_similarity(vec1, vec2)[0][0]
    print(f"'{user1}'와 '{user2}'의 유사도: {sim:.4f}")


if __name__ == "__main__":
    uri = "mongodb://localhost:27017/"
    
    
    users = load_users(uri)
    songs = load_all_songs(uri, db_name="user")
    
   
    song_vectors, genres = make_song_vectors(songs)
    user_vectors = make_user_vectors(users, song_vectors)

    
    print_user_top_genres(user_vectors, genres)

    
    target_user = "최민호"
    recommendations = recommend_songs(target_user, users, user_vectors, top_k=2, top_n=5)

    
    print(f"\n'{target_user}'님께 추천하는 곡:")
    for title in recommendations:
        print(f"- {title}")



[유저: 최민호]
1위: k-ballad (점수: 0.6000)
2위: k-pop (점수: 0.5333)
3위: k-indie (점수: 0.1333)

[유저: 윤승서]
1위: k-pop (점수: 0.5667)
2위: pop (점수: 0.3333)
3위: k-indie (점수: 0.2667)

[유저: 류희철]
1위: k-ballad (점수: 0.5000)
2위: k-pop (점수: 0.3000)
3위: k-hiphop (점수: 0.2000)

[유저: 서동진]
1위: alternative rock (점수: 0.4000)
2위: rock (점수: 0.3000)
3위: synth-pop (점수: 0.1000)

[유저: 백나리]
1위: k-ballad (점수: 0.6000)
2위: alternative rock (점수: 0.2000)
3위: k-indie (점수: 0.2000)

[유저: 규진]
1위: k-ballad (점수: 0.3000)
2위: k-pop (점수: 0.3000)
3위: r&b/soul (점수: 0.2000)

[유저: 김태호]
1위: k-pop (점수: 0.6250)
2위: k-hip-hop (점수: 0.2500)
3위: k-rock (점수: 0.2500)

[유저: 김태현]
1위: k-hiphop (점수: 0.6000)
2위: k-pop (점수: 0.2333)
3위: k-ballad (점수: 0.1000)

[유저: 이승은]
1위: k-ballad (점수: 0.5000)
2위: k-pop (점수: 0.2000)
3위: ost (점수: 0.2000)

[유저: 천지성]
1위: k-indie (점수: 0.6000)
2위: k-pop (점수: 0.5000)
3위: k-ballad (점수: 0.2000)

[유저: 김규표]
1위: k-hip-hop (점수: 0.2000)
2위: k-pop (점수: 0.2000)
3위: k-rock (점수: 0.2000)
- '윤승서'와 유사도: 0.5860
- '류희철'와 유사도: 0.8871
- '서동진'와 유

In [6]:
from pymongo import MongoClient
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 1. MongoDB에서 모든 유저와 곡 불러오기
def load_user_item_matrix(uri):
    client = MongoClient(uri)
    db = client["user"]
    user_ids = db.list_collection_names()
    
    # 모든 곡 수집
    all_titles = set()
    user_likes = {}

    for user_id in user_ids:
        collection = db[user_id]
        titles = [doc["title"] for doc in collection.find({}, {"_id": 0}) if "title" in doc]
        user_likes[user_id] = titles
        all_titles.update(titles)
    
    all_titles = sorted(all_titles)
    title_index = {title: i for i, title in enumerate(all_titles)}

    # 유저-곡 행렬 생성 (binary matrix)
    matrix = np.zeros((len(user_ids), len(all_titles)))
    for i, user_id in enumerate(user_ids):
        for title in user_likes[user_id]:
            if title in title_index:
                matrix[i][title_index[title]] = 1

    return user_ids, all_titles, matrix, user_likes

# 2. 협업 필터링 추천
def recommend_user_cf(target_user, user_ids, all_titles, matrix, user_likes, top_k=2, top_n=5):
    if target_user not in user_ids:
        raise ValueError("해당 유저 없음")

    user_idx = user_ids.index(target_user)
    target_vector = matrix[user_idx].reshape(1, -1)

    similarities = []
    for i, uid in enumerate(user_ids):
        if uid == target_user:
            continue
        sim = cosine_similarity(target_vector, matrix[i].reshape(1, -1))[0][0]
        similarities.append((uid, sim))

    similarities.sort(key=lambda x: x[1], reverse=True)
    top_users = [uid for uid, _ in similarities[:top_k]]

    print(f"[{target_user}]와 유사한 사용자 Top-{top_k}: {top_users}")

    # 추천 후보: 유사한 사용자가 좋아했으나 내가 안 들은 곡
    target_likes = set(user_likes[target_user])
    candidates = set()
    for uid in top_users:
        candidates.update(user_likes[uid])
    
    recommendations = list(candidates - target_likes)
    print(f"[추천 후보]: {recommendations}")
    
    return recommendations[:top_n]

# 3. 실행
if __name__ == "__main__":
    uri = "mongodb://localhost:27017/"
    target_user = "최민호"

    user_ids, all_titles, matrix, user_likes = load_user_item_matrix(uri)
    recommendations = recommend_user_cf(target_user, user_ids, all_titles, matrix, user_likes, top_k=2, top_n=5)

    print(f"\n[{target_user}]에게 추천할 곡:")
    for title in recommendations:
        print(f"- {title}")


[최민호]와 유사한 사용자 Top-2: ['김태호', '류희철']
[추천 후보]: ['Whiplash', 'TONIGHT', '동이 틀 때', 'Rush (feat. 정인)', '사랑하지마요', 'Cafe', '나는 반딧불', '뜨래요', '고독하구만', '그래서 그래 (feat. 윤하)', '봄 내음보다 너를', '가까운듯 먼 그대여', '나는 아픈건 딱 질색이니까', '봄날은 간다']

[최민호]에게 추천할 곡:
- Whiplash
- TONIGHT
- 동이 틀 때
- Rush (feat. 정인)
- 사랑하지마요


In [10]:
from pymongo import MongoClient
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 1. MongoDB에서 사용자-아이템 행렬 생성
def load_user_item_matrix(uri):
    client = MongoClient(uri)
    db = client["user"]
    user_ids = db.list_collection_names()
    
    all_titles = set()
    user_likes = {}

    for user_id in user_ids:
        collection = db[user_id]
        titles = [doc["title"] for doc in collection.find({}, {"_id": 0}) if "title" in doc]
        user_likes[user_id] = titles
        all_titles.update(titles)
    
    all_titles = sorted(all_titles)
    title_index = {title: i for i, title in enumerate(all_titles)}

    matrix = np.zeros((len(user_ids), len(all_titles)))
    for i, user_id in enumerate(user_ids):
        for title in user_likes[user_id]:
            if title in title_index:
                matrix[i][title_index[title]] = 1

    return user_ids, all_titles, matrix, user_likes

# 2. 협업 필터링 추천
def recommend_user_cf(target_user, user_ids, all_titles, matrix, user_likes, top_k=2):
    if target_user not in user_ids:
        raise ValueError("해당 유저 없음")

    user_idx = user_ids.index(target_user)
    target_vector = matrix[user_idx].reshape(1, -1)

    # 유사도 계산
    similarities = []
    for i, uid in enumerate(user_ids):
        if uid == target_user:
            continue
        sim = cosine_similarity(target_vector, matrix[i].reshape(1, -1))[0][0]
        similarities.append((uid, sim))

    similarities.sort(key=lambda x: x[1], reverse=True)
    top_users = [uid for uid, _ in similarities[:top_k]]

    print(f"\n[1] '{target_user}'가 좋아한 곡 목록:")
    for title in user_likes[target_user]:
        print(f"  - {title}")

    print(f"\n[2] 유사 사용자 Top-{top_k}:")
    for uid in top_users:
        print(f"  {uid}의 좋아한 곡:")
        for title in user_likes[uid]:
            print(f"    - {title}")

    # 공통 곡 및 추천 곡 수집
    target_likes = set(user_likes[target_user])
    combined_songs = []

    print(f"\n[3] '{target_user}'와 유사한 사용자의 공통으로 좋아한 곡:")
    for uid in top_users:
        common = set(user_likes[uid]) & target_likes
        if common:
            print(f"  {uid}와 공통:")
            for song in common:
                print(f"    - {song}")
        combined_songs.extend(user_likes[uid])

    # 추천 곡 (타겟 유저가 안 들은 곡)
    recommendations = list(set(combined_songs) - target_likes)

    print(f"\n[4] '{target_user}'에게 추천할 곡 (유사 유저가 좋아했지만 내가 안 들은 곡):")
    for title in recommendations:
        print(f"  - {title}")

    return recommendations

# 3. 실행
if __name__ == "__main__":
    uri = "mongodb://localhost:27017/"
    target_user = "최민호"

    user_ids, all_titles, matrix, user_likes = load_user_item_matrix(uri)
    recommendations = recommend_user_cf(target_user, user_ids, all_titles, matrix, user_likes, top_k=2)

    print(f"\n최종 추천 목록:")
    for title in recommendations:
        print(f"- {title}")



[1] '최민호'가 좋아한 곡 목록:
  - 청춘만화
  - 나무
  - Here With Me
  - 너라는 별
  - ...사랑했잖아...(2024)
  - HOME SWEET HOME
  - 봄여름가을겨울
  - Drowning
  - HAPPY
  - 봄이 와도
  - 플러팅
  - 내 삶의 반
  - 숲
  - 널 미워하기로 했어
  - 그날에 나는 맘이 편했을까
  - 그리워 그리워
  - 연
  - 사랑..그게 뭔데
  - 같은 베개
  - 너 없인 안 된다
  - 그리워하다
  - 아름답고도 아프구나
  - 나의 바람
  - 우리 사랑하지 말아요
  - 중독된 사랑
  - BETELGEUSE
  - Heaven
  - 여전히 아름다운지
  - yours
  - 무제

[2] 유사 사용자 Top-2:
  김태호의 좋아한 곡:
    - 사랑하지마요
    - Rush (feat. 정인)
    - 동이 틀 때
    - TONIGHT
    - Drowning
    - HAPPY
    - Cafe
    - 그래서 그래 (feat. 윤하)
  류희철의 좋아한 곡:
    - 나는 반딧불
    - 뜨래요
    - 고독하구만
    - 봄 내음보다 너를
    - Whiplash
    - 가까운듯 먼 그대여
    - 나무
    - 나는 아픈건 딱 질색이니까
    - 봄날은 간다
    - 봄여름가을겨울

[3] '최민호'와 유사한 사용자의 공통으로 좋아한 곡:
  김태호와 공통:
    - Drowning
    - HAPPY
  류희철와 공통:
    - 봄여름가을겨울
    - 나무

[4] '최민호'에게 추천할 곡 (유사 유저가 좋아했지만 내가 안 들은 곡):
  - Whiplash
  - TONIGHT
  - 동이 틀 때
  - Rush (feat. 정인)
  - 사랑하지마요
  - Cafe
  - 나는 반딧불
  - 뜨래요
  - 고독하구만
  - 그래서 그래 (feat. 윤하)
  - 봄 내음보다 너를
  - 가까운듯 먼 그