In [21]:
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
import pandas as pd
import json
import os
# Server-specific setup: order GPUs by PCI bus id and expose only GPU "3".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})
# GPU setup: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Q-Network class definition
class QNetwork(nn.Module):
    """Three-layer MLP over concatenated album, song and genre embeddings.

    The three inputs are each flattened to a single row, concatenated along
    the feature axis, and passed through two ReLU hidden layers and a final
    linear layer.

    Args:
        text_embedding_dim: Width of the album and of the song embedding;
            also the width of the network output.
        genre_embedding_dim: Width of the genre embedding.
        hidden_dim: Width of both hidden layers.
    """

    def __init__(self, text_embedding_dim, genre_embedding_dim, hidden_dim):
        super().__init__()
        # Album and song embeddings share the text width, hence the factor 2.
        input_dim = text_embedding_dim * 2 + genre_embedding_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, text_embedding_dim)

    def _as_row(self, value):
        """Return ``value`` as a ``(1, N)`` tensor with fc1's dtype and device."""
        reference = self.fc1.weight
        # as_tensor accepts lists, NumPy arrays and tensors alike. Unlike
        # torch.tensor it does not warn on (or detach) tensor inputs, and the
        # explicit dtype keeps float64 inputs from clashing with the layers.
        row = torch.as_tensor(value, dtype=reference.dtype, device=reference.device)
        return row.reshape(1, -1)

    def forward(self, album_emb, song_emb, genre_emb):
        """Compute the network output for one (album, song, genre) triple.

        Args:
            album_emb: Album embedding (nested list, array or tensor).
            song_emb: Song embedding (nested list, array or tensor).
            genre_emb: Genre embedding (nested list, array or tensor).

        Returns:
            Tensor of shape ``(1, text_embedding_dim)`` on the module's device.
        """
        x = torch.cat(
            (self._as_row(album_emb), self._as_row(song_emb), self._as_row(genre_emb)),
            dim=1,
        )
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Load the trained model
model_path = 'file path for your weight file'  # placeholder: set to the saved state-dict file
text_embedding_dim = 768
genre_embedding_dim = 8
hidden_dim = 256
model = QNetwork(text_embedding_dim, genre_embedding_dim, hidden_dim).to(device)

# Restore the saved weights. map_location remaps the stored tensors onto the
# device selected above, so a checkpoint saved on a GPU also loads on a
# CPU-only machine or under a different visible GPU index.
model.load_state_dict(torch.load(model_path, map_location=device))
# Switch to inference mode; as the last expression this also displays the model.
model.eval()


QNetwork(
  (fc1): Linear(in_features=1544, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=768, bias=True)
)

In [22]:
import torch
from transformers import BertTokenizer, BertModel
import pandas as pd
import json

# GPU setup: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the multilingual BERT tokenizer and model, then move the model to the device.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
bert_model = BertModel.from_pretrained('bert-base-multilingual-cased')
bert_model = bert_model.to(device)

# Helper: text encoding
def encode_text(text):
    """Encode text with the module-level BERT model into a mean-pooled vector.

    Args:
        text: Input handed unchanged to ``tokenizer``.

    Returns:
        NumPy array on the CPU holding the mean of the model's last hidden
        state over dim 1. The mean covers every position along that axis;
        the attention mask is not used.
    """
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True).to(device)
    # Inference only: without no_grad every call would build an autograd graph
    # and hold on to intermediate activations for no purpose.
    with torch.no_grad():
        outputs = bert_model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).detach().cpu().numpy()

# Load the song metadata
song_meta = pd.read_csv('./filtered_song_meta.csv')

# Build the album-name and song-name embeddings
album_embeddings = song_meta['album_name'].apply(encode_text)
song_embeddings = song_meta['song_name'].apply(encode_text)

# The 'embedding' column is parsed from its text form (presumably a Python
# list literal -- confirm against the CSV). ast.literal_eval only accepts
# literals, whereas eval would execute any expression found in the file.
import ast
genre_embeddings = song_meta['embedding'].apply(ast.literal_eval).apply(lambda x: torch.tensor(x, dtype=torch.float32).cpu().numpy())

# Q-Network output embedding
def get_qnetwork_embedding(row):
    """Run the module-level Q-Network on one row of stored embeddings.

    Args:
        row: Row exposing 'album_embedding', 'song_embedding' and
            'genre_embedding' entries.

    Returns:
        The model output converted to nested Python lists.
    """
    features = (row['album_embedding'], row['song_embedding'], row['genre_embedding'])
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        output = model(*features)
        return output.cpu().numpy().tolist()

# Assemble the embeddings table (arrays are stored as plain Python lists)
embeddings_df = pd.DataFrame({
    'id': song_meta['id'],
    'album_embedding': album_embeddings.apply(lambda x: x.tolist()),
    'song_embedding': song_embeddings.apply(lambda x: x.tolist()),
    'genre_embedding': genre_embeddings.apply(lambda x: x.tolist())
})

# Add the Q-Network output embedding for every row
embeddings_df['qnetwork_embedding'] = embeddings_df.apply(get_qnetwork_embedding, axis=1)

# Save as CSV. The status message is built from the same path that is
# written, so the two can no longer disagree.
output_path = './embeddings.csv'
embeddings_df.to_csv(output_path, index=False)
print(f"Q-Network embeddings saved to '{output_path}'")


Q-Network embeddings saved to 'fine_qnetwork_embeddings2.csv'


In [24]:
import torch
from transformers import BertTokenizer, BertModel
import pandas as pd
import numpy as np
import os
# Server-specific setup: order GPUs by PCI bus id and expose only GPU "3".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})

# GPU setup: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the multilingual BERT tokenizer and model, then move the model to the device.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
bert_model = BertModel.from_pretrained('bert-base-multilingual-cased')
bert_model = bert_model.to(device)

# Helper: text encoding
def encode_text(text):
    """Encode text with the module-level BERT model into a mean-pooled vector.

    Args:
        text: Input handed unchanged to ``tokenizer``.

    Returns:
        NumPy array on the CPU holding the mean of the model's last hidden
        state over dim 1. The mean covers every position along that axis;
        the attention mask is not used.
    """
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True).to(device)
    # Inference only: without no_grad every call would build an autograd graph
    # and hold on to intermediate activations for no purpose.
    with torch.no_grad():
        outputs = bert_model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).detach().cpu().numpy()


# Load the stored embeddings. The path must match the file produced by
# embeddings_df.to_csv('./embeddings.csv', ...); the previous './embeddings'
# (no extension) does not exist.
# Alternative input file: './simple_qnetwork_embeddings.csv'
import ast
embeddings_df = pd.read_csv('./embeddings.csv')
# The CSV stores each embedding as the text of a nested list. literal_eval
# parses literals only, whereas eval would execute whatever the file contains.
embeddings_df['qnetwork_embedding'] = embeddings_df['qnetwork_embedding'].apply(ast.literal_eval)

# Load the song metadata and build the id -> (song_name, artist_name_basket) mapping
song_meta = pd.read_csv('./filtered_song_meta.csv')
id_to_song_info = dict(zip(song_meta['id'], zip(song_meta['song_name'], song_meta['artist_name_basket'])))

# Prediction function
def predict_songs(prompt, embeddings_df, num_recommendations=10, weight=0.01):
    """Interactively recommend songs whose Q-Network embedding matches a prompt.

    Each round picks the not-yet-recommended song with the highest cosine
    similarity to the current prompt embedding, prints it, asks the user for
    a numeric reward, and blends the chosen embedding (scaled by the reward)
    into the prompt embedding before the next round.

    Args:
        prompt: Text encoded with ``encode_text`` to form the initial state.
        embeddings_df: DataFrame with an 'id' column and a 'qnetwork_embedding'
            column of already-parsed (nested) lists of floats.
        num_recommendations: Maximum number of songs to recommend.
        weight: Mixing factor for the state update
            ``(1 - weight) * state + weight * song_embedding * reward``.

    Returns:
        List of dicts with keys 'id', 'title', 'artists' and 'qnetwork_emb'
        (a ``(1, D)`` tensor), in recommendation order.

    Notes:
        Reads the module-level ``id_to_song_info``, ``encode_text`` and
        ``device``. Blocks on ``input()`` once per recommended song.
    """
    import ast  # local import so the function does not depend on cell-level imports

    def _format_artists(raw):
        """Render the stored artist list as a comma-separated string."""
        # Missing ids fall back to the plain string 'Unknown Artist', and CSV
        # gaps arrive as NaN; neither is a list literal, so never evaluate them.
        if not isinstance(raw, str):
            return 'Unknown Artist'
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return raw
        if isinstance(parsed, (list, tuple, set)):
            return ', '.join(str(name) for name in parsed)
        return str(parsed)

    prompt_emb = torch.tensor(encode_text(prompt)).view(1, -1).to(device)
    recommended_songs = []
    if len(embeddings_df) == 0:
        return recommended_songs

    # Build the candidate matrix once instead of re-creating one tensor per
    # row for every recommendation round.
    song_ids = embeddings_df['id'].tolist()
    all_embs = torch.tensor(embeddings_df['qnetwork_embedding'].tolist(), dtype=torch.float32)
    all_embs = all_embs.view(len(song_ids), -1).to(device)

    # Rows sharing an id are excluded together once that id is recommended.
    positions_by_id = {}
    for pos, song_id in enumerate(song_ids):
        positions_by_id.setdefault(song_id, []).append(pos)
    available = torch.ones(len(song_ids), dtype=torch.bool, device=device)

    for i in range(num_recommendations):
        similarities = torch.cosine_similarity(all_embs, prompt_emb.expand_as(all_embs), dim=1)
        # Used ids and NaN scores can never win the comparison.
        similarities = similarities.masked_fill(torch.isnan(similarities) | ~available, float('-inf'))
        best_value, best_pos = similarities.max(dim=0)  # first index wins on ties
        if best_value.item() == float('-inf'):
            # No candidate left; further rounds could not find one either.
            break

        best_pos = int(best_pos)
        best_id = song_ids[best_pos]
        best_emb = all_embs[best_pos].view(1, -1)
        available[positions_by_id[best_id]] = False

        song_name, artist_name_basket = id_to_song_info.get(best_id, ('Unknown Title', 'Unknown Artist'))
        artist_names = _format_artists(artist_name_basket)
        recommended_songs.append({
            'id': best_id,
            'title': song_name,
            'artists': artist_names,
            'qnetwork_emb': best_emb
        })
        print(f"Recommended Song {i+1}: ID: {best_id}, Title: {song_name}, Artists: {artist_names}")

        # Ask again on non-numeric input instead of aborting the session.
        while True:
            try:
                reward = float(input(f"Enter the reward for song '{song_name}' by '{artist_names}' (e.g., 1.0 or -1.0): "))
                break
            except ValueError:
                print("Invalid reward; please enter a number such as 1.0 or -1.0.")

        # State update: move the prompt embedding toward (or away from) the song.
        prompt_emb = (1-weight) * prompt_emb + weight * best_emb * reward

    return recommended_songs

# Read the user's prompt
prompt = input("Enter the prompt: ")

# Run the prediction with the default recommendation count and weight,
# then list every recommended song.
recommended_songs = predict_songs(prompt=prompt, embeddings_df=embeddings_df)
print("Recommended Songs:")
for song in recommended_songs:
    summary = f"ID: {song['id']}, Title: {song['title']}, Artists: {song['artists']}"
    print(summary)