In [None]:
from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Item metadata tables. The recommenders in this notebook read their 'idx'
# column and address rows by position (.iloc).
# Shape notes are the original author's annotations — not verified here.
place = pd.read_csv('../Data/whyout_data/place.csv') # author-noted shape: (4697, 10)
product = pd.read_csv('../Data/whyout_data/product.csv') # author-noted shape: (5821, 11)
video = pd.read_csv('../Data/whyout_data/video.csv') # author-noted shape: (3250, 9)

In [None]:
# Rating matrices over all users; the zero / non-zero summary in this notebook
# treats each row as one user. Presumably one column per item — TODO confirm.
# Shape notes are the original author's annotations — not verified here.
user_place = pd.read_csv('../Data/whyout_data/col_user_place.csv') # author-noted shape: (31177, 4697)
user_product = pd.read_csv('../Data/whyout_data/col_user_product.csv') # author-noted shape: (31177, 5821)
user_video = pd.read_csv('../Data/whyout_data/col_user_video.csv') # author-noted shape: (31177, 3250)

In [None]:
# "drop_*" rating matrices — presumably the matrices above with users that have
# no activity for that item type removed (TODO confirm how they were produced).
# NOTE(review): the product row count noted here (2994) disagrees with the row
# count noted next to drop_user_product_idx.csv (2294) — one of them is likely a typo.
drop_user_place = pd.read_csv('../Data/whyout_data/drop_user_place.csv') # author-noted shape: (22420, 4697)
drop_user_product = pd.read_csv('../Data/whyout_data/drop_user_product.csv') # author-noted shape: (2994, 5821)
drop_user_video = pd.read_csv('../Data/whyout_data/drop_user_video.csv') # author-noted shape: (11067, 3250)

In [None]:
# User lookup tables. The recommenders match a user id against the 'idx' column,
# then read the third column (row in the full matrix) and, for the drop_* tables,
# the fourth column (row in the matching drop_* matrix) by position.
# Shape notes are the original author's annotations — not verified here.
user = pd.read_csv('../Data/whyout_data/user.csv') # author-noted shape: (31177, 3)
drop_user_place_idx = pd.read_csv('../Data/whyout_data/drop_user_place_idx.csv') # author-noted shape: (22420, 4)
# NOTE(review): 2294 rows noted here vs 2994 noted for drop_user_product.csv — confirm.
drop_user_product_idx = pd.read_csv('../Data/whyout_data/drop_user_product_idx.csv') # author-noted shape: (2294, 4)
drop_user_video_idx = pd.read_csv('../Data/whyout_data/drop_user_video_idx.csv') # author-noted shape: (11067, 4)

In [None]:
# Predicted ratings stored under sgd_result/full_data (file names indicate
# k=50, 1000 epochs). Presumably row-aligned with the user_* matrices — TODO confirm.
full_data_sgd_place_preds = pd.read_csv('../Data/whyout_data/sgd_result/full_data/user_place_k50epochs1000.csv')
full_data_sgd_product_preds = pd.read_csv('../Data/whyout_data/sgd_result/full_data/user_product_k50epochs1000.csv')
full_data_sgd_video_preds = pd.read_csv('../Data/whyout_data/sgd_result/full_data/user_video_k50epochs1000.csv')

In [None]:
# Predicted ratings stored under sgd_result/del_data. File names indicate a
# different k per item type (place 40, product 20, video 30), all 1000 epochs.
# Presumably row-aligned with the drop_user_* matrices — TODO confirm.
del_data_sgd_place_preds = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_place_k40epochs1000.csv')
del_data_sgd_product_preds = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_product_k20epochs1000.csv')
del_data_sgd_video_preds = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_video_k30epochs1000.csv')

In [None]:
# User / item latent-factor files from the del_data runs (same k as the
# corresponding prediction files, per the file names).
# These frames are not referenced by any other code visible in this part of the
# notebook; presumably intended for the similarity fallback — TODO confirm.
place_user_latent = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_place_user_latent_k40epochs1000.csv')
place_item_latent = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_place_item_latent_k40epochs1000.csv')
product_user_latent = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_product_user_latent_k20epochs1000.csv')
product_item_latent = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_product_item_latent_k20epochs1000.csv')
video_user_latent = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_video_user_latent_k30epochs1000.csv')
video_item_latent = pd.read_csv('../Data/whyout_data/sgd_result/del_data/drop_user_video_item_latent_k30epochs1000.csv')

In [None]:
# Check whether each user (row) has any ratings.
def find_zero_indices(df):
    """Return the row labels of ``df`` whose values are all equal to 0.

    A row containing NaN is not considered all-zero, because NaN != 0.
    """
    has_any_nonzero = df.ne(0).any(axis=1)
    return df.index[~has_any_nonzero].tolist()

def find_non_zero_indices(df):
    """Return the row labels of ``df`` that contain at least one value != 0.

    A row containing NaN counts as non-zero, because NaN != 0.
    """
    has_any_nonzero = df.ne(0).any(axis=1)
    return df.index[has_any_nonzero].tolist()

# Users with no ratings at all, per item type, and users with none in any type.
place_zero_indices = find_zero_indices(user_place)
product_zero_indices = find_zero_indices(user_product)
video_zero_indices = find_zero_indices(user_video)
common_zero_indices = set(place_zero_indices).intersection(
    product_zero_indices, video_zero_indices
)

print('장소를 평가하지 않은 사용자 수:', len(place_zero_indices))
print('상품을 평가하지 않은 사용자 수:', len(product_zero_indices))
print('영상을 평가하지 않은 사용자 수:', len(video_zero_indices))
print('모두 평가하지 않은 사용자 수:', len(common_zero_indices))

# Users with at least one rating, per item type, and users active in all three.
place_non_zero_indices = find_non_zero_indices(user_place)
product_non_zero_indices = find_non_zero_indices(user_product)
video_non_zero_indices = find_non_zero_indices(user_video)
common_non_zero_indices = set(place_non_zero_indices).intersection(
    product_non_zero_indices, video_non_zero_indices
)

print('장소를 평가한 사용자 수:', len(place_non_zero_indices))
print('상품을 평가한 사용자 수:', len(product_non_zero_indices))
print('영상을 평가한 사용자 수:', len(video_non_zero_indices))
print('모두 평가한 사용자 수:', len(common_non_zero_indices))


In [None]:
# Recommend places to a user who has place activity.
# place_user_id is matched against the 'idx' column of the lookup table.
place_user_id = 100
# Number of items to recommend (re-assigned with the same value in later cells).
num_recommendations = 10

def recommend_place(sgd_place_preds, user_id, place_df, ratings_df, idx, num_recommendations):
    """Recommend places the user has not rated yet, ranked by predicted rating.

    Args:
        sgd_place_preds: DataFrame of predicted ratings, addressed with the same
            row label as ``ratings_df``.
        user_id: Value looked up in ``idx['idx']``.
        place_df: Place table with an ``'idx'`` column; rows are addressed by
            position (``.iloc``).
        ratings_df: Observed ratings; values > 0 count as "already rated".
        idx: Lookup table with an ``'idx'`` column. Its third column is printed
            as the user's row in the full matrix; its fourth column is used as
            the row label into ``ratings_df`` and ``sgd_place_preds``.
        num_recommendations: Maximum number of places to return.

    Returns:
        A list of ``place_df['idx']`` values for the highest predictions among
        unrated places, or ``None`` when ``user_id`` is not found in ``idx``.

    Note:
        Column labels of ``ratings_df`` / ``sgd_place_preds`` are cast to int and
        used as positions — this assumes the labels are 0-based positions
        (e.g. '0', '1', ... as produced by ``read_csv``); TODO confirm.
    """
    user_rows = idx[idx['idx'] == user_id]
    if user_rows.empty:
        print(f'{user_id}번 유저의 행동이 없습니다.')
        return None

    print(f'{user_id}번 유저의 행동이 있습니다.')
    # Read scalars by position; int() on a one-element Series is deprecated.
    user_place_index = int(user_rows.iloc[0, 2])
    drop_user_place_index = int(user_rows.iloc[0, 3])
    print('user_place_index:', user_place_index)
    print('drop_user_place_index:', drop_user_place_index)

    # The user's row of observed ratings.
    user_data = ratings_df.loc[drop_user_place_index]

    # Split item positions into already-rated and not-yet-rated.
    user_history_indices = [int(i) for i in user_data[user_data > 0].index.tolist()]
    user_history_non_indices = [int(i) for i in user_data[user_data <= 0].index.tolist()]
    print(f'이미 평가한 place 길이: {len(user_history_indices)}')
    print('이미 평가한 place:', user_history_indices)
    non_recommendations = place_df.iloc[user_history_indices]['idx'].tolist()
    recommendations = place_df.iloc[user_history_non_indices]['idx'].tolist()

    print("이미 평가한 place idx:", non_recommendations)
    print("평가 안한 place idx:", recommendations)

    # Rank the unrated places by predicted rating, best first.
    user_predictions = sgd_place_preds.loc[drop_user_place_index]
    user_predictions_filtered = user_predictions.iloc[user_history_non_indices]
    sorted_predictions = user_predictions_filtered.sort_values(ascending=False)
    # Labels read from CSV are strings; cast them like the history lists above
    # so that .iloc receives integer positions.
    top_recommendations = [int(i) for i in sorted_predictions.index[:num_recommendations]]
    recommendations_result = place_df.iloc[top_recommendations]['idx'].tolist()
    print(f"user {user_id}에게 추천해줄 {num_recommendations}개 place idx : {recommendations_result}")
    return recommendations_result
# Example run: place recommendations from the del_data predictions.
predictions = recommend_place(
    sgd_place_preds=del_data_sgd_place_preds,
    user_id=place_user_id,
    place_df=place,
    ratings_df=drop_user_place,
    idx=drop_user_place_idx,
    num_recommendations=num_recommendations,
)

In [None]:
# Recommend products to a user who has product activity.
# product_user_id is matched against the 'idx' column of the lookup table.
product_user_id = 7
# Number of items to recommend (same value as in the other cells).
num_recommendations = 10

def recommend_product(sgd_product_preds, user_id, product_df, ratings_df, idx, num_recommendations):
    """Recommend products the user has not rated yet, ranked by predicted rating.

    Args:
        sgd_product_preds: DataFrame of predicted ratings, addressed with the
            same row label as ``ratings_df``.
        user_id: Value looked up in ``idx['idx']``.
        product_df: Product table with an ``'idx'`` column; rows are addressed
            by position (``.iloc``).
        ratings_df: Observed ratings; values > 0 count as "already rated".
        idx: Lookup table with an ``'idx'`` column. Its third column is printed
            as the user's row in the full matrix; its fourth column is used as
            the row label into ``ratings_df`` and ``sgd_product_preds``.
        num_recommendations: Maximum number of products to return.

    Returns:
        A list of ``product_df['idx']`` values for the highest predictions among
        unrated products, or ``None`` when ``user_id`` is not found in ``idx``.

    Note:
        Column labels of ``ratings_df`` / ``sgd_product_preds`` are cast to int
        and used as positions — this assumes the labels are 0-based positions
        (e.g. '0', '1', ... as produced by ``read_csv``); TODO confirm.
    """
    user_rows = idx[idx['idx'] == user_id]
    if user_rows.empty:
        print(f'{user_id}번 유저의 행동이 없습니다.')
        return None

    print(f'{user_id}번 유저의 행동이 있습니다.')
    # Read scalars by position; int() on a one-element Series is deprecated.
    user_product_index = int(user_rows.iloc[0, 2])
    drop_user_product_index = int(user_rows.iloc[0, 3])
    print('user_product_index:', user_product_index)
    print('drop_user_product_index:', drop_user_product_index)

    # The user's row of observed ratings.
    user_data = ratings_df.loc[drop_user_product_index]

    # Split item positions into already-rated and not-yet-rated.
    user_history_indices = [int(i) for i in user_data[user_data > 0].index.tolist()]
    user_history_non_indices = [int(i) for i in user_data[user_data <= 0].index.tolist()]
    print(f'이미 평가한 product 길이: {len(user_history_indices)}')
    print('이미 평가한 product:', user_history_indices)
    non_recommendations = product_df.iloc[user_history_indices]['idx'].tolist()
    recommendations = product_df.iloc[user_history_non_indices]['idx'].tolist()

    print("이미 평가한 product idx:", non_recommendations)
    print("평가 안한 product idx:", recommendations)

    # Rank the unrated products by predicted rating, best first.
    user_predictions = sgd_product_preds.loc[drop_user_product_index]
    user_predictions_filtered = user_predictions.iloc[user_history_non_indices]
    sorted_predictions = user_predictions_filtered.sort_values(ascending=False)
    # Labels read from CSV are strings; cast them like the history lists above
    # so that .iloc receives integer positions.
    top_recommendations = [int(i) for i in sorted_predictions.index[:num_recommendations]]
    recommendations_result = product_df.iloc[top_recommendations]['idx'].tolist()
    print(f"user {user_id}에게 추천해줄 {num_recommendations}개 product idx : {recommendations_result}")
    return recommendations_result
# Example run: product recommendations from the del_data predictions.
predictions = recommend_product(
    sgd_product_preds=del_data_sgd_product_preds,
    user_id=product_user_id,
    product_df=product,
    ratings_df=drop_user_product,
    idx=drop_user_product_idx,
    num_recommendations=num_recommendations,
)

In [None]:
# Recommend videos to a user who has video activity.
# video_user_id is matched against the 'idx' column of the lookup table.
video_user_id = 7
# Number of items to recommend (same value as in the other cells).
num_recommendations = 10

def recommend_video(sgd_video_preds, user_id, video_df, ratings_df, idx, num_recommendations):
    """Recommend videos the user has not rated yet, ranked by predicted rating.

    Args:
        sgd_video_preds: DataFrame of predicted ratings, addressed with the same
            row label as ``ratings_df``.
        user_id: Value looked up in ``idx['idx']``.
        video_df: Video table with an ``'idx'`` column; rows are addressed by
            position (``.iloc``).
        ratings_df: Observed ratings; values > 0 count as "already rated".
        idx: Lookup table with an ``'idx'`` column. Its third column is printed
            as the user's row in the full matrix; its fourth column is used as
            the row label into ``ratings_df`` and ``sgd_video_preds``.
        num_recommendations: Maximum number of videos to return.

    Returns:
        A list of ``video_df['idx']`` values for the highest predictions among
        unrated videos, or ``None`` when ``user_id`` is not found in ``idx``.

    Note:
        Column labels of ``ratings_df`` / ``sgd_video_preds`` are cast to int and
        used as positions — this assumes the labels are 0-based positions
        (e.g. '0', '1', ... as produced by ``read_csv``); TODO confirm.
    """
    user_rows = idx[idx['idx'] == user_id]
    if user_rows.empty:
        print(f'{user_id}번 유저의 행동이 없습니다.')
        return None

    print(f'{user_id}번 유저의 행동이 있습니다.')
    # Read scalars by position; int() on a one-element Series is deprecated.
    user_video_index = int(user_rows.iloc[0, 2])
    drop_user_video_index = int(user_rows.iloc[0, 3])
    print('user_video_index:', user_video_index)
    print('drop_user_video_index:', drop_user_video_index)

    # The user's row of observed ratings.
    user_data = ratings_df.loc[drop_user_video_index]

    # Split item positions into already-rated and not-yet-rated.
    user_history_indices = [int(i) for i in user_data[user_data > 0].index.tolist()]
    user_history_non_indices = [int(i) for i in user_data[user_data <= 0].index.tolist()]
    print(f'이미 평가한 video 길이: {len(user_history_indices)}')
    print('이미 평가한 video:', user_history_indices)
    non_recommendations = video_df.iloc[user_history_indices]['idx'].tolist()
    recommendations = video_df.iloc[user_history_non_indices]['idx'].tolist()

    print("이미 평가한 video idx:", non_recommendations)
    print("평가 안한 video idx:", recommendations)

    # Rank the unrated videos by predicted rating, best first.
    user_predictions = sgd_video_preds.loc[drop_user_video_index]
    user_predictions_filtered = user_predictions.iloc[user_history_non_indices]
    sorted_predictions = user_predictions_filtered.sort_values(ascending=False)
    # Labels read from CSV are strings; cast them like the history lists above
    # so that .iloc receives integer positions.
    top_recommendations = [int(i) for i in sorted_predictions.index[:num_recommendations]]
    recommendations_result = video_df.iloc[top_recommendations]['idx'].tolist()
    print(f"user {user_id}에게 추천해줄 {num_recommendations}개 video idx : {recommendations_result}")
    return recommendations_result
# Example run: video recommendations from the del_data predictions.
predictions = recommend_video(
    sgd_video_preds=del_data_sgd_video_preds,
    user_id=video_user_id,
    video_df=video,
    ratings_df=drop_user_video,
    idx=drop_user_video_idx,
    num_recommendations=num_recommendations,
)

In [None]:
# Recommend from the full (non-dropped) data; intended for place, product and video.
# user_id is matched against the 'idx' column of the lookup table.
user_id = 7
# Number of items to recommend (same value as in the other cells).
num_recommendations = 10

def recommend_video(total_sgd_preds, user_id, total_df, ratings_df, idx, num_recommendations):
    """Recommend unrated items of any type from full-data tables.

    Unlike the per-type recommenders, the user's row is taken from the third
    column of ``idx`` (the row in the full matrix), so ``total_sgd_preds`` and
    ``ratings_df`` must both be indexed by that full-matrix row.

    NOTE(review): this definition reuses the name ``recommend_video`` and
    therefore replaces the video-specific function defined earlier in the
    notebook once this cell runs. The name is kept for compatibility; consider
    renaming it together with its call site.

    Args:
        total_sgd_preds: DataFrame of predicted ratings, addressed with the same
            row label as ``ratings_df``.
        user_id: Value looked up in ``idx['idx']``.
        total_df: Item table with an ``'idx'`` column; rows are addressed by
            position (``.iloc``).
        ratings_df: Observed ratings; values > 0 count as "already rated".
        idx: Lookup table with an ``'idx'`` column whose third column is the row
            label into ``ratings_df`` and ``total_sgd_preds``.
        num_recommendations: Maximum number of items to return.

    Returns:
        A list of ``total_df['idx']`` values for the highest predictions among
        unrated items, or ``None`` when ``user_id`` is not found in ``idx``.

    Note:
        Column labels of ``ratings_df`` / ``total_sgd_preds`` are cast to int and
        used as positions — this assumes the labels are 0-based positions
        (e.g. '0', '1', ... as produced by ``read_csv``); TODO confirm.
    """
    user_rows = idx[idx['idx'] == user_id]
    if user_rows.empty:
        print(f'{user_id}번 유저의 행동이 없습니다.')
        return None

    print(f'{user_id}번 유저의 행동이 있습니다.')
    # Read the scalar by position; int() on a one-element Series is deprecated.
    user_video_index = int(user_rows.iloc[0, 2])
    print('user_video_index:', user_video_index)

    # The user's row of observed ratings.
    user_data = ratings_df.loc[user_video_index]

    # Split item positions into already-rated and not-yet-rated.
    user_history_indices = [int(i) for i in user_data[user_data > 0].index.tolist()]
    user_history_non_indices = [int(i) for i in user_data[user_data <= 0].index.tolist()]
    print(f'이미 평가한 아이템 길이: {len(user_history_indices)}')
    print('이미 평가한 아이템:', user_history_indices)
    non_recommendations = total_df.iloc[user_history_indices]['idx'].tolist()
    recommendations = total_df.iloc[user_history_non_indices]['idx'].tolist()

    print("이미 평가한 아이템 idx:", non_recommendations)
    print("평가 안한 아이템 idx:", recommendations)

    # Rank the unrated items by predicted rating, best first.
    user_predictions = total_sgd_preds.loc[user_video_index]
    user_predictions_filtered = user_predictions.iloc[user_history_non_indices]
    sorted_predictions = user_predictions_filtered.sort_values(ascending=False)
    # Labels read from CSV are strings; cast them like the history lists above
    # so that .iloc receives integer positions.
    top_recommendations = [int(i) for i in sorted_predictions.index[:num_recommendations]]
    recommendations_result = total_df.iloc[top_recommendations]['idx'].tolist()
    print(f"user {user_id}에게 추천해줄 {num_recommendations}개 아이템 idx : {recommendations_result}")
    return recommendations_result

# The user's row comes from `user` (full user table) and the ratings from
# `user_place` (full matrix), so the predictions must be the full-data matrix
# too. The del_data predictions are indexed by the dropped-user row instead,
# which would select another user's row or fail with a KeyError.
predictions = recommend_video(full_data_sgd_place_preds, user_id, place, user_place, user, num_recommendations)

In [None]:
# Parameters for the generic recommend() helper defined in this cell
# (the original comment mentioned video only; the helper takes any item type).
user_id = 100
# Number of items to recommend (same value as in the other cells).
num_recommendations = 10

def recommend(item, sgd_video_preds, user_id, video_df, ratings_df, idx, num_recommendations):
    """Generic recommender: top predicted items the user has not rated yet.

    Despite the ``video`` in the parameter names, nothing here is specific to
    videos; ``item`` is only used to label the printed message.

    Args:
        item: Item-type label shown in the output (e.g. 'place').
        sgd_video_preds: DataFrame of predicted ratings, addressed with the same
            row label as ``ratings_df``.
        user_id: Value looked up in ``idx['idx']``.
        video_df: Item table with an ``'idx'`` column; rows are addressed by
            position (``.iloc``).
        ratings_df: Observed ratings; values > 0 count as "already rated".
        idx: Lookup table with an ``'idx'`` column whose fourth column is the
            row label into ``ratings_df`` and ``sgd_video_preds``.
        num_recommendations: Maximum number of items to return.

    Returns:
        A list of ``video_df['idx']`` values for the highest predictions among
        unrated items.

    Raises:
        KeyError: if ``user_id`` is not present in ``idx['idx']``.

    Note:
        Column labels of ``ratings_df`` / ``sgd_video_preds`` are cast to int and
        used as positions — this assumes the labels are 0-based positions
        (e.g. '0', '1', ... as produced by ``read_csv``); TODO confirm.
    """
    user_rows = idx[idx['idx'] == user_id]
    if user_rows.empty:
        # Fail explicitly so a caller can fall back to another user.
        raise KeyError(f'user_id {user_id!r} not found in idx')

    # Read the scalar by position; int() on a one-element Series is deprecated.
    drop_user_video_index = int(user_rows.iloc[0, 3])
    user_data = ratings_df.loc[drop_user_video_index]

    # Positions of the items the user has not rated yet.
    user_history_non_indices = [int(i) for i in user_data[user_data <= 0].index.tolist()]

    # Rank the unrated items by predicted rating, best first.
    user_predictions = sgd_video_preds.loc[drop_user_video_index]
    user_predictions_filtered = user_predictions.iloc[user_history_non_indices]
    sorted_predictions = user_predictions_filtered.sort_values(ascending=False)
    # Labels read from CSV are strings; cast them so .iloc gets integer positions.
    top_recommendations = [int(i) for i in sorted_predictions.index[:num_recommendations]]
    recommendations_result = video_df.iloc[top_recommendations]['idx'].tolist()
    print(f"user {user_id}에게 추천해줄 {num_recommendations}개 {item} idx : {recommendations_result}")
    return recommendations_result

In [None]:
# NOTE(review): user_id is a one-element list here, while the other cells use a
# plain int — confirm which form recommend() is expected to receive.
user_id = [10]
item = 'place'
# Dispatch table read by recommendation_system():
#   item type -> [predictions, item table, ratings matrix, user lookup table]
# NOTE(review): the name `dict` shadows the builtin for the rest of the kernel
# session (a later `dict(...)` call in the notebook would fail). It is kept
# because recommendation_system() reads this exact name; rename both together.
dict = { 'place' : [del_data_sgd_place_preds, place, drop_user_place, drop_user_place_idx],
         'product' : [del_data_sgd_product_preds, product, drop_user_product, drop_user_product_idx],
         'video' : [del_data_sgd_video_preds, video, drop_user_video, drop_user_video_idx]}

In [None]:
def cos_similiarity(item, item_list, user_id):
    """Placeholder for picking a similar user from the other item types.

    Prints the item types other than ``item`` and returns a hard-coded user id;
    the actual similarity computation is not implemented yet.

    Args:
        item: Item type being recommended; it is excluded from the printed list.
        item_list: All item types. The caller's list is left unmodified.
        user_id: User to find a neighbour for (currently unused).

    Returns:
        The stand-in user id ``15``.

    Raises:
        ValueError: if ``item`` is not in ``item_list``.
    """
    # Work on a copy so the caller's list is not mutated.
    other_items = list(item_list)
    other_items.remove(item)
    print(other_items)
    # TODO: replace the stub with the user whose latent vector (for one of
    # `other_items`) has the highest cosine similarity to `user_id` (argmax).
    new_user_id = 15
    return new_user_id

In [None]:
def recommendation_system(user_id, item):
    """Recommend ``item``-type entries for ``user_id``, with a similar-user fallback.

    First tries ``recommend`` for the user directly. If that fails (e.g. the
    user has no row in the lookup table for this item type), the user returned
    by ``cos_similiarity`` is used instead.

    Args:
        user_id: User to recommend for.
        item: Key of the module-level dispatch table ``dict``
            ('place', 'product' or 'video').

    Returns:
        Whatever ``recommend`` returns for the user that was finally used.

    Raises:
        KeyError: if ``item`` is not a key of the dispatch table.
    """
    # Look the item type up once, outside the try block, so an unknown item
    # fails fast instead of being mistaken for "user not found".
    preds, item_df, ratings_df, idx_df = dict[item][:4]
    try:
        return recommend(item, preds, user_id, item_df, ratings_df, idx_df, num_recommendations)
    except Exception as err:
        # `except Exception` (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate while keeping the best-effort fallback.
        print(f'recommend() failed for user {user_id} ({type(err).__name__}: {err}); falling back to a similar user.')
        item_list = ['place', 'video', 'product']
        new_user_id = cos_similiarity(item, item_list, user_id)
        return recommend(item, preds, new_user_id, item_df, ratings_df, idx_df, num_recommendations)