In [41]:
import os
import os.path as path
import gc
import re
import math
import json

In [42]:
import numpy as np
import pandas as pd

In [43]:
import matplotlib.pyplot as plt
import seaborn as sns

In [44]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [45]:
pd.set_option('display.max_row', 50)
pd.set_option('display.max_columns', 100)

In [46]:
# 디렉토리 기본 경로 설정
DIR_PATH = path.join('..', 'data', 'sql_dummy')
DIR_SAVE_PATH = path.join('..', 'data', 'output')

print(DIR_PATH)
print(DIR_SAVE_PATH)

..\data\sql_dummy
..\data\output


In [49]:
# sql_capsule.csv
capsule_read = pd.read_csv(path.join(DIR_PATH, 'sql_capsule.csv'), low_memory=False, encoding='utf-8')

print(capsule_read.shape)
capsule_read.head()

(152, 8)


Unnamed: 0,idx,created_data,updated_date,name_en,name_ko,summary,thumbnail,user_grade
0,1,2023-03-31 12:00:00,,Amaretti,아마레티,"아몬드, 비스킷",default_capsule.png,0
1,2,2023-03-31 12:00:00,,Bukeela ka Ethiopia,부케엘라 카 에티오피아,"과일, 플로럴",default_capsule.png,0
2,3,2023-03-31 12:00:00,,Cafe Istanbul,카페 이스탄불,"강렬한, 자극적인",default_capsule.png,0
3,4,2023-03-31 12:00:00,,Caramelito,카라멜리또,카라멜,default_capsule.png,0
4,5,2023-03-31 12:00:00,,Ciocattino,초콜라티노,초코,default_capsule.png,0


In [50]:
# sql_capsule_score.csv
capsule_score_read = pd.read_csv(path.join(DIR_PATH, 'sql_capsule_score.csv'), low_memory=False, encoding='utf-8')

print(capsule_score_read.shape)
capsule_score_read.head()

(152, 11)


Unnamed: 0,idx,created_date,updated_date,acidity,balance,bitterness,body,coffeeing_note,flavor,roasting,capsule_idx
0,1,2023-03-31 12:00:00.000000,,6,5,6,6,"아몬드, 비스킷",5,6,1
1,2,2023-03-31 12:00:00.000000,,4,5,2,2,"과일, 플로럴",3,2,2
2,3,2023-03-31 12:00:00.000000,,6,5,6,6,"강렬한, 자극적인",10,6,3
3,4,2023-03-31 12:00:00.000000,,6,5,6,6,카라멜,6,6,4
4,5,2023-03-31 12:00:00.000000,,6,5,6,6,초코,6,6,5


In [51]:
# sql_capsule_detail.csv
capsule_detail_read = pd.read_csv(path.join(DIR_PATH, 'sql_capsule_detail.csv'), low_memory=False, encoding='utf-8')

print(capsule_detail_read.shape)
capsule_detail_read.head()

(152, 8)


Unnamed: 0,idx,created_date,updated_date,company,description,machine_type,origin,capsule_idx
0,1,2023-03-31 12:00:00.000000,,Nespresso,상세 내용 입니다!!!,original,Unknown,1
1,2,2023-03-31 12:00:00.000000,,Nespresso,상세 내용 입니다!!!,original,Unknown,2
2,3,2023-03-31 12:00:00.000000,,Nespresso,상세 내용 입니다!!!,original,Unknown,3
3,4,2023-03-31 12:00:00.000000,,Nespresso,상세 내용 입니다!!!,original,Unknown,4
4,5,2023-03-31 12:00:00.000000,,Nespresso,상세 내용 입니다!!!,original,Unknown,5


In [52]:
capsule_data = capsule_read.copy()
capsule_data = pd.merge(capsule_data, capsule_detail_read.drop(['idx', 'created_date', 'updated_date'], axis=1), how='left', left_on='idx', right_on='capsule_idx')
capsule_data.drop('capsule_idx', axis=1, inplace=True)
capsule_data = pd.merge(capsule_data, capsule_score_read.drop(['idx', 'created_date', 'updated_date'], axis=1), how='left', left_on='idx', right_on='capsule_idx')
capsule_data.drop('capsule_idx', axis=1, inplace=True)
print(capsule_data.shape)
capsule_data.head()

(152, 19)


Unnamed: 0,idx,created_data,updated_date,name_en,name_ko,summary,thumbnail,user_grade,company,description,machine_type,origin,acidity,balance,bitterness,body,coffeeing_note,flavor,roasting
0,1,2023-03-31 12:00:00,,Amaretti,아마레티,"아몬드, 비스킷",default_capsule.png,0,Nespresso,상세 내용 입니다!!!,original,Unknown,6,5,6,6,"아몬드, 비스킷",5,6
1,2,2023-03-31 12:00:00,,Bukeela ka Ethiopia,부케엘라 카 에티오피아,"과일, 플로럴",default_capsule.png,0,Nespresso,상세 내용 입니다!!!,original,Unknown,4,5,2,2,"과일, 플로럴",3,2
2,3,2023-03-31 12:00:00,,Cafe Istanbul,카페 이스탄불,"강렬한, 자극적인",default_capsule.png,0,Nespresso,상세 내용 입니다!!!,original,Unknown,6,5,6,6,"강렬한, 자극적인",10,6
3,4,2023-03-31 12:00:00,,Caramelito,카라멜리또,카라멜,default_capsule.png,0,Nespresso,상세 내용 입니다!!!,original,Unknown,6,5,6,6,카라멜,6,6
4,5,2023-03-31 12:00:00,,Ciocattino,초콜라티노,초코,default_capsule.png,0,Nespresso,상세 내용 입니다!!!,original,Unknown,6,5,6,6,초코,6,6


In [53]:
# TF-IDF 벡터화
tfidf_vector = TfidfVectorizer()
tfidf_matrix = tfidf_vector.fit_transform(capsule_data['coffeeing_note']).toarray()
tfidf_matrix_feature = tfidf_vector.get_feature_names_out()

In [54]:
print(tfidf_matrix_feature)
print(tfidf_matrix)

['가벼운' '감초' '강렬한' '강한' '개암' '견과류' '고소한' '과일' '과자' '구운' '균형있는' '기름진' '깊은'
 '다크로스트' '다크초코' '달콤한' '담배' '당밀' '라임' '라즈베리' '레드와인' '레몬' '마일드' '맥아' '무거운'
 '묵직한' '바닐라' '바디' '박하' '백단나무' '보리' '복숭아' '부드러운' '비스킷' '사탕' '석류' '설탕' '스모키'
 '시나몬' '시리얼' '싱그러운' '쌉쌀한' '아몬드' '야채' '오렌지' '우디' '우아한' '위스키' '자극적인' '정향'
 '중후한' '체리' '초코' '카라멜' '코코넛' '코코아' '쿠키' '크랜베리' '파인애플' '팝콘' '풍부한' '플로럴'
 '허니' '허브' '헤이즐넛' '호두' '호박' '호박파이' '후추']
[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.74264322 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [55]:
%%time
# tfidf_matrix 기반 유사도 측정
grade_cosine_sim = cosine_similarity(tfidf_matrix)

CPU times: total: 0 ns
Wall time: 2.34 ms


In [56]:
%%time
# aroma ~ bady 까지의 스테이터스에 따른 유사도 특정
grade_cosine_sim = cosine_similarity(capsule_data[['flavor', 'acidity', 'balance', 'bitterness', 'body']])

CPU times: total: 0 ns
Wall time: 1.99 ms


In [57]:
print(grade_cosine_sim.shape)
print(grade_cosine_sim.dtype)

grade_cosine_sim = grade_cosine_sim.astype(np.float16)
gc.collect()
print(grade_cosine_sim.dtype)

grade_cosine_sim

(152, 152)
float64
float16


array([[1.    , 0.9194, 0.9536, ..., 0.9907, 0.9907, 0.997 ],
       [0.9194, 1.    , 0.886 , ..., 0.915 , 0.915 , 0.9146],
       [0.9536, 0.886 , 1.    , ..., 0.986 , 0.986 , 0.9287],
       ...,
       [0.9907, 0.915 , 0.986 , ..., 1.    , 1.    , 0.9775],
       [0.9907, 0.915 , 0.986 , ..., 1.    , 1.    , 0.9775],
       [0.997 , 0.9146, 0.9287, ..., 0.9775, 0.9775, 1.    ]],
      dtype=float16)

In [58]:
# 이름 뿐만 아니라 id로도 검색할 수 있도록 행과 열 중 하나를 title, 다른 하나를 id로 지정

df_grade_cosine_sim = pd.DataFrame(grade_cosine_sim, index = capsule_data['idx'], columns = capsule_data['name_ko'], dtype=np.float16)
print(df_grade_cosine_sim.shape)
df_grade_cosine_sim.head()

(152, 152)


name_ko,아마레티,부케엘라 카 에티오피아,카페 이스탄불,카라멜리또,초콜라티노,다르칸,브라질 둘세오,엔비보 롱고,포레스트 아몬드 플레이버,포레스트 블랙,포레스트 프룻,포르티시오 롱고,인드리야,인피니트 에스프레소,인피니트 후루티 라즈베리,인피니트 고메 헤이즐넛,리니시오 롱고,파리 블랙,로사바야 데 콜롬비아,토르타 디 노초올레,바닐리오,비발토 롱고,디카페인 비발토 롱고,에이지드 수마트라,아마하 아웨 우간다,아르페지오,디카페인 아르페지오,브라질 오가닉,부에노스 아이레스 롱고,까페시또 데 쿠바,까페시또 데 푸에르토리코,까페지뇨 두 브라질,케이프타운 엔비보 롱고,카프리치오,카라멜 크레임 브륄레,카라멜로,키아로,초콜라티노,코코아 트러플,코코넛 플레이버 오버 아이스,콜롬비아,코르토,코지,에스페란사 데 콜롬비아,에티오피아,필터 스타일 인텐스,필터 스타일 밀드,프레도 델리카토,프레도 인텐소,갈라파고스 스페셜 리저브,...,디아볼리토,더블 에스프레소 챠로,더블 에스프레소 돌체,더블 에스프레소 스쿠로,포르타도,디카페인 포르타도,징거브레드 리미티드 에디션,골든 캐러멜,하프-카페이나토,하와이 코나 리미티드 에디션,헤이즐리노 머핀,아이스 포르테,아이스 레제로,이니지오,인텐소 (커피),자마이카 블루 마운틴 리미티드 에디션,멜로지오,멜로지오 부스트,디카페인 멜로지오,멕시코,니카라과 라 쿰플리다,오다시오,오라푸리오,페퍼민트 핀휠 리미티드 에디션,푸어오버 스타일 인텐소,푸어오버 스타일 밀드,펌킨 스파이스 케이크 리미티드 에디션,리치 초콜렛,로스트 헤이즐넛,스토미오,스토미오 부스트,솔레리오,스위트 바닐라,토칸토,트로피컬 코코넛 오버 아이스,바닐라 커스터드 파이,비비다 B12,볼테소,위스키 에센스,아플로라지오,비안코 레게로,디카페나토,돌체,엘바조,포레스트 푸르트,지오르니오,인피니티 더블 에스프레소,인텐소 (알토),디카페나토 인텐소,디카페나토 온투오소
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
1,1.0,0.919434,0.953613,0.997559,0.997559,0.90625,0.986816,0.917969,1.0,0.975098,0.997559,0.962891,0.96875,1.0,0.98877,0.98877,0.91748,0.913086,0.974609,1.0,0.997559,0.986816,0.986816,0.935547,0.964355,0.956055,0.956055,0.910156,0.91748,0.898926,0.953613,0.917969,0.908203,0.98877,0.90625,0.981445,0.949707,0.981445,0.90625,1.0,0.991699,0.97168,0.98291,1.0,0.958496,0.890625,0.902832,1.0,1.0,0.975098,...,0.908203,0.905273,0.980469,0.89209,0.925781,0.925781,0.90625,0.90625,0.90625,0.984375,0.90625,1.0,1.0,0.973633,0.887695,0.975098,0.934082,0.934082,0.934082,0.935547,0.98877,0.969727,0.934082,1.0,0.949707,0.942871,0.90625,0.902832,0.90625,0.925781,0.925781,0.884766,0.90625,0.981445,1.0,0.90625,0.934082,0.986816,0.980469,0.99707,0.949707,0.997559,0.99707,0.99707,0.90625,0.99707,0.98877,0.990723,0.990723,0.99707
2,0.919434,1.0,0.88623,0.918945,0.918945,0.759277,0.960449,0.783203,0.919434,0.887207,0.918945,0.878418,0.857422,0.919434,0.938965,0.938965,0.928223,0.761719,0.92041,0.919434,0.918945,0.960449,0.960449,0.836426,0.834473,0.830566,0.830566,0.876465,0.993652,0.789551,0.88623,0.783203,0.77832,0.938965,0.916016,0.966797,0.936035,0.966797,0.916016,0.919434,0.92627,0.846191,0.968262,0.919434,0.953613,0.765137,0.866699,0.919434,0.919434,0.887207,...,0.77832,0.839844,0.955078,0.805664,0.786133,0.786133,0.916016,0.916016,0.916016,0.936035,0.916016,0.919434,0.919434,0.959473,0.825195,0.887207,0.861816,0.861816,0.861816,0.836426,0.938965,0.835938,0.861816,0.919434,0.842285,0.867188,0.916016,0.866699,0.916016,0.786133,0.786133,0.968262,0.916016,0.966797,0.919434,0.916016,0.861816,0.960449,0.955078,0.914551,0.936035,0.918945,0.914551,0.914551,0.916016,0.914551,0.938965,0.915039,0.915039,0.914551
3,0.953613,0.88623,1.0,0.972656,0.972656,0.9375,0.951172,0.946777,0.953613,0.98291,0.972656,0.989746,0.990234,0.953613,0.953613,0.953613,0.918457,0.930176,0.931152,0.953613,0.972656,0.951172,0.951172,0.95752,0.96582,0.973633,0.973633,0.968262,0.918457,0.956543,1.0,0.946777,0.952148,0.953613,0.936523,0.958984,0.927734,0.958984,0.936523,0.953613,0.957031,0.910156,0.935059,0.953613,0.901367,0.859375,0.912109,0.953613,0.953613,0.98291,...,0.952148,0.962891,0.972656,0.96582,0.938477,0.938477,0.936523,0.936523,0.936523,0.971191,0.936523,0.953613,0.953613,0.974609,0.962402,0.98291,0.950684,0.950684,0.950684,0.95752,0.953613,0.954102,0.950684,0.953613,0.927734,0.935547,0.936523,0.912109,0.936523,0.938477,0.938477,0.805176,0.936523,0.958984,0.953613,0.936523,0.950684,0.951172,0.972656,0.928711,0.927734,0.972656,0.928711,0.928711,0.936523,0.928711,0.953613,0.98584,0.98584,0.928711
4,0.997559,0.918945,0.972656,1.0,1.0,0.921387,0.986816,0.932617,0.997559,0.985352,1.0,0.977051,0.981934,0.997559,0.98877,0.98877,0.925293,0.924805,0.972656,0.997559,1.0,0.986816,0.986816,0.948242,0.972656,0.968262,0.968262,0.931641,0.925293,0.919922,0.972656,0.932617,0.92627,0.98877,0.920898,0.984375,0.952637,0.984375,0.920898,0.997559,0.992188,0.965332,0.97998,0.997559,0.953125,0.890625,0.912598,0.997559,0.997559,0.985352,...,0.92627,0.92627,0.986816,0.916992,0.936523,0.936523,0.920898,0.920898,0.920898,0.989258,0.920898,0.997559,0.997559,0.981934,0.912598,0.985352,0.945801,0.945801,0.945801,0.948242,0.98877,0.974121,0.945801,0.997559,0.952637,0.94873,0.920898,0.912598,0.920898,0.936523,0.936523,0.873535,0.920898,0.984375,0.997559,0.920898,0.945801,0.986816,0.986816,0.989258,0.952637,1.0,0.989258,0.989258,0.920898,0.989258,0.98877,0.998047,0.998047,0.989258
5,0.997559,0.918945,0.972656,1.0,1.0,0.921387,0.986816,0.932617,0.997559,0.985352,1.0,0.977051,0.981934,0.997559,0.98877,0.98877,0.925293,0.924805,0.972656,0.997559,1.0,0.986816,0.986816,0.948242,0.972656,0.968262,0.968262,0.931641,0.925293,0.919922,0.972656,0.932617,0.92627,0.98877,0.920898,0.984375,0.952637,0.984375,0.920898,0.997559,0.992188,0.965332,0.97998,0.997559,0.953125,0.890625,0.912598,0.997559,0.997559,0.985352,...,0.92627,0.92627,0.986816,0.916992,0.936523,0.936523,0.920898,0.920898,0.920898,0.989258,0.920898,0.997559,0.997559,0.981934,0.912598,0.985352,0.945801,0.945801,0.945801,0.948242,0.98877,0.974121,0.945801,0.997559,0.952637,0.94873,0.920898,0.912598,0.920898,0.936523,0.936523,0.873535,0.920898,0.984375,0.997559,0.920898,0.945801,0.986816,0.986816,0.989258,0.952637,1.0,0.989258,0.989258,0.920898,0.989258,0.98877,0.998047,0.998047,0.989258


In [59]:
df_grade_cosine_sim.loc[1].sort_values(ascending=False)[1:11].index

Index(['트로피컬 코코넛 오버 아이스', '프레도 델리카토', '프레도 인텐소', '토르타 디 노초올레', '림인하 오버 아이스',
       '스쿠로', '아이스 포르테', '아이스 레제로', '인피니트 에스프레소', '코코넛 플레이버 오버 아이스'],
      dtype='object', name='name_ko')

In [60]:
df_grade_cosine_sim.index.get_indexer([1])

array([0], dtype=int64)

In [61]:
df_grade_cosine_sim.iloc[:, df_grade_cosine_sim.index.get_indexer([1])].columns[0]

'아마레티'

In [77]:
df_grade_cosine_sim.iloc[:, df_grade_cosine_sim.index.get_indexer([5])].sort_values(by=df_grade_cosine_sim.iloc[:, df_grade_cosine_sim.index.get_indexer([5])].columns[0], ascending=False)[1:11].index

Int64Index([5, 144, 11, 21, 62, 151, 150, 1, 78, 44], dtype='int64', name='idx')

In [79]:
df_grade_cosine_sim.iloc[:, df_grade_cosine_sim.index.get_indexer([5])].sort_values(by=df_grade_cosine_sim.iloc[:, df_grade_cosine_sim.index.get_indexer([5])].columns[0], ascending=False).drop(5)

name_ko,초콜라티노
idx,Unnamed: 1_level_1
4,1.000000
144,1.000000
11,1.000000
21,1.000000
62,1.000000
...,...
130,0.912598
46,0.890625
134,0.873535
102,0.870117


In [82]:
# id 기반 추천 알고리즘
def recommendations_by_id(target_id, matrix, items, k=10):
    try:
        target_idx = matrix.index.get_indexer([target_id])
        recom_idx = matrix.iloc[:, target_idx].sort_values(by= matrix.iloc[:, target_idx].columns[0], ascending=False).drop(target_id)[:k].index

        # 반환한 인덱스 값은 1부터 시작하나, 실제 iloc로 접근하는 인덱스 값은 0부터 시작하므로 이를 보정해야함
        recom_idx = recom_idx-1
        recom_id = items.iloc[recom_idx, :].idx.values
        recom_title = items.iloc[recom_idx, :].name_ko.values

        target_id_list = np.full(len(range(k)), target_id)
        target_title_list = np.full(len(range(k)), items[items.idx == target_id].name_ko.values)
        
    except:
        print(recom_idx)
        print(recom_id, recom_title)
        print(target_id_list, target_title_list)
    
    d = {
        'target_id': target_id_list,
        'target_title': target_title_list,
        'recom_id'    : recom_id,
        'recom_title' : recom_title,
    }
    
    return pd.DataFrame(d)

In [83]:
recommendations_by_id(5, df_grade_cosine_sim, capsule_data)

Unnamed: 0,target_id,target_title,recom_id,recom_title
0,5,초콜라티노,4,카라멜리또
1,5,초콜라티노,144,디카페나토
2,5,초콜라티노,11,포레스트 프룻
3,5,초콜라티노,21,바닐리오
4,5,초콜라티노,62,리반토
5,5,초콜라티노,151,디카페나토 인텐소
6,5,초콜라티노,150,인텐소 (알토)
7,5,초콜라티노,1,아마레티
8,5,초콜라티노,78,스쿠로
9,5,초콜라티노,44,에스페란사 데 콜롬비아


In [85]:
# id 기반 추천 알고리즘
def recommendation_list_by_id(target_id, matrix, items, k=10):
    try:
        target_idx =  matrix.index.get_indexer([target_id])
        recom_idx = matrix.iloc[:, target_idx].sort_values(by= matrix.iloc[:, target_idx].columns[0], ascending=False).drop(target_id)[:k].index
        
        # 반환한 인덱스 값은 1부터 시작하나, 실제 iloc로 접근하는 인덱스 값은 0부터 시작하므로 이를 보정해야함
        recom_idx = recom_idx-1
        recom_id = items.iloc[recom_idx, :].idx.values
        recom_title = items.iloc[recom_idx, :].name_ko.values
        
    except:
        print(recom_idx)
        print(recom_id, recom_title)
    
    recom_list = [dict(id = id, title = title) for id, title in zip(recom_id, recom_title)]
    
    return recom_list

In [87]:
recommendation_list_by_id(5, df_grade_cosine_sim, capsule_data, k=5)

[{'id': 4, 'title': '카라멜리또'},
 {'id': 144, 'title': '디카페나토'},
 {'id': 11, 'title': '포레스트 프룻'},
 {'id': 21, 'title': '바닐리오'},
 {'id': 62, 'title': '리반토'}]

In [88]:
# 유사도 기준으로 추천 원두의 상위 5개를 출력
capsule_recom = capsule_data.copy()[['idx', 'name_ko']]
capsule_recom['recommendation'] = capsule_recom.apply(lambda x: recommendation_list_by_id(x.idx, df_grade_cosine_sim, capsule_data, k=5), axis=1)
print(capsule_recom.shape)
capsule_recom.head()

(152, 3)


Unnamed: 0,idx,name_ko,recommendation
0,1,아마레티,"[{'id': 137, 'title': '트로피컬 코코넛 오버 아이스'}, {'id..."
1,2,부케엘라 카 에티오피아,"[{'id': 29, 'title': '부에노스 아이레스 롱고'}, {'id': 1..."
2,3,카페 이스탄불,"[{'id': 31, 'title': '까페시또 데 푸에르토리코'}, {'id': ..."
3,4,카라멜리또,"[{'id': 5, 'title': '초콜라티노'}, {'id': 144, 'tit..."
4,5,초콜라티노,"[{'id': 4, 'title': '카라멜리또'}, {'id': 144, 'tit..."


In [89]:
# 파일 저장
os.makedirs(DIR_SAVE_PATH, exist_ok=True)
capsule_recom.to_csv(path.join(DIR_SAVE_PATH, 'item_recom_capsule.csv'), sep=',', index=False)

In [90]:
# item_recom_capsule.csv
recom_read = pd.read_csv(path.join(DIR_SAVE_PATH, 'item_recom_capsule.csv'), low_memory=False)

print(recom_read.shape)
recom_read.head()

(152, 3)


Unnamed: 0,idx,name_ko,recommendation
0,1,아마레티,"[{'id': 137, 'title': '트로피컬 코코넛 오버 아이스'}, {'id..."
1,2,부케엘라 카 에티오피아,"[{'id': 29, 'title': '부에노스 아이레스 롱고'}, {'id': 1..."
2,3,카페 이스탄불,"[{'id': 31, 'title': '까페시또 데 푸에르토리코'}, {'id': ..."
3,4,카라멜리또,"[{'id': 5, 'title': '초콜라티노'}, {'id': 144, 'tit..."
4,5,초콜라티노,"[{'id': 4, 'title': '카라멜리또'}, {'id': 144, 'tit..."


In [91]:
print(capsule_read.shape)
capsule_read.head()

(152, 8)


Unnamed: 0,idx,created_data,updated_date,name_en,name_ko,summary,thumbnail,user_grade
0,1,2023-03-31 12:00:00,,Amaretti,아마레티,"아몬드, 비스킷",default_capsule.png,0
1,2,2023-03-31 12:00:00,,Bukeela ka Ethiopia,부케엘라 카 에티오피아,"과일, 플로럴",default_capsule.png,0
2,3,2023-03-31 12:00:00,,Cafe Istanbul,카페 이스탄불,"강렬한, 자극적인",default_capsule.png,0
3,4,2023-03-31 12:00:00,,Caramelito,카라멜리또,카라멜,default_capsule.png,0
4,5,2023-03-31 12:00:00,,Ciocattino,초콜라티노,초코,default_capsule.png,0


In [92]:
def get_recom_by_capsule(itemIdx, matrix, k=5):
    try:
        recom_list = matrix.set_index('idx').loc[itemIdx]['recommendation']
        recom_list = json.loads(recom_list.replace('\'', '\"'))
        recom_list = [dict(t) for t in {tuple(d.items()) for d in recom_list}]
        
    except:
        print(itemIdx)
        print(recom_list)
        
    return recom_list[:k]

In [93]:
recom_list = get_recom_by_capsule(1, recom_read)
recom_list

[{'id': 61, 'title': '림인하 오버 아이스'},
 {'id': 137, 'title': '트로피컬 코코넛 오버 아이스'},
 {'id': 20, 'title': '토르타 디 노초올레'},
 {'id': 48, 'title': '프레도 델리카토'},
 {'id': 49, 'title': '프레도 인텐소'}]