In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import re
from sklearn.metrics.pairwise import cosine_similarity
import ast
import os
from sklearn.preprocessing import MinMaxScaler
import pickle
import datetime
import warnings
warnings.filterwarnings('ignore')

In [None]:
ratings = pd.read_csv('./RAW_interactions_01') # interactions file with reviews written by the recipe's own author already removed
recipes = pd.read_csv('./RAW_recipes.csv')

In [None]:
# Loading previously generated mappers.
# As used further down in this notebook:
#   i2r_map   : ingredient id -> recipes containing that ingredient
#   r2i_map   : recipe id -> required ingredient ids (used with set.difference)
#   i2id_map  : ingredient name -> ingredient id
#   id2r_map  : recipe id -> recipe name
#   r2min_map : recipe id -> required time in minutes
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
with open('./i2r_map.pkl', 'rb') as f:
    i2r_map =  pickle.load(f)

with open('./r2i_map.pkl', 'rb') as f:
    r2i_map =  pickle.load(f)

with open('./i2id_map.pkl', 'rb') as f:
    i2id_map =  pickle.load(f)

with open('./id2r_map.pkl', 'rb') as f:
    id2r_map =  pickle.load(f)
    
with open('./r2min_map.pkl', 'rb') as f:
    r2min_map =  pickle.load(f)

### 전처리

In [None]:
# Keep only the columns that are needed, rename `id` to `recipe_id`, and drop
# recipes that have no name. Written as one chain (no inplace=True) so that
# nothing is mutated in place on a column-sliced frame.
recipes = (
    recipes[['id', 'name', 'minutes', 'ingredients', 'description', 'tags', 'steps', 'nutrition']]
    .rename(columns={'id': 'recipe_id'})
    .dropna(axis=0, subset=['name'])
)

In [None]:
# Keep only recipes whose preparation time is more than 0 minutes and at most one day (1440 minutes)
recipes = recipes[(0 < recipes['minutes']) & (recipes['minutes'] <= 1440)]

In [None]:
# Split the stringified `nutrition` list into one float column per nutrient.
nutrition_cols = [
    'calories',
    'total fat (PDV)',
    'sugar (PDV)',
    'sodium (PDV)',
    'protein (PDV)',
    'saturated fat (PDV)',
    'carbohydrates (PDV)',
]
# Remove the surrounding brackets, split on commas, then convert every piece to float.
nutrition_values = (
    recipes['nutrition']
    .str.strip('[]')
    .str.split(",", expand=True)
    .astype('float')
)
recipes[nutrition_cols] = nutrition_values

In [None]:
# Drop the review date column.
# errors='ignore' keeps this cell re-runnable once the column is already gone.
ratings = ratings.drop(columns='date', errors='ignore')
ratings.head()

In [None]:
print(recipes.shape, ratings.shape)

In [None]:
# Convert the `ingredients` column from stringified lists to real Python lists.
# Values that are no longer strings are passed through unchanged, so the cell
# can be re-run without literal_eval failing on already-parsed lists.
from ast import literal_eval
recipes['ingredients'] = recipes['ingredients'].apply(
    lambda value: literal_eval(value) if isinstance(value, str) else value
)

In [None]:
# Convert the `steps` column from stringified lists to real Python lists.
# Already-parsed values are passed through so the cell can be re-run safely.
recipes['steps'] = recipes['steps'].apply(
    lambda value: literal_eval(value) if isinstance(value, str) else value
)

In [None]:
# Copy of `recipes` indexed by `recipe_id`.
raw_recipes_rind=recipes.copy().set_index('recipe_id')
# Convert a list of ingredient names into ingredient ids.
# ingredient_list_string = [ingredient names the user has]
def parseIngredientList(ingredient_list_string):
    """Map ingredient names to their ids using ``i2id_map``.

    A name is looked up as given first; if that fails and the name is a
    string, it is looked up again with surrounding whitespace removed (names
    typed as "honey, butter" and split on ',' keep a leading space).

    Parameters
    ----------
    ingredient_list_string : iterable of str
        Ingredient names.

    Returns
    -------
    list
        Ingredient ids, in input order.

    Raises
    ------
    KeyError
        If one or more names are not present in ``i2id_map``; the message
        lists every unknown name.
    """
    ingredient_list_id = []
    unknown = []
    for name in ingredient_list_string:
        if name in i2id_map:
            ingredient_list_id.append(i2id_map[name])
            continue
        cleaned = name.strip() if isinstance(name, str) else name
        if cleaned in i2id_map:
            ingredient_list_id.append(i2id_map[cleaned])
        else:
            unknown.append(name)
    if unknown:
        raise KeyError("Unknown ingredient(s): {}".format(unknown))
    return ingredient_list_id


# Return every recipe that contains at least one of the ingredients on hand.
def getRecipes(ingredient_list_id):
    """Collect the recipes that use any of the given ingredient ids.

    Returns a dict keyed by recipe id. Each value is a dict with:
      'i_req'    - required ingredients, taken from ``r2i_map``
      'i_avail'  - set of the given ingredients that the recipe uses
      'time_req' - required time, taken from ``r2min_map``
      'i_needed' - 'i_req' minus 'i_avail'
    """
    matches = {}

    for ingredient in ingredient_list_id:
        # i2r_map[ingredient]: recipes containing this ingredient
        for recipe in i2r_map[ingredient]:
            entry = matches.get(recipe)
            if entry is None:
                matches[recipe] = {
                    'i_req': r2i_map[recipe],
                    'i_avail': {ingredient},
                    'time_req': r2min_map[recipe],
                }
            else:
                entry['i_avail'] = entry['i_avail'] | {ingredient}

    for entry in matches.values():
        entry['i_needed'] = entry['i_req'].difference(entry['i_avail'])

    return matches


# Return recipe id, recipe name, cooking time, number of ingredients on hand, number of ingredients still needed.
def getRecipeData(r_id,output_data):
    """Summarise one entry of the dict produced by ``getRecipes`` as a flat list.

    Returns ``[recipe id, recipe name (from id2r_map), time_req,
    len(i_avail), len(i_needed)]``.
    """
    recipe_name = id2r_map[r_id]
    info = output_data[r_id]
    return [
        r_id,
        recipe_name,
        info['time_req'],
        len(info['i_avail']),
        len(info['i_needed']),
    ]

In [None]:
# Per-recipe summary (mean rating, number of distinct reviewers), then keep only recipes with enough reviewers.
# NOTE(review): the original note said "2 or more reviews" and was marked "(??)", but the filter
# below keeps recipes with strictly more than 2 distinct users — confirm which threshold is intended.

g = {'rating' : ['mean'],'user_id' : ['nunique']}
int_summary = ratings.groupby(['recipe_id']).agg(g).reset_index()

# Flattened "<column>_<aggregation>" labels; superseded right away by the explicit names assigned below.
ind = pd.Index([e[0] + "_" +e[1] for e in int_summary.columns.tolist()])

int_summary.columns = ind
int_summary.columns = ['recipe_id', 'rating_mean', 'user_id_nunique']

int_summary_2 = int_summary[ (int_summary['user_id_nunique'] > 2)]

In [None]:
# Recipes that passed the reviewer-count filter, with their rating summary attached.
filter_recipe = recipes.merge(int_summary_2, on='recipe_id', how='inner')

In [None]:
filter_recipe.shape

In [None]:
filter_recipe.head()

## steps별 코사인 유사도

In [None]:
# Precomputed steps-based similarity matrix (used below as a DataFrame via .head()/.loc).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("./similarities_sparse_recipe.pickle", "rb") as MyFile:
    similarity_recipe = pickle.load(MyFile)


In [None]:
similarity_recipe.head()

## tags별 코사인 유사도

In [None]:
# Precomputed tags-based similarity matrix (used below as a DataFrame via .head()/.loc).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("./similarities_sparse_tag.pickle", "rb") as MyFile:
    similarity_tag = pickle.load(MyFile)

In [None]:
similarity_tag.head()

## 평점별 코사인 유사도

### 데이터 프레임 인덱싱

In [None]:
# Reviews restricted to the filtered recipes, reduced to (user, recipe, rating).
final_review = filter_recipe.merge(ratings, on='recipe_id')
final_review = final_review[['user_id', 'recipe_id', 'rating']]

In [None]:
final_review.shape

In [None]:
# Recipe ids that have at least 17 reviews (the grouping is done once and reused).
reviews_per_recipe = final_review.groupby(['recipe_id']).count()['user_id']
index_recipe_17 = reviews_per_recipe[reviews_per_recipe >= 17].index

In [None]:
# Keep only the reviews whose recipe id is in index_recipe_17.
final_review_17=final_review.set_index('recipe_id').loc[index_recipe_17,].reset_index()

In [None]:
# Store rating and recipe_id as 32-bit integers.
final_review_17 = final_review_17.astype({'rating': 'int32', 'recipe_id': 'int32'})

In [None]:
# Users who wrote at least 5 reviews (the grouping is done once and reused).
reviews_per_user = final_review_17.groupby('user_id').count()['rating']
index_user_5 = reviews_per_user[reviews_per_user >= 5].index

In [None]:
# Keep only the reviews whose user id is in index_user_5.
final_rating=final_review_17.set_index('user_id').loc[index_user_5,].reset_index()

In [None]:
# User x recipe rating matrix (rows: user_id, columns: recipe_id).
# pivot_table uses its default aggregation (mean) when a user/recipe pair occurs more than once.
matrix_final_rating=final_rating.pivot_table('rating', index='user_id', columns='recipe_id')

In [None]:
# Fill missing ratings with 0 (get_not_reviews / get_reviews below treat values <= 0 as "not reviewed")
matrix_final_rating = matrix_final_rating.fillna(0)

In [None]:
matrix_final_rating.shape

In [None]:
# Align indices: restrict the steps similarity matrix to recipes present in final_rating.
# The rated-recipe index and the intersection are each computed once and reused.
step_rated_recipe_ids = final_rating.groupby('recipe_id').count().index
step_common_ids = similarity_recipe.index.intersection(step_rated_recipe_ids)
similarity_step = similarity_recipe.loc[step_common_ids, step_common_ids]

In [None]:
# Align indices: restrict the tags similarity matrix to recipes present in final_rating.
# The rated-recipe index and the intersection are each computed once and reused.
tag_rated_recipe_ids = final_rating.groupby('recipe_id').count().index
tag_common_ids = similarity_tag.index.intersection(tag_rated_recipe_ids)
similarity_tag = similarity_tag.loc[tag_common_ids, tag_common_ids]

In [None]:
# Average of the steps-based and tags-based cosine similarities.
# NOTE(review): this rebinds `similarity_recipe`, which the steps alignment cell reads as its input,
# so re-running that cell after this one would start from the averaged matrix.
similarity_recipe=(similarity_step+similarity_tag)/2

### 코사인 유사도 계산

In [None]:
# Item-item cosine similarity, computed on the transposed user x recipe rating matrix
item_sim = cosine_similarity(matrix_final_rating.T, matrix_final_rating.T)

In [None]:
# Wrap the NumPy matrix returned by cosine_similarity() in a DataFrame labelled on both axes
# with the columns of matrix_final_rating (recipe ids)
item_sim_df = pd.DataFrame(data=item_sim, index=matrix_final_rating.columns,
                          columns=matrix_final_rating.columns)
print(item_sim_df.shape)

In [None]:
def predict_rating(ratings_arr, item_sim_arr ):
    """Predict ratings as a similarity-weighted sum of the given ratings.

    Computes ``ratings_arr . item_sim_arr`` and divides each column by the sum
    of absolute similarities of the corresponding row of ``item_sim_arr``.
    """
    weighted_sum = ratings_arr.dot(item_sim_arr)
    # One normaliser per item, shaped (1, n_items) so it broadcasts over the rows.
    row_norms = np.asarray(np.abs(item_sim_arr).sum(axis=1)).reshape(1, -1)
    return weighted_sum / row_norms

In [None]:
# Item-based predicted ratings, labelled like matrix_final_rating (rows: user_id, columns: recipe_id).
ratings_pred = predict_rating(matrix_final_rating.values , item_sim_df.values)
ratings_pred_matrix_i = pd.DataFrame(data=ratings_pred, index= matrix_final_rating.index,
                                   columns = matrix_final_rating.columns)

In [None]:
ratings_pred_matrix_i

## step과 tag와 평점별  추천시스템

In [None]:
from sklearn.metrics import mean_squared_error

 
def get_mse(pred, actual):
    """Mean squared error between ``pred`` and ``actual``.

    Only positions where ``actual`` is non-zero are compared; zero entries of
    ``actual`` are ignored.
    """
    rated = actual.nonzero()
    pred_rated = pred[rated].flatten()
    actual_rated = actual[rated].flatten()
    return mean_squared_error(pred_rated, actual_rated)


In [None]:
def predict_rating_topsim(ratings_arr, item_sim_arr, n=20):
    """Predict ratings using only the ``n`` most similar items per item.

    Parameters
    ----------
    ratings_arr : np.ndarray, shape (n_users, n_items)
        Rating matrix.
    item_sim_arr : np.ndarray, shape (n_items, n_items)
        Item-item similarity matrix.
    n : int, default 20
        Number of most similar items used for each prediction.

    Returns
    -------
    np.ndarray
        Array with the shape of ``ratings_arr``. A column whose selected
        similarities sum to zero (in absolute value) is left at 0.
    """
    pred = np.zeros(ratings_arr.shape)

    for col in range(ratings_arr.shape[1]):
        # Indices of the n items most similar to `col`, most similar first.
        # Kept as a plain 1-D index array: wrapping it in a list makes NumPy
        # return (1, n)-shaped selections and forces an array-to-scalar
        # conversion on assignment.
        top_n_items = np.argsort(item_sim_arr[:, col])[:-n-1:-1]
        weights = item_sim_arr[col, top_n_items]
        norm = np.sum(np.abs(weights))
        if norm == 0:
            continue
        # All users at once: (n_users, n) . (n,) -> (n_users,)
        pred[:, col] = ratings_arr[:, top_n_items].dot(weights) / norm
    return pred

In [None]:
def get_not_reviews(ratings_matrix, userId):
    """Return the recipes (column labels) the user has not reviewed.

    A recipe counts as reviewed when the user's rating is greater than 0.
    The result keeps the column order of ``ratings_matrix``.

    Parameters
    ----------
    ratings_matrix : pd.DataFrame
        Rating matrix indexed by user, one column per recipe.
    userId
        Row label of the user.

    Returns
    -------
    list
        Column labels whose rating for ``userId`` is not > 0.
    """
    user_rating = ratings_matrix.loc[userId, :]
    # Boolean mask instead of a list-membership test for every column.
    reviewed_mask = (user_rating > 0).values
    return ratings_matrix.columns[~reviewed_mask].tolist()

In [None]:
def get_reviews(ratings_matrix, userId):
    """Return the recipes (column labels) the user has reviewed.

    Every column whose rating for the user is not <= 0 is returned, in the
    column order of ``ratings_matrix``.

    Parameters
    ----------
    ratings_matrix : pd.DataFrame
        Rating matrix indexed by user, one column per recipe.
    userId
        Row label of the user.

    Returns
    -------
    list
        Column labels whose rating for ``userId`` is not <= 0.
    """
    user_rating = ratings_matrix.loc[userId, :]
    # Boolean mask instead of a list-membership test for every column.
    not_reviewed_mask = (user_rating <= 0).values
    return ratings_matrix.columns[~not_reviewed_mask].tolist()

In [None]:
def recipes_by_collaborative(pred_df, userId, unseen_list):
    """Return the user's predicted ratings for ``unseen_list`` as a one-column
    DataFrame named 'score', indexed by recipe."""
    user_scores = pred_df.loc[userId, unseen_list]
    return user_scores.to_frame(name='score')

In [None]:
def recipes_by_content_mean(similarity_matrix, seen_list, unseen_list):
    """Content score of each unseen recipe: its mean similarity to the seen recipes.

    Parameters
    ----------
    similarity_matrix : pd.DataFrame
        Recipe x recipe similarity matrix.
    seen_list : list
        Recipe ids the user has reviewed (rows selected).
    unseen_list : list
        Recipe ids the user has not reviewed (columns selected).

    Returns
    -------
    pd.DataFrame
        One column 'score', indexed by the unseen recipe ids.
    """
    cont_recipes = similarity_matrix.loc[seen_list, unseen_list]
    # Reduce over the seen recipes explicitly. np.mean(DataFrame) forwards
    # axis=None, which pandas >= 2.0 interprets as "reduce over both axes"
    # and returns a single scalar.
    return cont_recipes.mean(axis=0).to_frame(name='score')

In [None]:
def recipes_by_content_max(similarity_matrix, seen_list, unseen_list):
    """Content score of each unseen recipe: its maximum similarity to the seen recipes.

    Parameters
    ----------
    similarity_matrix : pd.DataFrame
        Recipe x recipe similarity matrix.
    seen_list : list
        Recipe ids the user has reviewed (rows selected).
    unseen_list : list
        Recipe ids the user has not reviewed (columns selected).

    Returns
    -------
    pd.DataFrame
        One column 'score', indexed by the unseen recipe ids.
    """
    cont_recipes = similarity_matrix.loc[seen_list, unseen_list]
    # Explicit axis=0 (over the seen recipes): np.max(DataFrame) forwards
    # axis=None, which newer pandas reduces to a single scalar.
    return cont_recipes.max(axis=0).to_frame(name='score')

In [None]:
def inverse_weighted_ensemble(a,b):
    """Combine two scores, weighting each by the inverse of its own value.

    With weights proportional to 1/a and 1/b the combination
    ``a*w_a + b*w_b`` equals the harmonic mean ``2*a*b / (a + b)``. It is
    computed in that form so that a zero score (which min-max scaling always
    produces for the lowest item) gives 0 instead of NaN from ``1/0``.

    ``a`` and ``b`` may be scalars, arrays or aligned pandas objects. Where
    ``a + b`` is 0 the result is undefined (NaN for arrays/pandas objects).
    """
    final_score = 2 * a * b / (a + b)
    return final_score

In [None]:
def scaling_method(recipes_type, feature_range=(0, 1)):
    """Min-max scale the scores of a one-column score frame.

    Parameters
    ----------
    recipes_type : pd.DataFrame
        Score frame; its values are flattened to a single feature column
        before scaling.
    feature_range : tuple (min, max), default (0, 1)
        Target range passed to ``MinMaxScaler``. The default reproduces the
        previous fixed behaviour.

    Returns
    -------
    pd.DataFrame
        Copy of ``recipes_type`` whose 'score' column holds the scaled values;
        the input frame is not modified.
    """
    scaler = MinMaxScaler(feature_range=feature_range)
    scaled = scaler.fit_transform(np.array(recipes_type).reshape(-1, 1))
    temp_df = recipes_type.copy()
    temp_df['score'] = scaled.ravel()
    return temp_df

In [None]:
def scaling_method_1_5(recipes_type):
    """Min-max scale the scores of a one-column score frame to the range (0, 1.4).

    Note that, despite the function name, the target range is (0, 1.4).
    Returns a copy of ``recipes_type`` whose 'score' column holds the scaled
    values; the input frame is not modified.
    """
    flat_scores = np.array(recipes_type).reshape(-1, 1)
    scaled_scores = MinMaxScaler(feature_range=(0, 1.4)).fit_transform(flat_scores)
    result = recipes_type.copy()
    result['score'] = scaled_scores
    return result

In [None]:
def recomm_recipes(final_score_dataframe, top_n):
    """Return the ``top_n`` rows with the highest 'score', best first."""
    ranked = final_score_dataframe.sort_values(by='score', ascending=False)
    top_rows = ranked[:top_n]
    return top_rows

In [None]:
def weighted_ensemble(a,b):
    """Combine two scores, weighting each by its own share of their sum.

    Returns ``a * a/(a+b) + b * b/(a+b)``.
    """
    total = a + b
    weight_a = a / total
    weight_b = b / total
    return a * weight_a + b * weight_b

# 콘텐츠 기반 + 아이템 기반 협업필터링

In [None]:
# Item-based collaborative filtering + content-based scores

# user_id
user_id=4601

# recipes the user has NOT reviewed
unseen_list = get_not_reviews(matrix_final_rating, user_id)

# recipes the user has reviewed
seen_list = get_reviews(matrix_final_rating, user_id)

# collaborative-filtering score
item_recipes = recipes_by_collaborative(ratings_pred_matrix_i, user_id, unseen_list)

# scaled collaborative-filtering score
scaled_item_recipes=scaling_method(item_recipes)

# content-based score
cont_recipes= recipes_by_content_mean(similarity_recipe, seen_list, unseen_list)

# scaled content-based score
scaled_cont_recipes=scaling_method(cont_recipes)

# final score: combine the two *scaled* scores. Previously the unscaled
# cont_recipes was passed here and scaled_cont_recipes was never used.
final_score_df=inverse_weighted_ensemble(scaled_item_recipes,scaled_cont_recipes)

In [None]:
recomm_recipes(final_score_df, 10)

In [None]:
plt.hist(item_recipes)

In [None]:
plt.hist(cont_recipes)

# 콘텐츠 기반 + 잠재요인 협업필터링

In [None]:
from sklearn.metrics import mean_squared_error

def get_rmse(R, P, Q, non_zeros):
    """RMSE between ``R`` and the reconstruction ``P . Q.T`` at the rated positions.

    ``non_zeros`` is a sequence whose items start with (row, column); only
    those positions of ``R`` and of the predicted matrix are compared.
    """
    # Predicted R matrix from the two factor matrices.
    predicted = np.dot(P, Q.T)

    # Row / column positions of the actual ratings.
    row_idx = [entry[0] for entry in non_zeros]
    col_idx = [entry[1] for entry in non_zeros]

    actual_values = R[row_idx, col_idx]
    predicted_values = predicted[row_idx, col_idx]

    return np.sqrt(mean_squared_error(actual_values, predicted_values))

In [None]:
def matrix_factorization(R, K, steps=200, learning_rate=0.01, r_lambda = 0.01):
    """Factorise ``R`` into ``P`` (users x K) and ``Q`` (items x K) with SGD.

    Parameters
    ----------
    R : np.ndarray, shape (num_users, num_items)
        Rating matrix; only entries > 0 are used for training.
    K : int
        Number of latent factors.
    steps : int, default 200
        Number of SGD passes over the rated entries.
    learning_rate : float, default 0.01
        SGD step size.
    r_lambda : float, default 0.01
        L2 regularisation strength.

    Returns
    -------
    (P, Q) : tuple of np.ndarray

    Notes
    -----
    Seeds NumPy's global RNG with 1 so that results are reproducible.
    Prints the training RMSE every 10th step.
    """
    num_users, num_items = R.shape
    # Initialise P and Q with normally distributed random values.
    np.random.seed(1)
    P = np.random.normal(scale=1./K, size=(num_users, K))
    Q = np.random.normal(scale=1./K, size=(num_items, K))

    # (row, column, value) for every entry with R > 0. np.argwhere walks the
    # matrix in row-major order, i.e. the same order as a nested i/j loop.
    non_zeros = [(i, j, R[i, j]) for i, j in np.argwhere(R > 0)]

    # Update P and Q with SGD.
    for step in range(steps):
        for i, j, r in non_zeros:
            # Error between the actual and the predicted value.
            eij = r - np.dot(P[i, :], Q[j, :].T)
            # SGD update with regularisation.
            P[i,:] = P[i,:] + learning_rate*(eij * Q[j, :] - r_lambda*P[i,:])
            Q[j,:] = Q[j,:] + learning_rate*(eij * P[i, :] - r_lambda*Q[j,:])

        # The RMSE is only needed for the progress report, so it is computed
        # only on the steps that print it.
        if (step % 10) == 0 :
            rmse = get_rmse(R, P, Q, non_zeros)
            print("### iteration step : ", step," rmse : ", rmse)

    return P, Q

In [None]:
# P, Q = matrix_factorization(matrix_final_rating.values, K=50, steps=200, learning_rate=0.01, r_lambda = 0.01)
# pred_matrix = np.dot(P, Q.T)

In [None]:
# Load a stored prediction matrix instead of running the (commented-out) factorization above.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("./pred_matrix_p.pickle", "rb") as MyFile:
    pred_matrix = pickle.load(MyFile)

In [None]:
# Label the loaded prediction matrix like matrix_final_rating (rows: user_id, columns: recipe_id).
# NOTE(review): assumes pred_matrix has the same shape and ordering as matrix_final_rating — confirm.
ratings_pred_matrix_p = pd.DataFrame(data=pred_matrix, index= matrix_final_rating.index,
                                   columns = matrix_final_rating.columns)

In [None]:
ratings_pred_matrix_p

In [None]:
# Latent-factor collaborative filtering + content-based scores
# 4500
# 5523

# user_id
user_id=7093

# recipes the user has NOT reviewed
unseen_list = get_not_reviews(matrix_final_rating, user_id)

# recipes the user has reviewed
seen_list = get_reviews(matrix_final_rating, user_id)

# latent-factor score
potential_recipes = recipes_by_collaborative(ratings_pred_matrix_p, user_id, unseen_list)

# scaled latent-factor score
scaled_potential_recipes=scaling_method(potential_recipes)

# content-based score
cont_recipes= recipes_by_content_mean(similarity_recipe, seen_list, unseen_list)

# scaled content-based score
scaled_cont_recipes=scaling_method_1_5(cont_recipes)

# final score: combine the two *scaled* scores. Previously the unscaled
# cont_recipes was passed here and scaled_cont_recipes was never used.
final_score_df=inverse_weighted_ensemble(scaled_potential_recipes,scaled_cont_recipes)

In [None]:
final_score_df

In [None]:
recomm_recipes(final_score_df, 10)

In [None]:
plt.hist(potential_recipes)

In [None]:
plt.hist(cont_recipes)

# 최종 output

In [None]:
def filtering_recipes(ingredient_list):
    """Build a per-recipe table for the recipes that use any given ingredient.

    Parameters
    ----------
    ingredient_list : list of str
        Ingredient names on hand.

    Returns
    -------
    pd.DataFrame
        Indexed by recipe id with columns 'i_req', 'i_avail', 'time_req',
        'i_needed' (as produced by ``getRecipes``) plus 'num_needed', the
        number of ingredients still needed. Empty (with the same columns)
        when no recipe matches.
    """
    # The recipe lookup is done once and reused.
    matches = getRecipes(parseIngredientList(ingredient_list))
    if not matches:
        return pd.DataFrame(columns=['i_req', 'i_avail', 'time_req', 'i_needed', 'num_needed'])

    temp_df = pd.DataFrame(matches).T
    temp_df['time_req'] = temp_df['time_req'].astype('int')
    temp_df['num_needed'] = temp_df['i_needed'].apply(len)
    return temp_df

In [None]:
def filter_final_score(ingredient_list, final_score_dataframe):
    """Restrict the ingredient-matched recipes to those that have a final score.

    Parameters
    ----------
    ingredient_list : list of str
        Ingredient names on hand.
    final_score_dataframe : pd.DataFrame
        Score frame indexed by recipe id.

    Returns
    -------
    pd.DataFrame
        Rows of ``filtering_recipes(ingredient_list)`` whose recipe id also
        appears in the index of ``final_score_dataframe``.
    """
    # filtering_recipes is expensive; call it once and reuse the result.
    candidates = filtering_recipes(ingredient_list)
    common_ids = candidates.index.intersection(final_score_dataframe.index)
    filter_final_score_df = candidates.loc[common_ids]
    return filter_final_score_df

In [None]:
def filter_final_score_output(filter_final_score_dataframe, final_score_dataframe, have_time=None):
    """Select the scores of recipes that need few extra ingredients and fit the time limit.

    Parameters
    ----------
    filter_final_score_dataframe : pd.DataFrame
        Recipe table with 'num_needed' and 'time_req' columns, indexed by
        recipe id.
    final_score_dataframe : pd.DataFrame
        Score frame indexed by recipe id.
    have_time : int or None, default None
        Maximum allowed 'time_req'. When None, a module-level ``have_time``
        is used if one exists (the function used to read that global
        directly); if there is none, no time limit is applied.

    Returns
    -------
    pd.DataFrame
        Rows of ``final_score_dataframe`` for recipes whose 'num_needed' is at
        most the 75th percentile and whose 'time_req' is within the limit.
        Empty when the recipe table is empty.
    """
    if have_time is None:
        have_time = globals().get('have_time')
    if filter_final_score_dataframe.empty:
        # np.percentile cannot be computed on an empty column.
        return final_score_dataframe.iloc[0:0]

    q3 = np.percentile(filter_final_score_dataframe['num_needed'], 75)
    keep = filter_final_score_dataframe['num_needed'] <= q3
    if have_time is not None:
        keep = keep & (filter_final_score_dataframe['time_req'] <= have_time)
    filter_final_score_dataframe_index = filter_final_score_dataframe[keep].index
    result = final_score_dataframe.loc[filter_final_score_dataframe_index]
    return result

### ver1. 데이터 프레임 형태로 출력

- 함수명 끝에 sub가 들어간 것은 대체재로 검색가능한 코드

In [None]:
def final_output_sub():
    """Interactively recommend up to 10 recipes (ingredient-map based filter).

    Prompts for a user id, a comma-separated ingredient list and a maximum
    cooking time. Combines the latent-factor and content-based scores, keeps
    recipes found through the ingredient maps whose number of missing
    ingredients is at most the 75th percentile and whose required time fits,
    and returns the 10 best-scored ones.

    Returns
    -------
    pd.DataFrame
        Columns 'name', 'minutes', 'ingredients', 'description', 'tags',
        'steps', 'calories', 'score', indexed by recipe id.
    """
    # input
    user_id = int(input("아이디를 입력해주세요."))
    raw_ingredients = input("가지고 있는 재료를 입력해주세요.")
    # Strip whitespace around each name ("honey, butter") and drop empty entries.
    have_ingre = [name.strip() for name in raw_ingredients.split(',') if name.strip()]
    have_time = int(input("제조 가능한 최대 시간을 입력해주세요."))

    # recipes the user has NOT reviewed
    unseen_list = get_not_reviews(matrix_final_rating, user_id)

    # recipes the user has reviewed
    seen_list = get_reviews(matrix_final_rating, user_id)

    # latent-factor score
    potential_recipes = recipes_by_collaborative(ratings_pred_matrix_p, user_id, unseen_list)

    # scaled latent-factor score
    scaled_potential_recipes=scaling_method(potential_recipes)

    # content-based score
    cont_recipes= recipes_by_content_mean(similarity_recipe, seen_list, unseen_list)

    # scaled content-based score
    scaled_cont_recipes=scaling_method_1_5(cont_recipes)

    # final score
    final_score_df=weighted_ensemble(scaled_potential_recipes,scaled_cont_recipes)

    # cooking-time and ingredient filtering
    filter_final_score_df=filter_final_score(have_ingre, final_score_df)

    if filter_final_score_df.empty:
        # No candidate recipe: skip the percentile, which is undefined on empty data.
        filter_final_score_df_index = filter_final_score_df.index
    else:
        q3=np.percentile(filter_final_score_df['num_needed'], 75)
        filter_final_score_df_index=filter_final_score_df[(filter_final_score_df.num_needed<=q3)&(filter_final_score_df.time_req<=have_time)].index
    result=final_score_df.loc[filter_final_score_df_index]
    result = result.sort_values(by='score', ascending=False)[:10]

    final = recipes.set_index('recipe_id').loc[result.index][['name', 'minutes','ingredients','description','tags','steps','calories']]
    final['score'] = result['score']

    return final

In [None]:
#test한 입력값
#user_id = 1533
#ingredients=winter squash,mexican seasoning,mixed spice,honey,butter,olive oil,salt
#time= 30

In [None]:
final_output_sub()  #18초 정도 소요됨

In [None]:
def final_output():
    """Interactively recommend up to 10 recipes (direct ingredient-overlap filter).

    Prompts for a user id, a comma-separated ingredient list and a maximum
    cooking time. Combines the latent-factor and content-based scores, keeps
    recipes within the time limit whose number of ingredients in common with
    the user's list is at or above the 75th percentile, and returns the 10
    best-scored ones.

    Returns
    -------
    pd.DataFrame
        Columns 'name', 'minutes', 'ingredients', 'description', 'tags',
        'steps', 'calories', 'score'.
    """
    # input
    user_id = int(input("아이디를 입력해주세요."))
    raw_ingredients = input("가지고 있는 재료를 입력해주세요.")
    # Strip whitespace around each name ("honey, butter") and drop empty entries.
    have_ingre = [name.strip() for name in raw_ingredients.split(',') if name.strip()]
    have_time = int(input("제조 가능한 최대 시간을 입력해주세요."))

    # recipes the user has NOT reviewed
    unseen_list = get_not_reviews(matrix_final_rating, user_id)

    # recipes the user has reviewed
    seen_list = get_reviews(matrix_final_rating, user_id)

    # latent-factor score for THIS user. This step was missing, so the
    # function silently used whatever global `potential_recipes` was left
    # over from an earlier cell (a different user).
    potential_recipes = recipes_by_collaborative(ratings_pred_matrix_p, user_id, unseen_list)

    # scaled latent-factor score
    scaled_potential_recipes=scaling_method(potential_recipes)

    # content-based score
    cont_recipes= recipes_by_content_mean(similarity_recipe, seen_list, unseen_list)

    # scaled content-based score
    scaled_cont_recipes=scaling_method_1_5(cont_recipes)

    # final score
    final_score_df=weighted_ensemble(scaled_potential_recipes,scaled_cont_recipes)

    # cooking-time and ingredient filtering
    result_columns = ['name', 'minutes','ingredients','description','tags','steps','calories','score']
    output = pd.merge(final_score_df,recipes, on='recipe_id')
    # .copy() so the new column below is not assigned on a filtered slice.
    output = output[output.minutes <= have_time].copy()
    if output.empty:
        # Nothing fits the time limit; the percentile is undefined on empty data.
        return output[result_columns]

    have_set = set(have_ingre)
    output['com_ingre'] = output['ingredients'].apply(lambda items: len(have_set & set(items)))
    q3 = np.percentile(output['com_ingre'], 75)
    # Keep the recipes sharing the MOST ingredients with the user (>= q3),
    # consistent with final_output2; the comparison used to be `<=`.
    output = output[output.com_ingre>=q3]
    output = output[result_columns]
    output= output.sort_values(by='score', ascending=False)[:10]

    return output

In [None]:
final_output()

### ver2. 목록 형태로 출력

- 함수명 끝에 sub가 들어간 것은 대체재로 검색가능한 코드

In [None]:
def print_scroll(df, top_n=5):
    """Print the first ``top_n`` recipes of ``df`` as a readable list.

    ``df`` must have the columns 'name', 'minutes', 'description',
    'ingredients' and 'steps' (the last one holding a sequence of step
    strings). For every recipe the running number, name, minutes,
    description, ingredients (printed without the surrounding list brackets)
    and numbered steps are written to stdout.

    The input frame is not modified. Returns None.
    """
    top = df[:top_n]
    rows = zip(top['name'], top['minutes'], top['description'], top['ingredients'], top['steps'])
    for num, (name, minutes, description, ingredients, steps) in enumerate(rows, start=1):
        print()
        print("#", num)
        print("name:", name)
        print("minutes:", minutes)
        print("description:", description)
        print()
        print("ingredients:", sep='')
        # Format the ingredients per row instead of rewriting the whole
        # column of a slice on every iteration.
        print(str(ingredients).strip('[]'))
        print("steps:")
        for step_no, step in enumerate(steps, start=1):
            print(step_no, ')', step)
        print()

In [None]:
def final_output2_sub():
    """List-style variant of ``final_output_sub``.

    Runs ``final_output_sub()`` (same prompts, same scoring and filtering)
    and prints the resulting recipes with ``print_scroll`` instead of
    returning the DataFrame. The scoring pipeline used to be duplicated here
    line for line.

    Returns
    -------
    None
        The return value of ``print_scroll``.
    """
    final = final_output_sub()

    output = print_scroll(final)

    return output

In [None]:
def final_output2():
    """Interactively recommend recipes and print them as a list
    (direct ingredient-overlap filter).

    Prompts for a user id, a comma-separated ingredient list and a maximum
    cooking time. Combines the latent-factor and content-based scores, keeps
    recipes within the time limit whose number of ingredients in common with
    the user's list is at or above the 75th percentile, takes the 10
    best-scored ones and prints them with ``print_scroll``.

    Returns
    -------
    None
        The return value of ``print_scroll``.
    """
    # input
    user_id = int(input("아이디를 입력해주세요."))
    raw_ingredients = input("가지고 있는 재료를 입력해주세요.")
    # Strip whitespace around each name ("honey, butter") and drop empty entries.
    have_ingre = [name.strip() for name in raw_ingredients.split(',') if name.strip()]
    have_time = int(input("제조 가능한 최대 시간을 입력해주세요."))

    # recipes the user has NOT reviewed
    unseen_list = get_not_reviews(matrix_final_rating, user_id)

    # recipes the user has reviewed
    seen_list = get_reviews(matrix_final_rating, user_id)

    # latent-factor score
    potential_recipes = recipes_by_collaborative(ratings_pred_matrix_p, user_id, unseen_list)

    # scaled latent-factor score
    scaled_potential_recipes=scaling_method(potential_recipes)

    # content-based score
    cont_recipes= recipes_by_content_mean(similarity_recipe, seen_list, unseen_list)

    # scaled content-based score
    scaled_cont_recipes=scaling_method_1_5(cont_recipes)

    # final score
    final_score_df=weighted_ensemble(scaled_potential_recipes,scaled_cont_recipes)

    # cooking-time and ingredient filtering
    result_columns = ['name', 'minutes','ingredients','description','tags','steps','calories','score']
    output = pd.merge(final_score_df,recipes, on='recipe_id')
    # .copy() so the new column below is not assigned on a filtered slice.
    output = output[output.minutes <= have_time].copy()
    if output.empty:
        # Nothing fits the time limit; the percentile is undefined on empty data.
        return print_scroll(output[result_columns])

    # Build the set of available ingredients once instead of once per row.
    have_set = set(have_ingre)
    output['com_ingre'] = output['ingredients'].apply(lambda items: len(have_set & set(items)))
    q3 = np.percentile(output['com_ingre'], 75)
    output = output[output.com_ingre>=q3]
    output = output[result_columns]
    output= output.sort_values(by='score', ascending=False)[:10]

    final = print_scroll(output)

    return final

In [None]:
final_output2_sub() #17초

In [None]:
final_output2() #1초