## 농촌진흥청 한식요리 재료에 대한 정보를 이용한 유사성
1. 음식 코드와 식품 코드(재료)의 중량 정보의 pivot 생성
2. 데이터 이용 cosine similarity
3. 계산된 similarity를 이용한 유사 식품 추출

In [3]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import json
from lib.similarity_model import SimilarityModel

In [5]:
data_raw = pd.read_csv('./dataset/(한식요리정보)FD_음식_신규-수정.csv',encoding='cp949')
data = data_raw.copy()
data['식품명'].fillna('',inplace=True)
data.head(2)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['식품명'].fillna('',inplace=True)


Unnamed: 0,음식_코드,음식명,식품_코드,식품명,정렬_순서,식품_중량,등록자_고유_번호,등록_일시,수정자_고유_번호,수정_일시,중량_1,단위_1,중량_2,단위_2,중량_3,단위_3,메인_식품_여부
0,D230046,우메기떡(개성주악),F02876,"소금, 천일염, 가는소금",6,6.0,SYSTEM,2023-02-13,SYSTEM,2023-02-13,0.5,682032,0.0,0,0.0,0,0
1,D230046,우메기떡(개성주악),F02712,콩기름,7,400.0,SYSTEM,2023-02-13,SYSTEM,2023-02-13,2.0,682031,0.0,0,362.8667,682002,0


In [3]:
data['음식_코드'].unique()

array(['D230046', 'D022018', 'D285006', ..., 'D082023', 'D053038',
       'D031022'], shape=(3093,), dtype=object)

In [22]:
data_pivot = data.pivot(index='음식_코드',columns='식품_코드',values='식품_중량')
target = data_pivot.fillna(0.0)
target.iloc[:,1295:].head()

식품_코드,F03108,F03109,F03110,F03111,F03112,F03113,F03114,F03115,F03116,F03117,F03118,F03119,F03123,F03338,F03485,F03489
음식_코드,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
D011001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
D011002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
D011003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,210.0,0.0,0.0,0.0
D011004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
D011005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
features = target.index
similarity = cosine_similarity(target.to_numpy(),target.to_numpy())
## 유사도 높은순의 인덱스 정리
argsort = np.argsort(similarity, axis=1)[:,::-1]
## 유사도 높은순의 값 정리
valuesort = np.sort(similarity, axis=1)[:,::-1]
similarity

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], shape=(3093, 3093))

In [24]:
## 음식 코드에 대한 유사도 기준값보다 큰 리스트들 정리(유사도 높은순)
threadhold = 0
feature_similarity_map = {}
for i,arg in enumerate(argsort):
    top = (valuesort[i] > threadhold).sum()
    top_idx = 3 if top < 3 else top
    arg_top = argsort[i,:top_idx]
    if features[i] in feature_similarity_map:
        print(features[i],arg_top[:1],feature_similarity_map[features[i]])
    feature_similarity_map[features[i]] = features[arg_top].tolist()

In [None]:
# class SimilarityModel():
#     def __init__(self,data_df,feature_map) -> None:
#         ## data : 농촌진흥청 한식 요리 정보 재료 DataFrame
#         self.data = data_df
#         ## reature_similarity_map : 유사도 정보를 이용한 최종 결과물
#         self.feature_similarity_map = feature_map
#     ## 음식 코드에 대한 음식명 가져오기
#     ## pkl 대상 : feature_similarity_map, data
#     def find_food(self,food_code):
#         ## data load
#         food = self.data.loc[self.data['음식_코드'] == food_code,'음식명']
#         if not food.empty:
#             return food.values[0]
#         return None
#     def find_food_and_recommend(self,food_code,top=3):
#         ## feature_similarity_map
#         food = self.find_food(food_code)
#         recommend = []
#         for code in self.feature_similarity_map[food_code]:
#             food_name = self.find_food(code)
#             if food_name not in recommend:
#                 recommend.append(self.find_food(code))
#             if len(recommend) == top+1:
#                 break
#         recommend.remove(food)
#         return food, recommend
#     def find_food_by_name(self,food_name):
#         ## 해당 음식명이 포함된 리스트 찾기
#         food_df = self.data.loc[self.data['식품명'].str.contains(food_name),['음식_코드','음식명']]
#         ## 중복된 것 삭제
#         food_df.drop_duplicates(inplace=True)
#         ## 코드로 소팅하여 (코드,명) 형식의 리스트 리턴
#         return food_df.sort_values(by='음식_코드').values.tolist()
    

In [25]:
## data : 농촌진흥청 한식 요리 정보 재료 DataFrame
## reature_similarity_map : 유사도 정보를 이용한 최종 결과물
import joblib
model = SimilarityModel(data, feature_similarity_map)
joblib.dump(model,'SimilarityModel.pkl')

['SimilarityModel.pkl']

In [None]:
"""
## 음식 코드에 대한 음식명 가져오기
## pkl 대상 : feature_similarity_map, data
def find_food(food_code):
    ## data load
    food = data.loc[data['음식_코드'] == food_code,'음식명']
    if not food.empty:
        return food.values[0]
    return None
def find_food_and_recommend(food_code,top=3):
    ## feature_similarity_map
    food = find_food(food_code)
    recommend = []
    for code in feature_similarity_map[food_code]:
        food_name = find_food(code)
        if food_name not in recommend:
            recommend.append(find_food(code))
        if len(recommend) == top+1:
            break
    recommend.remove(food)
    return food, recommend
def find_food_by_name(food_name):
    ## 해당 음식명이 포함된 리스트 찾기
    food_df = data.loc[data['식품명'].str.contains(food_name),['음식_코드','음식명']]
    ## 중복된 것 삭제
    food_df.drop_duplicates(inplace=True)
    ## 코드로 소팅하여 (코드,명) 형식의 리스트 리턴
    return food_df.sort_values(by='음식_코드').values.tolist()
"""

In [32]:
model.find_food_and_recommend('D040014')

('호박죽', ['늙은호박나물', '호박나물(호박탕쉬)', '호박들깨죽'])

In [131]:
lst = model.find_food_by_name('돼지')
lst

[['D013005', '콩나물밥'],
 ['D014001', '김치볶음밥(돼지고기)'],
 ['D014004', '비빔밥(고추장, 돼지고기)'],
 ['D015009', '잡채밥'],
 ['D015010', '잡탕밥'],
 ['D015011', '제육덮밥(고추장)'],
 ['D015012', '짜장밥'],
 ['D015013', '카레라이스'],
 ['D015014', '하이라이스'],
 ['D015031', '카레라이스'],
 ['D031015', '짜장면'],
 ['D032002', '김치만두'],
 ['D032006', '떡만둣국(돼지고기만두, 멸치)'],
 ['D032009', '곤드레 만둣국'],
 ['D032015', '편수 소(개성 편수)'],
 ['D032017', '만두'],
 ['D051013', '김칫국(돼지고기)'],
 ['D051032', '돼지고기국(고추장)'],
 ['D053002', '감자탕'],
 ['D053029', '짬뽕국'],
 ['D053046', '버섯탕'],
 ['D053047', '뼈다귀감자탕(감자탕)'],
 ['D053059', '콩비지탕'],
 ['D053060', '콩비지탕'],
 ['D062002', '돼지고기찌개(고추장)'],
 ['D062003', '부대찌개(고추장)'],
 ['D063004', '돼지고기된장찌개(고추장)'],
 ['D063005', '돼지고기된장찌개(양배추)'],
 ['D063006', '돼지고기된장찌개(호박잎)'],
 ['D063017', '호박된장찌개(돼지고기)'],
 ['D064003', '김치전골(돼지고기)'],
 ['D064004', '김치전골(돼지고기, 떡)'],
 ['D065002', '감자찌개(고추장, 돼지고기)'],
 ['D065003', '김치찌개(돼지고기)'],
 ['D065004', '김치찌개(돼지고기, 된장)'],
 ['D065013', '비지찌개(돼지고기)'],
 ['D065015', '순두부찌개'],
 ['D065017', '고추장찌개'],
 ['D065019'

In [None]:
## 코드, 메뉴명, 추천 코드 리스트
print(list(feature_similarity_map.items())[:3])
## 저장된 내용 중 상위 5개, 만약 갯수가 5개 보다 적을 경우 해당 리스트만
feature_similarity_map['D011002'][:5]

[('D011001', ['D011001', 'D311008', 'D081054']), ('D011002', ['D011002', 'D012002', 'D012006', 'D012004', 'D012023', 'D012020', 'D016009', 'D012007', 'D012003', 'D012009', 'D012015', 'D012008', 'D012016', 'D012021', 'D012022', 'D012024', 'D012011', 'D012005', 'D012014', 'D012026', 'D016007', 'D012017', 'D012001', 'D012010', 'D014020', 'D016004', 'D016006', 'D012019', 'D012025', 'D012018', 'D016003', 'D013014', 'D016001', 'D016008', 'D016002', 'D014009', 'D013001', 'D013003', 'D016011', 'D016012', 'D040008', 'D040002', 'D013004', 'D014004', 'D014002', 'D016010', 'D014008', 'D014007', 'D015009', 'D013002', 'D015014', 'D013010', 'D015006', 'D015010', 'D015012', 'D014012', 'D016005', 'D014003', 'D014006', 'D015016', 'D015002', 'D015011', 'D015005', 'D013012', 'D014035', 'D014001', 'D013005', 'D015001', 'D015003', 'D012048', 'D012047', 'D014011', 'D014010', 'D040010', 'D015013', 'D014034', 'D015017', 'D014005', 'D015018', 'D016018', 'D015030', 'D015008', 'D015015', 'D014022', 'D014027', 'D0

['D011002', 'D012002', 'D012006', 'D012004', 'D012023']

In [39]:
## 추천 결과를 코드와 대상 코드 리스트 형식으로 저장
import json
with open('feature_similarity_map.json','w') as f:
    json.dump(feature_similarity_map,f,indent=2)

In [None]:
feature_map_for_json = {}
for code in feature_similarity_map:
    print(code)
    _, recommand = model.find_food_and_recommend(code,top=5)
    feature_map_for_json[code] = {
        'code': model.find_food(code),
        'recommend': recommand
    }

D011001
D011002
D011003
D011004
D011005
D011006
D011007
D012001
D012002
D012003
D012004
D012005
D012006
D012007
D012008
D012009
D012010
D012011
D012012
D012013
D012014
D012015
D012016
D012017
D012018
D012019
D012020
D012021
D012022
D012023
D012024
D012025
D012026
D012027
D012028
D012029
D012030
D012031
D012032
D012033
D012034
D012035
D012036
D012037
D012038
D012039
D012040
D012041
D012042
D012043
D012044
D012045
D012046
D012047
D012048
D012049
D012050
D012051
D012052
D012053
D012054
D012055
D012056
D012057
D013001
D013002
D013003
D013004
D013005
D013006
D013007
D013008
D013009
D013010
D013011
D013012
D013013
D013014
D013015
D013016
D013017
D013018
D013019
D013020
D013021
D013022
D014001
D014002
D014003
D014004
D014005
D014006
D014007
D014008
D014009
D014010
D014011
D014012
D014013
D014014
D014015
D014016
D014017
D014018
D014019
D014020
D014021
D014022
D014023
D014024
D014025
D014026
D014027
D014028
D014029
D014030
D014031
D014032
D014033
D014034
D014035
D014036
D015001
D015002
D015003


In [137]:
feature_map_for_json

{'D011001': {'name': '눌은밥', 'recommend': ['즉석밥(누룽지)', '장어구이']},
 'D011002': {'name': '쌀밥', 'recommend': ['기장밥', '보리밥', '땅콩밥', '찰밥(멥쌀)', '조밥']},
 'D011003': {'name': '찰밥', 'recommend': ['약식', '오곡밥', '귀리밥', '양주밤밥', '영양돌솥밥']},
 'D011004': {'name': '현미밥', 'recommend': ['밥도그', '현미강정', '미나리 덮밥', '대통밥']},
 'D011005': {'name': '현미밥', 'recommend': ['밥도그', '현미강정', '미나리 덮밥', '대통밥']},
 'D011006': {'name': '현미밥', 'recommend': ['밥도그', '현미강정', '미나리 덮밥', '대통밥']},
 'D011007': {'name': '누룽지(멥쌀)',
  'recommend': ['메밀묵', '메밀묵무침', '메밀묵채', '메밀묵밥', '메밀묵채국수']},
 'D012001': {'name': '검정콩밥',
  'recommend': ['콩밥', '쌀밥', '기장밥', '보리밥', '찰밥(멥쌀)']},
 'D012002': {'name': '기장밥', 'recommend': ['쌀밥', '보리밥', '땅콩밥', '찰밥(멥쌀)', '조밥']},
 'D012003': {'name': '녹두밥',
  'recommend': ['찰밥(멥쌀)', '쌀밥', '완두콩밥', '율무밥', '기장밥']},
 'D012004': {'name': '땅콩밥', 'recommend': ['쌀밥', '기장밥', '보리밥', '찰밥(멥쌀)', '조밥']},
 'D012005': {'name': '밤밥', 'recommend': ['쌀밥', '기장밥', '보리밥', '찰밥(멥쌀)', '땅콩밥']},
 'D012006': {'name': '보리밥',
  'recommend': ['보리밥(

In [141]:
with open('feature_map.json','w') as f:
    json.dump(feature_map_for_json,f,indent=4, ensure_ascii=False)