In [2]:
import json 
import requests
import sys
import os
import pandas as pd

In [89]:
# 'irish cream'이 재료에 포함된 칵테일만 찾기
cocktails_with_irish_cream = [
    cocktail for cocktail in json_data['cocktail_info']
    if any('irish cream' in ingredient for ingredient in cocktail['recipe'].keys())
]

# json_data
cocktails_with_irish_cream[0]['recipe'].keys()
ingredient_ids["bailey's irish cream"]

KeyError: "bailey's irish cream"

In [68]:
json_data['cocktail_info'][0]

{'cocktail_name': "'57 chevy with a white license plate",
 'cocktail_glass': 'highball glass',
 'category': 'Cocktail',
 'recipe': {'creme de cacao': 29.5735, 'vodka': 29.5735},
 'alcoholic': 'Alcoholic'}

In [131]:
flavor_data[32].keys()


dict_keys(['name', 'ABV', 'boozy', 'sweet', 'sour', 'bitter', 'umami', 'salty', 'astringent', 'Perceived_temperature', 'spicy', 'herbal', 'floral', 'fruity', 'nutty', 'creamy', 'smoky', 'ID'])

## 재료 임베딩

In [153]:
import numpy as np
import ast
with open('./train_data.json', 'r') as f:
    json_data = json.load(f)
with open('../flavor.json','r') as f:
    flavor_data = json.load(f)
train_embeddings_dict = dict()

class CocktailRecommender:
    def __init__(self, json_data, flavor_data):
        self.cocktail_info = json_data['cocktail_info']
        self.flavor_data = flavor_data
        self.init()
    def normalize_string(self, name):
    #"""특수 문자를 처리하여 이름을 정규화하는 함수"""
        return name.replace('\\"', '"').replace("\\'", "'")

    def init(self):    
        #임베딩 ID 붙이기
        ingredient_ids={}
        for idx, item in enumerate(self.flavor_data):
            item['ID'] = idx
            # 이스케이프 문자를 제거하고 이름을 그대로 사용하여 저장
            normalized_name =self.normalize_string(item['name'])
            ingredient_ids[normalized_name] = idx

        self.ingredient_ids = ingredient_ids
        self.num_ingredients = len(flavor_data)
        self.embedding_dim = 64

    # 레시피별 임베딩 벡터 생성
    def create_recipe_embedding(self, recipe):
        embedding_matrix = np.random.rand(self.num_ingredients, self.embedding_dim)
        # recipe = json_data['cocktail_info'][0]['recipe']
        total_amount = sum(recipe.values())
        normalized_amount = {ingredient:amount/total_amount for ingredient, amount in recipe.items()}
        weighted_embeddings = []
        for ingredient, amount in normalized_amount.items():
           # 레시피의 재료 이름도 동일하게 정규화
            normalized_ingredient = self.normalize_string(ingredient)
            if normalized_ingredient not in self.ingredient_ids:
                raise KeyError(f"Ingredient '{normalized_ingredient}' not found in ingredient_ids")
            ingredient_id = self.ingredient_ids[normalized_ingredient]
            ingredient_embedding = embedding_matrix[ingredient_id]
            weighted_embedding = ingredient_embedding * amount
            weighted_embeddings.append(weighted_embedding)
        recipe_embedding = np.sum(weighted_embeddings, axis=0)

        return recipe_embedding

    def create_recipe_embedding_list(self):
        recipe_embeddings = dict()
        for cocktail in self.cocktail_info:
            name = cocktail['cocktail_name']
            recipe = cocktail['recipe']
            recipe_embedding = self.create_recipe_embedding(recipe)
            recipe_embeddings[name]={'recipe_embedding':recipe_embedding}
        return recipe_embeddings
    
    def calculate_recipe_taste_weights(self,recipe):
        # 레시피에 사용된 재료만 필터링
        recipe_ingredients = [d for d in self.flavor_data if d['name'] in list(recipe.keys())]
                
        # 레시피의 재료 비율 계산
        total_amount = sum(recipe.values())
        ingredient_ratios = {ingredient: amount / total_amount for ingredient, amount in recipe.items()}
        
        # 맛 비중 계산
        recipe_taste_weights = {}
        for ingredient, ratio in ingredient_ratios.items():
            ingredient_dict = next((d for d in recipe_ingredients if d['name'] == ingredient), None)
            if ingredient_dict:
                for taste, weight in ingredient_dict.items():
                    if taste != 'name':
                        recipe_taste_weights[taste] = recipe_taste_weights.get(taste, 0) + weight * ratio
        return recipe_taste_weights
    
    def create_taste_embedding_list(self):
        taste_embeddings = dict()
        for cocktail in self.cocktail_info: 
            name = cocktail['cocktail_name']
            recipe = cocktail['recipe']
            recipe_taste_weights = self.calculate_recipe_taste_weights(recipe)
            taste_embeddings[name] = {'taste_embedding': np.array(list(recipe_taste_weights.values()))}
        return taste_embeddings
    def create_combined_embedding_list(self):
        recipe_embeddings = self.create_recipe_embedding_list()
        taste_embeddings = self.create_taste_embedding_list()

        combined_embeddings = {}
        for name in recipe_embeddings.keys():
            combined_embeddings[name] = {
                'recipe_embedding': recipe_embeddings[name]['recipe_embedding'],
                'taste_embedding': taste_embeddings[name]['taste_embedding']
            }

        return combined_embeddings



### embedding - recipe_embedding / taste_embedding

In [None]:
coctail_re = CocktailRecommender(json_data, flavor_data)
result = coctail_re.create_combined_embedding_list()


In [152]:
json_data['cocktail_info'][0]['alcoholic']

{'cocktail_name': "'57 chevy with a white license plate",
 'cocktail_glass': 'highball glass',
 'category': 'Cocktail',
 'recipe': {'creme de cacao': 29.5735, 'vodka': 29.5735},
 'alcoholic': 'Alcoholic'}

In [None]:
for idx, item in enumerate(result):
    print(item, result[item]['taste_embedding'][0])


In [52]:
from sklearn.preprocessing import MinMaxScaler
def parse_cocktail_data(cocktail_data):
    parsed_data = {}
    
    # 칵테일 이름
    parsed_data['cocktail_name'] = cocktail_data['cocktail_name']
    
    # 재료 리스트
    ingredients = list(cocktail_data['recipe'].keys())
    parsed_data['ingredients'] = ingredients
    
    # 재료 양 리스트
    amounts = list(cocktail_data['recipe'].values())
    parsed_data['amounts'] = amounts
    
    # 글래스 종류
    parsed_data['glass'] = cocktail_data['cocktail_glass']
    
    return parsed_data



In [54]:
taste_weight = calculate_recipe_taste_weights(json_data['cocktail_info'][0]['recipe'], flavor_data)
taste_weight.values()



[{'name': 'creme de cacao', 'ABV': 28.0, 'boozy': 50.0, 'sweet': 60.0, 'sour': 0.0, 'bitter': 40.0, 'umami': 0.0, 'salty': 0.0, 'astringent': 0.0, 'Perceived_temperature': 30.0, 'spicy': 0.0, 'herbal': 0.0, 'floral': 0.0, 'fruity': 10.0, 'nutty': 0.0, 'creamy': 40.0, 'smoky': 10.0}, {'name': 'vodka', 'ABV': 40.0, 'boozy': 80.0, 'sweet': 0.0, 'sour': 0.0, 'bitter': 0.0, 'umami': 0.0, 'salty': 0.0, 'astringent': 0.0, 'Perceived_temperature': 30.0, 'spicy': 0.0, 'herbal': 0.0, 'floral': 0.0, 'fruity': 0.0, 'nutty': 0.0, 'creamy': 0.0, 'smoky': 0.0}]


dict_values([34.0, 65.0, 30.0, 0.0, 20.0, 0.0, 0.0, 0.0, 30.0, 0.0, 0.0, 0.0, 5.0, 0.0, 20.0, 5.0])

In [51]:
json_data['cocktail_info'][0]

{'cocktail_name': "'57 chevy with a white license plate",
 'cocktail_glass': 'highball glass',
 'category': 'Cocktail',
 'recipe': {'creme de cacao': 29.5735, 'vodka': 29.5735},
 'alcoholic': 'Alcoholic'}

In [43]:
[d for d in flavor_data if d['name'] in list(json_data['cocktail_info'][0]['recipe'].keys())]

[{'name': 'creme de cacao',
  'ABV': 28.0,
  'boozy': 50.0,
  'sweet': 60.0,
  'sour': 0.0,
  'bitter': 40.0,
  'umami': 0.0,
  'salty': 0.0,
  'astringent': 0.0,
  'Perceived_temperature': 30.0,
  'spicy': 0.0,
  'herbal': 0.0,
  'floral': 0.0,
  'fruity': 10.0,
  'nutty': 0.0,
  'creamy': 40.0,
  'smoky': 10.0},
 {'name': 'vodka',
  'ABV': 40.0,
  'boozy': 80.0,
  'sweet': 0.0,
  'sour': 0.0,
  'bitter': 0.0,
  'umami': 0.0,
  'salty': 0.0,
  'astringent': 0.0,
  'Perceived_temperature': 30.0,
  'spicy': 0.0,
  'herbal': 0.0,
  'floral': 0.0,
  'fruity': 0.0,
  'nutty': 0.0,
  'creamy': 0.0,
  'smoky': 0.0}]

In [34]:
for i in range(len(flavor_data)):
    print(flavor_data[i]['name'] in list(json_data['cocktail_info'][0]['recipe'].keys()))
# [d for d in ingredient_properties if d['name'] in recipe]

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False

In [31]:
json_data['cocktail_info'][0]['recipe']

{'creme de cacao': 29.5735, 'vodka': 29.5735}

In [25]:
json_data['cocktail_info'][0]['recipe']

{'creme de cacao': 29.5735, 'vodka': 29.5735}

In [12]:
train_data = []
for item in json_data['cocktail_info']:
    train_data.append(parse_cocktail_data(item))
train_data = parse_cocktail_data(json_data['cocktail_info'][0])

In [13]:
train_data

{'cocktail_name': "'57 chevy with a white license plate",
 'ingredients': ['creme de cacao', 'vodka'],
 'amounts': [29.5735, 29.5735],
 'glass': 'highball glass'}

In [24]:
flavor_data

[{'name': '151 proof rum',
  'ABV': 75.5,
  'boozy': 95.0,
  'sweet': 10.0,
  'sour': 0.0,
  'bitter': 10.0,
  'umami': 0.0,
  'salty': 0.0,
  'astringent': 5.0,
  'Perceived_temperature': 40.0,
  'spicy': 0.0,
  'herbal': 0.0,
  'floral': 0.0,
  'fruity': 10.0,
  'nutty': 0.0,
  'creamy': 0.0,
  'smoky': 20.0},
 {'name': '7-up',
  'ABV': 0.0,
  'boozy': 0.0,
  'sweet': 70.0,
  'sour': 30.0,
  'bitter': 0.0,
  'umami': 0.0,
  'salty': 0.0,
  'astringent': 0.0,
  'Perceived_temperature': 20.0,
  'spicy': 0.0,
  'herbal': 0.0,
  'floral': 0.0,
  'fruity': 20.0,
  'nutty': 0.0,
  'creamy': 0.0,
  'smoky': 0.0},
 {'name': 'absinthe',
  'ABV': 59.5,
  'boozy': 60.0,
  'sweet': 30.0,
  'sour': 0.0,
  'bitter': 70.0,
  'umami': 0.0,
  'salty': 0.0,
  'astringent': 40.0,
  'Perceived_temperature': 30.0,
  'spicy': 0.0,
  'herbal': 90.0,
  'floral': 10.0,
  'fruity': 0.0,
  'nutty': 0.0,
  'creamy': 0.0,
  'smoky': 0.0},
 {'name': 'absolut citron',
  'ABV': 40.0,
  'boozy': 70.0,
  'sweet': 20.