# 3.2 Set up delta12 and magnitude metrics

In [1]:
from game.flaskapp_andrius.api import preprocesser
from game.utils import run_query
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances

from game.src.feature_generator import get_embedding, get_vector_list
from game.src.ab_test_preprocessor import calc_new_delta
from game.src.delta12_calculator import get_delta12

# Demonstration of the original delta12 calculation function

In [2]:
# Order-history fixture for the delta12 demo.
#   "lastorder"  holds recipes 238 and 419
#   "lastorder2" holds recipes 419 and 532
order_hist_1 = {
    "lastorder": [
        {
            "recipe_id": "238",
            "food_group": "Lamb",
            "calories": "289.1",
            "carbs": "18.72",
            "fat": "12.95",
            "protein": "22.75",
            "cooking_time": 30,
            "title": "Creamy lamb korma & broccoli rice",
            "description": "Broccoli rice is a delicious accompaniment to this creamy lamb spiced korma with cherry tomatoes and mushrooms.",
            "key_ingredient": "Free-range heritage breed Yorkshire lamb",
            "price": "No price",
        },
        {
            "recipe_id": "419",
            "food_group": "Vegan",
            "calories": "672.93",
            "carbs": "97.52",
            "fat": "14.71",
            "protein": "39.09",
            "cooking_time": 30,
            "title": "Sticky tamarind & ginger tofu with mangetout",
            "description": "Shiitake mushrooms are absolutely brimming with umami and make this tangy tofu dish completely irresistible. The tamarind and fresh aromatics add even more flavour.",
            "key_ingredient": "Dragonfly organic extra-firm tofu ",
            "price": "No price",
        },
    ],
    "lastorder2": [
        {
            "recipe_id": "419",
            "food_group": "Vegan",
            "calories": "672.93",
            "carbs": "97.52",
            "fat": "14.71",
            "protein": "39.09",
            "cooking_time": 30,
            "title": "Sticky tamarind & ginger tofu with mangetout",
            "description": "Shiitake mushrooms are absolutely brimming with umami and make this tangy tofu dish completely irresistible. The tamarind and fresh aromatics add even more flavour.",
            "key_ingredient": "Dragonfly organic extra-firm tofu ",
            "price": "No price",
        },
        {
            "recipe_id": "532",
            "food_group": "Fish",
            "calories": "818.8",
            "carbs": "86.64",
            "fat": "39.61",
            "protein": "29.09",
            "cooking_time": 0,
            "title": "Haddock, Asian veg & coconut black rice ",
            "description": "Coconut black rice infused with ginger, garlic and chilli contrasts with delicate white haddock and a selection of crisp Asian vegetables. As always, our fish is responsibly sourced. The perfect meal for date night? \n",
            "key_ingredient": "Fresh, sustainably caught haddock fillet",
            "price": "No price",
        },
    ],
}

In [3]:
# Run the original delta12 implementation on the fixture; show 5 decimal places.
delta12_original = get_delta12(order_hist_1)
round(delta12_original, 5)

0.01698

# Looking closer at functions

In [4]:
"""
def get_delta12(order_hist):
    
    if "lastorder" in order_hist.keys():
        lastorder_embedding = np.mean(
            [preprocesser.recipe2vec(recipe) for recipe in order_hist["lastorder"]]
        )
    else:
        lastorder_embedding = np.nan

    if "lastorder2" in order_hist.keys():
        lastorder2_embedding = np.mean(
            [preprocesser.recipe2vec(recipe) for recipe in order_hist["lastorder2"]]
        )
    else:
        lastorder2_embedding = np.nan

    delta12 = preprocesser.calc_delta(lastorder_embedding, lastorder2_embedding)

    return delta12
"""

'\ndef get_delta12(order_hist):\n    \n    if "lastorder" in order_hist.keys():\n        lastorder_embedding = np.mean(\n            [preprocesser.recipe2vec(recipe) for recipe in order_hist["lastorder"]]\n        )\n    else:\n        lastorder_embedding = np.nan\n\n    if "lastorder2" in order_hist.keys():\n        lastorder2_embedding = np.mean(\n            [preprocesser.recipe2vec(recipe) for recipe in order_hist["lastorder2"]]\n        )\n    else:\n        lastorder2_embedding = np.nan\n\n    delta12 = preprocesser.calc_delta(lastorder_embedding, lastorder2_embedding)\n\n    return delta12\n'

In [5]:
# Replays the first branch of get_delta12 by hand for the most recent order.
# np.mean receives no axis, so the value displayed is a single scalar.
lastorder_embedding = np.mean(
    list(map(preprocesser.recipe2vec, order_hist_1["lastorder"]))
)
lastorder_embedding

0.0472033754085259

In [6]:
# Same hand-replay for the order before last ("lastorder2").
# np.mean receives no axis, so the value displayed is a single scalar.
lastorder2_embedding = np.mean(
    list(map(preprocesser.recipe2vec, order_hist_1["lastorder2"]))
)
lastorder2_embedding

0.030227354547122078

In [7]:
"""
def calc_delta(embedding1, embedding2):
	
	'''

	Calculates the Euclidean distance between any two embeddings.

	(float) <- list of floats, list of floats

	'''    
	logger.debug("Calculate deltas, embedding1 = {}, embedding2 = {}".format(embedding1, embedding2))
	if (embedding1 is np.nan)|(embedding2 is np.nan):
		
		return np.nan
	
	else:
		delta = np.sqrt(np.sum((embedding1 - embedding2)**2))
		logger.debug("Delta calculated, delta = {}".format(delta))
		return delta
"""

'\ndef calc_delta(embedding1, embedding2):\n\t\n\t\'\'\'\n\n\tCalculates the Euclidean distance between any two embeddings.\n\n\t(float) <- list of floats, list of floats\n\n\t\'\'\'    \n\tlogger.debug("Calculate deltas, embedding1 = {}, embedding2 = {}".format(embedding1, embedding2))\n\tif (embedding1 is np.nan)|(embedding2 is np.nan):\n\t\t\n\t\treturn np.nan\n\t\n\telse:\n\t\tdelta = np.sqrt(np.sum((embedding1 - embedding2)**2))\n\t\tlogger.debug("Delta calculated, delta = {}".format(delta))\n\t\treturn delta\n'

In [8]:
# Apply the calc_delta formula by hand to the two scalar embeddings.
squared_gap = (lastorder_embedding - lastorder2_embedding)**2
delta = np.sqrt(np.sum(squared_gap))
round(delta, 5)

0.01698

In [9]:
# Plain signed difference of the two scalars, for comparison with `delta`.
signed_gap = lastorder_embedding - lastorder2_embedding
round(signed_gap, 5)

0.01698

# Alternative adventurousness calculation using magnitudes of recipe journeys

In [10]:
# Recipe table with principal-component columns (PC_1 is used for ranking below).
df_pc = pd.read_csv(filepath_or_buffer='../data/df_3_pc.csv')

In [11]:
# Load the new recipe table; the file is semicolon-delimited.
df_recipes = pd.read_csv(
    '../data/recipe_table.csv',
    delimiter=';',
)

In [12]:
# Alias 'price_2p_pence' as 'price'; presumably so each record carries the same
# 'price' key as the order-history fixture - TODO confirm against get_embedding.
df_recipes['price'] = df_recipes['price_2p_pence']

# Explicit .copy(): a column is added to df_clean below, and assigning into a bare
# column selection of df_recipes triggers pandas' SettingWithCopyWarning.
df_clean = df_recipes[['id', 'food_group', 'calories', 'carbs', 'fat',
                       'protein', 'cooking_time', 'title', 'description',
                       'key_ingredient', 'price']].copy()

# to_dict(orient='records') yields one dict per recipe row.
recipe_dict = df_clean.to_dict(orient='records')
list_of_embeddings = get_embedding(recipe_dict)
df_clean['embedding'] = list_of_embeddings
df_clean.head(1)

Unnamed: 0,id,food_group,calories,carbs,fat,protein,cooking_time,title,description,key_ingredient,price,embedding
0,1,Beef,474,26,23,45,25,Seared beef with spring vegetable medley,We've paired a succulent seared steak with a m...,Grass-fed heritage breed Yorkshire beef,1400,0.039164


In [13]:
# Per-recipe vectors from the project helper, one entry per record in recipe_dict.
list_of_vectors = get_vector_list(recipe_dict)

# assign() returns a new frame rather than writing into the existing df_clean,
# which avoids SettingWithCopyWarning when df_clean is a selection of df_recipes.
df_clean = df_clean.assign(vector=list_of_vectors)
df_clean['vector'].head(1)

0    [0.0647217469599511, -0.003410175382256585, -0...
Name: vector, dtype: object

In [14]:
# Keep only the recipe id and its vector; this is the lookup table passed to
# calc_new_delta in the cells below.
df_vectors = df_clean[['id', 'vector']].copy()

# NOTE(review): the 'vector' cells are numpy arrays and CSV stores them as text,
# so reading this file back will not restore arrays without extra parsing -
# confirm that downstream readers handle this.
df_vectors.to_csv('../data/df_vectors.csv', index=False)

# Preview goes last so the notebook actually displays it.
df_vectors.head(1)

In [15]:
# Check the Python type stored in the 'vector' column (row labelled 0).
type(df_vectors.loc[0, 'vector'])

numpy.ndarray

# Testing

In [16]:
# Ids of the six Vegan recipes with the smallest PC_1 values.
(
    df_pc.loc[df_pc['food_group'] == 'Vegan']
    .sort_values(by='PC_1')
    .head(6)['id']
    .values
    .tolist()
)

[481, 381, 314, 126, 30, 158]

In [17]:
# Vegan only: the six lowest-PC_1 ids listed by the previous cell.
vegan_boring = [
    481, 381, 314,
    126, 30, 158,
]
calc_new_delta(vegan_boring, df_vectors)

0.4176128656425803

In [18]:
# Ids of the six Vegan recipes with the largest PC_1 values.
(
    df_pc.loc[df_pc['food_group'] == 'Vegan']
    .sort_values(by='PC_1', ascending=False)
    .head(6)['id']
    .values
    .tolist()
)

[52, 7, 874, 817, 540, 499]

In [19]:
# Vegan only: three lowest-PC_1 ids followed by three highest-PC_1 ids.
vegan_adv = [
    481, 381, 314,  # lowest PC_1
    52, 7, 874,     # highest PC_1
]
calc_new_delta(vegan_adv, df_vectors)

1.1137429708778517

In [20]:
# Ids of the six Beef recipes with the smallest PC_1 values.
(
    df_pc.loc[df_pc['food_group'] == 'Beef']
    .sort_values(by='PC_1')
    .head(6)['id']
    .values
    .tolist()
)

[678, 418, 76, 694, 64, 259]

In [21]:
# Mixed basket: three lowest-PC_1 Vegan ids plus three lowest-PC_1 Beef ids.
vegan_beef = [
    481, 381, 314,  # Vegan, lowest PC_1
    678, 418, 76,   # Beef, lowest PC_1
]
calc_new_delta(vegan_beef, df_vectors)

0.9120781742444433

In [22]:
# Ids of the six Beef recipes with the largest PC_1 values.
(
    df_pc.loc[df_pc['food_group'] == 'Beef']
    .sort_values(by='PC_1', ascending=False)
    .head(6)['id']
    .values
    .tolist()
)

[244, 635, 270, 275, 669, 560]

In [23]:
# Mixed basket: three lowest-PC_1 Vegan ids plus three highest-PC_1 Beef ids.
# NOTE: this rebinds `vegan_beef`, which the lowest-PC_1 beef comparison also uses.
vegan_beef = [
    481, 381, 314,  # Vegan, lowest PC_1
    244, 635, 270,  # Beef, highest PC_1
]
calc_new_delta(vegan_beef, df_vectors)

1.3969966813581436

In [24]:
# Draw one Beef recipe id at random (unseeded, so the value changes per run).
(
    df_pc.loc[df_pc['food_group'] == 'Beef', 'id']
    .sample()
    .values[0]
)

290

In [25]:
# One randomly drawn recipe id from each food group.
fg_list = ['Vegan', 'Beef', 'Chicken', 'Fish', 'Pork', 'Lamb']

# A single seeded RandomState is shared by every draw so that Restart & Run All
# reproduces the same ids, and therefore the same delta computed from them.
sample_rng = np.random.RandomState(42)

random_list = [
    df_pc.loc[df_pc['food_group'] == food_group, 'id']
    .sample(random_state=sample_rng)
    .iloc[0]
    for food_group in fg_list
]

random_list

[35, 47, 297, 644, 543, 273]

In [26]:
# Magnitude metric for the one-per-food-group random basket.
random_delta = calc_new_delta(random_list, df_vectors)
random_delta

1.2419476545956956