# 0. Setup

In [1]:
import pandas as pd
import os.path
import math
import re
import random

In [2]:
root_path = os.path.dirname(os.getcwd())
ingredients_data_raw = pd.read_csv(os.path.join(root_path, 'DATA/ingredients_data.csv'))
ingredients_data = ingredients_data_raw.replace(float('nan'), '')
salad_data_impractical = ingredients_data[ingredients_data['salad']=='y']
salad_data = salad_data_impractical[(salad_data_impractical['salad_umbrella'] != 'y') & (salad_data_impractical['redirect'] != 'y')]

salad_data_basic = salad_data[salad_data['salad_basic'] == 'y']
# salad_data = salad_data_basic
salad_data_is_basic = False

salad_data.reset_index(inplace=True)

# 1. Matching "pairs with" terms to ingredient names

In [3]:
def get_terms_from_pairs_with(pairs_with):
    if str(pairs_with) == 'nan':
        return []
    else:
        return [term.strip() for term in pairs_with.split('\n\n') if term.strip() != '']

# break entries in column that has 'pairs with' strings into lists of ingredient terms
ingredient_pairs_with_terms = salad_data['pairs_with'].apply(get_terms_from_pairs_with)

In [4]:
# create list of all terms, ignoring case and excluding duplicates
all_terms = list(set(ingredient_pairs_with_terms.sum()))
all_terms_lower = list(set([term.lower() for term in ingredient_pairs_with_terms.sum()]))

In [5]:
# !pip install inflect
import inflect
p = inflect.engine()

In [6]:
def get_normal_tokens(phrase):
    # should add and/or, but the data is good enough and I don't want to mess up the manual entry
    tokens = [token.strip().lower() for token in re.split('\(|\)|,|e\.g\.|esp\.|and|—|or|aka|see|see also|;', phrase)]
#     print(tokens)
    tokens = [p.singular_noun(token) or token for token in tokens if token != '']
#     print(tokens)
    return tokens

def get_score(name, term):
    name_tokens = get_normal_tokens(name)
#     print(name_tokens)
    term_tokens = get_normal_tokens(term)
#     print(term_tokens)
    common_tokens = list(set(name_tokens).intersection(set(term_tokens)))
#     print(common_tokens)
    return len(common_tokens)

In [7]:
# # doing it this way so I can add 'print' to monitor progress
# score_data = []
# for name in salad_data['name']:
#     print(name)
#     score_data.append([get_score(name, term) for term in all_terms])

# term_name_scores = pd.DataFrame(score_data, columns = all_terms)
# term_name_scores['name'] = salad_data['name'].values

# term_name_scores.to_csv(os.path.join(root_path, 'DATA/term_name_scores_common.csv'), index=False)

# 2. Extracting "pairs with" data

In [8]:
# term_name_matches_raw = pd.read_csv(os.path.join(root_path, 'DATA/term_name_matches.csv'))
# term_name_matches = term_name_matches_raw.replace(['0', '1', '2', '3', '4', '5', 0, 1, 2, 3, 4, 5, float('nan')], '')
# term_name_matches_lower = term_name_matches.replace('Y', 'y')

In [9]:
# # create a dataframe with name and a list of pairing terms for each ingredient
# pairing_data = pd.DataFrame({
#     'name': salad_data['name'],
#     'pairs_with_terms': ingredient_pairs_with_terms
# })

# def get_pairs_with_names(row):
# #     print(row['name'], type(row['name']))
#     lower_names = []
#     upper_names = []
#     for term in row['pairs_with_terms']:
#         if term in term_name_matches.columns.values.tolist():
#             lower_names += term_name_matches[term_name_matches[term] == 'y']['name'].values.tolist()
#             upper_names += term_name_matches[term_name_matches[term] == 'Y']['name'].values.tolist()
#         else:
#             print('OH NO didnt find term:', term)
# #     for name in lower_names + upper_names:
# #         if name == row['name']:
# #             print('DUPLICATE')
# #     print(lower_names, upper_names)
#     row['lower_pairs_with_names'] = [lower_name for lower_name in lower_names if lower_name != row['name']]
#     row['upper_pairs_with_names'] = [upper_name for upper_name in upper_names if upper_name != row['name']]
#     row['all_pairs_with_names'] = lower_names + upper_names
#     return row

# pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)

In [10]:
term_name_matches_raw = pd.read_csv(os.path.join(root_path, 'DATA/term_name_matches_specific.csv'))
term_name_matches = term_name_matches_raw.replace(['0', '1', '2', '3', '4', '5', 0, 1, 2, 3, 4, 5, float('nan')], '')

In [11]:
# # create a dataframe with name and a list of pairing terms for each ingredient
# pairing_data = pd.DataFrame({
#     'name': salad_data['name'],
#     'pairs_with_terms': ingredient_pairs_with_terms
# })

# def get_pairs_with_names(row):
# #     print(row['name'], type(row['name']))
#     lower_category_names = []
#     lower_direct_names = []
#     upper_category_names = []
#     upper_direct_names = []
#     for term in row['pairs_with_terms']:
#         if term in term_name_matches.columns.values.tolist():
#             lower_category_names += term_name_matches[term_name_matches[term] == 'c']['name'].values.tolist()
#             lower_direct_names += term_name_matches[term_name_matches[term] == 'd']['name'].values.tolist()
#             upper_category_names += term_name_matches[term_name_matches[term] == 'C']['name'].values.tolist()
#             upper_direct_names += term_name_matches[term_name_matches[term] == 'D']['name'].values.tolist()
#         else:
#             pass
# #             print('OH NO didnt find term:', term)
#     row['lower_category_pairs_with_names'] = list(set([lower_category_name for lower_category_name in lower_category_names if lower_category_name != row['name']]))
#     row['lower_direct_pairs_with_names'] = list(set([lower_direct_name for lower_direct_name in lower_direct_names if lower_direct_name != row['name']]))
#     row['upper_category_pairs_with_names'] = list(set([upper_category_name for upper_category_name in upper_category_names if upper_category_name != row['name']]))
#     row['upper_direct_pairs_with_names'] = list(set([upper_direct_name for upper_direct_name in upper_direct_names if upper_direct_name != row['name']]))
#     row['lower_pairs_with_names'] = list(set(row['lower_category_pairs_with_names'] + row['lower_direct_pairs_with_names']))
#     row['upper_pairs_with_names'] = list(set(row['upper_category_pairs_with_names'] + row['upper_direct_pairs_with_names']))
#     row['all_pairs_with_names'] = list(set(row['lower_pairs_with_names'] + row['upper_pairs_with_names']))
    
#     row['lc_sorted_pairs'] = tuple(sorted(row['lower_category_pairs_with_names']))
#     row['ld_sorted_pairs'] = tuple(sorted(row['lower_direct_pairs_with_names']))
#     row['uc_sorted_pairs'] = tuple(sorted(row['upper_category_pairs_with_names']))
#     row['ud_sorted_pairs'] = tuple(sorted(row['upper_direct_pairs_with_names']))
#     row['l_sorted_pairs'] = tuple(sorted(row['lower_pairs_with_names']))
#     row['u_sorted_pairs'] = tuple(sorted(row['upper_pairs_with_names']))
#     row['a_sorted_pairs'] = tuple(sorted(row['all_pairs_with_names']))
#     return row

# pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)

In [12]:
# CREATE PAIRING DATA MATRIX (names x names)
# takes a few minutes

pairing_data = pd.DataFrame({
    'name': salad_data['name'],
    'pairs_with_terms': ingredient_pairs_with_terms
})

for name in salad_data['name']:
    pairing_data[name] = pd.Series(['']*len(salad_data['name']))

def get_pairs_with_names(row):
#     print(row['name'], type(row['name']))
    lower_category_names = []
    lower_direct_names = []
    upper_category_names = []
    upper_direct_names = []
    for term in row['pairs_with_terms']:
        if term in term_name_matches.columns.values.tolist():
            lower_category_names += term_name_matches[term_name_matches[term] == 'c']['name'].values.tolist()
            lower_direct_names += term_name_matches[term_name_matches[term] == 'd']['name'].values.tolist()
            upper_category_names += term_name_matches[term_name_matches[term] == 'C']['name'].values.tolist()
            upper_direct_names += term_name_matches[term_name_matches[term] == 'D']['name'].values.tolist()
        else:
            pass
#             print('OH NO didnt find term:', term)
    
    for lower_category_name in lower_category_names:
        row[lower_category_name] = 'c'
    for lower_direct_name in lower_direct_names:
        row[lower_direct_name] = 'd'
    for upper_category_name in upper_category_names:
        row[upper_category_name] = 'C'
    for upper_direct_name in upper_direct_names:
        row[upper_direct_name] = 'D'
    
#     row['lower_category_pairs_with_names'] = list(set([lower_category_name for lower_category_name in lower_category_names if lower_category_name != row['name']]))
#     row['lower_direct_pairs_with_names'] = list(set([lower_direct_name for lower_direct_name in lower_direct_names if lower_direct_name != row['name']]))
#     row['upper_category_pairs_with_names'] = list(set([upper_category_name for upper_category_name in upper_category_names if upper_category_name != row['name']]))
#     row['upper_direct_pairs_with_names'] = list(set([upper_direct_name for upper_direct_name in upper_direct_names if upper_direct_name != row['name']]))
#     row['lower_pairs_with_names'] = list(set(row['lower_category_pairs_with_names'] + row['lower_direct_pairs_with_names']))
#     row['upper_pairs_with_names'] = list(set(row['upper_category_pairs_with_names'] + row['upper_direct_pairs_with_names']))
#     row['all_pairs_with_names'] = list(set(row['lower_pairs_with_names'] + row['upper_pairs_with_names']))
    
#     row['lc_sorted_pairs'] = tuple(sorted(row['lower_category_pairs_with_names']))
#     row['ld_sorted_pairs'] = tuple(sorted(row['lower_direct_pairs_with_names']))
#     row['uc_sorted_pairs'] = tuple(sorted(row['upper_category_pairs_with_names']))
#     row['ud_sorted_pairs'] = tuple(sorted(row['upper_direct_pairs_with_names']))
#     row['l_sorted_pairs'] = tuple(sorted(row['lower_pairs_with_names']))
#     row['u_sorted_pairs'] = tuple(sorted(row['upper_pairs_with_names']))
#     row['a_sorted_pairs'] = tuple(sorted(row['all_pairs_with_names']))
    return row

pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)
pairing_data.replace(float('nan'), '', inplace=True)

In [13]:
# SYNC PAIRING DATA MATRIX (make sure [a][b] agrees with [b][a])

for index_1, name_1 in enumerate(pairing_data['name'].values.tolist()):
    for index_2, name_2 in enumerate(pairing_data['name'].values.tolist()):
        value_1 = pairing_data[name_1][index_2]
        value_2 = pairing_data[name_2][index_1]
        
        if name_1 == name_2:
            proper_value = ''
        elif value_1 == 'D' or value_2 == 'D':
            proper_value = 'D'
        elif value_1 == 'C' or value_2 == 'C':
            proper_value = 'C'
        elif value_1 == 'd' or value_2 == 'd':
            proper_value = 'd'
        elif value_1 == 'c' or value_2 == 'c':
            proper_value = 'c'
        else:
            proper_value = ''
        
        pairing_data[name_1][index_2] = proper_value
        pairing_data[name_2][index_1] = proper_value

In [14]:
# REPRESENT PAIRING DATA AS LISTS

def get_pairs_with_names(row):
    lower_category_name = pairing_data[pairing_data[row['name']] == 'c']['name'].values.tolist()
    lower_direct_name = pairing_data[pairing_data[row['name']] == 'd']['name'].values.tolist()
    upper_category_name = pairing_data[pairing_data[row['name']] == 'C']['name'].values.tolist()
    upper_direct_name = pairing_data[pairing_data[row['name']] == 'D']['name'].values.tolist()
    
    row['lower_category_names'] = pairing_data[pairing_data[row['name']] == 'c']['name'].values.tolist()
    row['lower_direct_names'] = pairing_data[pairing_data[row['name']] == 'd']['name'].values.tolist()
    row['upper_category_names'] = pairing_data[pairing_data[row['name']] == 'C']['name'].values.tolist()
    row['upper_direct_names'] = pairing_data[pairing_data[row['name']] == 'D']['name'].values.tolist()
    row['lower_names'] = row['lower_category_names'] + row['lower_direct_names']
    row['upper_names'] = row['upper_category_names'] + row['upper_direct_names']
    row['all_names'] = row['lower_names'] + row['upper_names']
    
    return row

pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)

In [15]:
# pairing_data.to_csv(os.path.join(root_path, 'DATA/pairing_data.csv'), index=False)
# pairing_data = pd.read_csv(os.path.join(root_path, 'DATA/pairing_data.csv'))

# 3. Creating "clashes with" data

In [16]:
# name_name_clashes_blank = salad_data[['name', 'protein_cheese_sub', 'salad_allium', 'fruit', 'veg']].copy()
# names = salad_data['name'].values.tolist()

# for i, col_name in enumerate(names):
#     name_name_clashes_blank[col_name] = pd.Series(['x']*(i+1) + ['']*(len(names)-(i+1)))

# name_name_clashes_blank.to_csv(os.path.join(root_path, 'DATA/name_name_clashes_blank.csv'), index=False)

In [17]:
name_name_clashes_input = pd.read_csv(os.path.join(root_path, 'DATA/name_name_clashes_input.csv'))
name_name_clashes_input = name_name_clashes_input.replace([float('nan'), 'x'], '')
columns = ['name'] + name_name_clashes_input['name'].values.tolist()
name_name_clashes_input = name_name_clashes_input[columns]

def get_clashes_with_data(row):
    data = pd.Series([])
#     print(row.index.values.tolist())
    name = row['name']
    data['name'] = name
    
    lower_names = name_name_clashes_input['name'][name_name_clashes_input[name] == 'y'].values.tolist()
    upper_names = name_name_clashes_input['name'][name_name_clashes_input[name] == 'Y'].values.tolist()
    if 'y' in lower_names:
        print(lower_names)
    
    for name in name_name_clashes_input['name']:
#         if name in ['y', 'Y']:
#             print(name)
        if row[name] == 'y':
            lower_names.append(name)
        elif row[name] == 'Y':
            upper_names.append(name)
    
    lower_names = list(set(lower_names))
    upper_names = list(set(upper_names))
    
    data['lower_clashes_with_names'] = lower_names
    data['upper_clashes_with_names'] = upper_names
    data['all_clashes_with_names'] = list(set(lower_names + upper_names)) # shouldn't be overlap here, but hey
    
    data['lower_clashes_with_pairs'] = [tuple(sorted([name, lower_name])) for lower_name in data['lower_clashes_with_names']]
    data['upper_clashes_with_pairs'] = [tuple(sorted([name, lower_name])) for lower_name in data['upper_clashes_with_names']]
    data['all_clashes_with_pairs'] = [tuple(sorted([name, lower_name])) for lower_name in data['all_clashes_with_names']]
    
    return data

clashes_with_data = name_name_clashes_input.apply(get_clashes_with_data, axis=1)
            
# # for row in name_name_clashes_input.iterrows():
# clashes_with_data = pd.DataFrame({'name': salad_data['name']})
# for i, row in clashes_with_data.iterrows():
# #     print(row, type(row))
#     name = row['name']
#     row['lower_clashes_with'] = name_name_clashes_input[name][name_name_clashes_input[name] == 'y']
#     row['upper_clashes_with'] = name_name_clashes_input[name][name_name_clashes_input[name] == 'Y']
#     row['all_clashes_with'] = row['lower_clashes_with'] + row['upper_clashes_with']

In [18]:
all_lower_clashes_with_pairs = list(set(clashes_with_data['lower_clashes_with_pairs'].sum()))
all_upper_clashes_with_pairs = list(set(clashes_with_data['upper_clashes_with_pairs'].sum()))
all_clashes_with_pairs = list(set(clashes_with_data['all_clashes_with_pairs'].sum()))

# 4. Salad recipe generators

In [19]:
salad_greens = salad_data[salad_data['salad_green'] == 'y']

salad_extras = salad_data[salad_data['salad_extra'] == 'y']
salad_extra_veg = salad_data[(salad_data['veg'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_fruits = salad_data[(salad_data['fruit'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_nuts = salad_data[(salad_data['protein_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_seeds = salad_data[(salad_data['protein_nut'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_tomatoes = salad_data[salad_data['salad_extra_tomato'] == 'y']
salad_extra_olives = salad_data[salad_data['salad_extra_olive'] == 'y']
salad_extra_cheeses = salad_data[salad_data['salad_extra_cheese'] == 'y']
salad_extra_eggs = salad_data[salad_data['salad_extra_egg'] == 'y']
salad_extra_croutons = salad_data[salad_data['salad_extra_crouton'] == 'y']

salad_dressing_oils = salad_data[salad_data['salad_dressing_oil'] == 'y']
salad_dressing_vinegars = salad_data[salad_data['salad_dressing_vinegar'] == 'y']
salad_dressing_salts = salad_data[salad_data['salad_dressing_salt'] == 'y']
salad_dressing_peppers = salad_data[salad_data['salad_dressing_pepper'] == 'y']
salad_dressing_garlics = salad_data[salad_data['salad_dressing_garlic'] == 'y']

## 4.1. Best of many

### 4.1.1. Generator

In [20]:
# !pip install networkx
# !pip install pyvis

import networkx as nx
from pyvis import network as net

In [21]:
#IDEAS
    # savory v sweet templates (avoid eggs/mushroom/hard veg w fruit...)
    # get eg working, weight it as 2
    # start out w 1 base salad, extra (randomly selected from vip list), then branch out
    # control for eg lots of types of citrus, alliums
    # mark strong flavors, treat them separately
    # select main ingredients from each category that go with each other, then branch out from each, weighting traditionally at the end
    # work off of pairs_with data for categories, while at same time picking categories then within categories (to account for eg allium bias)
    # *only* match eg.s with specifically named; also, consider not matching categories the weird way
    # add a bonus if category of ingredient pairs well with other ingredients in salad (meh)
    # add in dressing garlics, when applicable
    # CONSIDER
        # replacing average net dist with something like, edges/possible edges

# TODO
    # add dried cranberries to salad stuffs?
    # maybe some other dried fruit? maybe not.
    
# UI PRIORITIES
    # speed
    # translate names to common (e.g. ALLMONDS (AND UNSWEETENED ALMOND BUTTER) -> Almonds, sliced)
    # maybe consisder one of those spokes-of-wheel charts to display stats/scores
    
# RECIPE DETAILS
    # maybe suggest diluting e.g. sesame oil w regular olive oil
    
# ==================================================================================================================
# ==================================================================================================================

# ALGORITHM A (best of many)
top_score = 0
for i in range(300):
    n_subgraphs = 2
    while n_subgraphs > 1: # keep shuffling until you get a well connected graph
        n_greens = random.randrange(2, 4)
        n_extras = random.randrange(2, 6)
        n_dressing_oils = 1
        n_dressing_vinegars = 1
        n_dressing_salts = 1
        n_dressing_peppers = 1
        # n_dressing_garlics = random.randrange(0, 2) # maybe make presence dependent on the rest. or, just leave out for now.
        
        selected_greens = salad_greens.sample(n_greens)
        selected_extras = salad_extras.sample(n_extras)
        selected_dressing_oils = salad_dressing_oils.sample(n_dressing_oils)
        selected_dressing_vinegars = salad_dressing_vinegars.sample(n_dressing_vinegars)
        selected_dressing_salts = salad_dressing_salts.sample(n_dressing_salts)
        selected_dressing_peppers = salad_dressing_peppers.sample(n_dressing_peppers)
        selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

        lower_category_pairs = []
        lower_direct_pairs = []
        upper_category_pairs = []
        upper_direct_pairs = []
        ingredients_list = selected_ingredients['name'].values.tolist()
        already_checked = []
        for ingredient_name in ingredients_list:
            for lc_name in pairing_data['lower_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if lc_name in ingredients_list and not lc_name in already_checked:
                    lower_category_pairs.append([ingredient_name, lc_name])
            for ld_name in pairing_data['lower_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ld_name in ingredients_list and not ld_name in already_checked:
                    lower_direct_pairs.append([ingredient_name, ld_name])
            for uc_name in pairing_data['upper_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if uc_name in ingredients_list and not uc_name in already_checked:
                    upper_category_pairs.append([ingredient_name, uc_name])
            for ud_name in pairing_data['upper_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ud_name in ingredients_list and not ud_name in already_checked:
                    upper_direct_pairs.append([ingredient_name, ud_name])
            already_checked.append(ingredient_name)

        lower_pairs = lower_category_pairs + lower_direct_pairs
        upper_pairs = upper_category_pairs + upper_direct_pairs
        all_pairs = lower_pairs + upper_pairs
        
#         all_pairs_sp = [tuple(sorted(pair)) for pair in all_pairs]
#         print(len(all_pairs_sp), len(list(set(all_pairs_sp))))

#         print('INGREDIENTS', ingredients_list)
#         print()
#         print('LC PAIRS', lower_category_pairs)
#         print()
#         print()

        G = nx.Graph()
        G.add_nodes_from(selected_ingredients['name'].values.tolist())
        G.add_edges_from(all_pairs)
        n_subgraphs = len(list(nx.connected_component_subgraphs(G)))
            
    score = 0

# SIMPLER ALTERNATIVE: bonus for proportion of actual pairs over possible pairs? could then combine with pair strength bonus?
# PAIRING DENSITY BONUS ============================================================================================
    # ranges from roughly (.1 to 1) * 4
    average_shortest_path_length = nx.average_shortest_path_length(G)
    average_shortest_path_score = 2.5 / average_shortest_path_length - 1.1
#     print(average_shortest_path_score)
    score += average_shortest_path_score * 4    
    
# # UPPER PAIRING BONUS ==============================================================================================
#     # ranges from roughly (.25 to 1) * 3
#     upper_proportion_score = len(upper_pairs) / len(all_pairs) * 2.5 # messed with this, not sure if it still works
# #     print(upper_proportion_score)
# #     print(len(upper_pairs), len(lower_pairs), len(all_pairs))
# #     print()
#     score += upper_proportion_score * 3

# PAIR STRENGTH BONUS ==============================================================================================
    # ranges from roughly (0 to 1) * 3

    # I'm thinking of 'lower category' as default, and awarding points for steps up from that
    ld_bonus = 1*len(lower_direct_pairs)
    uc_bonus = 2*len(upper_category_pairs)
    ud_bonus = 5*len(upper_direct_pairs)
    pair_strength_score = .6*(ld_bonus + uc_bonus + ud_bonus)/len(all_pairs) # otherwise would tend toward large salads
#     print('LC', len(lower_category_pairs))
#     print('LD', len(lower_direct_pairs))
#     print('UC', len(upper_category_pairs))
#     print('UD', len(upper_direct_pairs))
#     print('SCORE', pair_strength_score)
#     print()
    score += pair_strength_score * 3

# important but easy to avoid, so not weighted too heavily
# CLASH PENALTY ====================================================================================================
    # ranges from roughly (0 to 1) * -1.5
    all_clashing_pairs = []
    selected_ingredients_list = selected_ingredients['name'].values.tolist()
    for name in selected_ingredients_list:
        names_that_clash_with_name = clashes_with_data['all_clashes_with_names'][clashes_with_data['name'] == name].iloc[0]
        all_clashing_names = set(selected_ingredients_list).intersection(set(names_that_clash_with_name)) # selected names that clash with this selected name
        all_clashing_pairs += [tuple(sorted([name, all_clashing_name])) for all_clashing_name in all_clashing_names]

    all_clashing_pairs = list(set(all_clashing_pairs))
    all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
    score += len(all_clashing_pairs) * -1.5

# # FRUIT BONUS ======================================================================================================
#     # ranges from roughly (0 to 3) * .1
#     n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
#     score += n_fruit * .1
    
# # NUT SEED BONUS ===================================================================================================    
#     # ranges from roughly (0 to 2) * .15
#     n_nut_seed = len(selected_ingredients[selected_ingredients['protein_nut_seed'] == 'y'])
#     score += n_nut_seed * .15

# FLAVOR BALANCE BONUS =============================================================================================
    # ranges from roughly (0 to 1) * 1
    n_sweet_lower = len(selected_ingredients[selected_ingredients['sweet'] == 'y'])
    n_sweet_upper = len(selected_ingredients[selected_ingredients['sweet'] == 'Y'])
    n_salty_lower = len(selected_ingredients[selected_ingredients['salty'] == 'y'])
    n_salty_upper = len(selected_ingredients[selected_ingredients['salty'] == 'Y'])
    n_sour_lower = len(selected_ingredients[selected_ingredients['sour'] == 'y'])
    n_sour_upper = len(selected_ingredients[selected_ingredients['sour'] == 'Y'])
    n_savory_lower = len(selected_ingredients[selected_ingredients['savory'] == 'y'])
    n_savory_upper = len(selected_ingredients[selected_ingredients['savory'] == 'Y'])
    n_bitter_lower = len(selected_ingredients[selected_ingredients['bitter'] == 'y'])
    n_bitter_upper = len(selected_ingredients[selected_ingredients['bitter'] == 'Y'])
    n_spicy_lower = len(selected_ingredients[selected_ingredients['spicy'] == 'y'])
    n_spicy_upper = len(selected_ingredients[selected_ingredients['spicy'] == 'Y'])
    
    # each varies from roughly .5 to 1
    sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
    salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
    sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
    savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
    bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
    spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5
    
    flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.25
#     print(flavor_balance_score)
    
    score += flavor_balance_score
#     print(sweet_score, salty_score, sour_score, savory_score, bitter_score, spicy_score)
#     print()
    
#     print(n_sweet_lower, n_sweet_upper)
#     print(n_salty_lower, n_salty_upper)
#     print(n_sour_lower, n_sour_upper)
#     print(n_savory_lower, n_savory_upper)
#     print(n_bitter_lower, n_bitter_upper)
#     print(n_spicy_lower, n_spicy_upper)
#     print()

# TEXTURE BALANCE BONUS ============================================================================================
    # ranges from roughly (0 to 1) * .75
    n_crunchy_lower = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'y'])
    n_crunchy_upper = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'Y'])
    n_chewy_lower = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'y'])
    n_chewy_upper = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'Y'])
    n_juicy_lower = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'y'])
    n_juicy_upper = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'Y'])
    
    # each ranges from roughly 0 to 1
    crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
    chewy_score = (n_chewy_lower/2 + n_chewy_upper)
    juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3
#     print(crunchy_score, chewy_score, juicy_score)
    
    texture_balance_score = 4 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1
#     print(texture_balance_score)
#     print()
    
    score += texture_balance_score * .75

# seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
# FOOD GROUP BALANCE BONUS =========================================================================================
    # ranges from roughly (25 to 1) * 2
    n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
    n_veg = len(selected_ingredients[selected_ingredients['veg'] == 'y'])
    n_protein = len(selected_ingredients[selected_ingredients['protein'] == 'y'])
    
    # each varies from roughly 0 to 1 (sometimes a little over)
    fruit_score = n_fruit / 3
    veg_score = n_veg / 5
    protein_score = n_protein / 3
#     print(fruit_score, veg_score, protein_score)
    
    food_group_balance_score = 3 / (1 + abs(1-fruit_score) + abs(1-veg_score) + abs(1-protein_score)) - .75
#     print(food_group_balance_score)
#     print()
    
    score += food_group_balance_score * 2
    
    if score > top_score:
        top_score = score
        top_food_group_balance_score = food_group_balance_score
        top_average_shortest_path_score = average_shortest_path_score
        top_flavor_balance_score = flavor_balance_score
        top_texture_balance_score = texture_balance_score
        top_all_clashing_pairs_score = all_clashing_pairs_score
        top_pair_strength_score = pair_strength_score
#         top_upper_pairs = upper_pairs
#         top_lower_pairs = lower_pairs
        top_lc_pairs = lower_category_pairs
        top_ld_pairs = lower_direct_pairs
        top_uc_pairs = upper_category_pairs
        top_ud_pairs = upper_direct_pairs
        top_selected_ingredients = selected_ingredients
        top_average_shortest_path_length = average_shortest_path_length
#         top_upper_proportion = len(upper_pairs) / (len(upper_pairs) + len(lower_pairs))
# print('TOP AVG SHORTEST PATH LENGTH', top_average_shortest_path_length)
# print('TOP UPPER PROPORTION', top_upper_proportion)
print('TOP AVERAGE SHORTEST PATH SCORE', top_average_shortest_path_score * 4)
print('TOP PAIR STRENGTH SCORE', top_pair_strength_score * 3)
print('TOP ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score * -1.5)
print('TOP FLAVOR BALANCE SCORE', top_flavor_balance_score)
print('TOP TEXTURE BALANCE SCORE', top_texture_balance_score * .75)
print('TOP FOOD GROUP BALANCE SCORE', top_food_group_balance_score * 2)
print('TOP_SCORE', top_score)            
    
top_net = net.Network(notebook=True)

nodes = top_selected_ingredients['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = top_selected_ingredients.apply(get_color, axis=1).tolist()

top_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in top_lc_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in top_ld_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='grey')
    
for pair in top_uc_pairs:
    top_net.add_edge(pair[0], pair[1], color='darkgrey')
    
for pair in top_ud_pairs:
    top_net.add_edge(pair[0], pair[1], color='black')

vegan = top_selected_ingredients['not_vegan'].sum() == ''
gluten_free = top_selected_ingredients['gluten'].sum() == ''
print(vegan, gluten_free)

top_net.show('top_net.html')

TOP AVERAGE SHORTEST PATH SCORE 4.632258064516128
TOP PAIR STRENGTH SCORE 3.3119999999999994
TOP ALL CLASHING PAIRS SCORE -0.0
TOP FLAVOR BALANCE SCORE 0.6730769230769229
TOP TEXTURE BALANCE SCORE 0.0
TOP FOOD GROUP BALANCE SCORE 0.45652173913043503
TOP_SCORE 9.073856726723486
False True


### 4.1.2 Recording recipes

In [22]:
try:
    salad_recipe_data = pd.read_csv(os.path.join(root_path, 'DATA/salad_recipe_data_latest.csv'))
    print('SALAD RECIPE DATA IMPORT SUCCESSFUL')
except:
    salad_recipe_data = pd.DataFrame({
        'vegan': [],
        'gluten_free': [],
        'basic': [],
        'best_of': [],
        'score': [],
        'pairing_density_bonus': [],
        'pair_strength_bonus': [],
        'clash_penalty': [],
        'flavor_balance_bonus': [],
        'texture_balance_bonus': [],
        'food_group_balance_bonus': [],
        'lc_pairs': [],
        'ld_pairs': [],
        'uc_pairs': [],
        'ud_pairs':[],
        'clashing_pairs': [],
        'ingredient_names': [],
        'leafy_green_names': [],
        'extra_names': [],
        'dressing_names': [],
    })
    print('IMPORT FAILED, CREATING NEW SALAD RECIPE DATAFRAME')

SALAD RECIPE DATA IMPORT SUCCESSFUL


In [28]:
salad_recipe_data

Unnamed: 0,vegan,gluten_free,basic,best_of,score,pairing_density_bonus,pair_strength_bonus,clash_penalty,flavor_balance_bonus,texture_balance_bonus,food_group_balance_bonus,lc_pairs,ld_pairs,uc_pairs,ud_pairs,clashing_pairs,ingredient_names,leafy_green_names,extra_names,dressing_names
0,True,True,False,300,9.127106,4.061538,2.300000,0.0,0.416667,63.461538,1.714286,"[['SPINACH', 'LETTUCE, BUTTER (aka BIBB or BOS...","[['SPINACH', 'ORANGES, ORANGE JUICE, and ORANG...","[['SPINACH', 'OIL, COCONUT'], ['LETTUCE, BUTTE...","[['SPINACH', 'LEMONS'], ['SPINACH', 'HAZELNUTS...",[],"['SPINACH', 'LETTUCE, BUTTER (aka BIBB or BOST...","['SPINACH', 'LETTUCE, BUTTER (aka BIBB or BOST...","['HAZELNUTS (aka FILBERTS)', 'ORANGES, ORANGE ...","['OIL, COCONUT', 'VINEGAR, BROWN RICE (aka CHI..."
1,True,True,False,300,8.709734,3.635714,3.282353,0.0,0.416667,37.500000,1.000000,"[['GREENS, DANDELION', 'PEPPER, SZECHUAN'], ['...","[['GREENS, DANDELION', 'SPINACH'], ['GREENS, D...",[],"[['GREENS, DANDELION', 'LEMONS'], ['GREENS, DA...",[],"['GREENS, DANDELION', 'SPINACH', 'DAIKON', 'HA...","['GREENS, DANDELION', 'SPINACH']","['DAIKON', 'HAZELNUTS (aka FILBERTS)', 'LEMONS...","['OIL, OLIVE', 'VINEGAR, BALSAMIC', 'SALT, KOS..."
2,False,True,False,300,7.902055,2.933333,2.365714,0.0,1.381579,15.000000,1.071429,"[['LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE...","[['LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE...","[['LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE...","[['SPINACH', 'ONIONS, RED (see also ONIONS)'],...",[],"['LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE)...","['LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE)...","['DAIKON', 'CHEESE, PARMESAN', 'ONIONS, RED (s...","['OIL, COCONUT', 'VINEGAR, APPLE CIDER (aka VI..."
3,False,True,False,300,9.295476,2.547368,2.723077,0.0,1.527778,53.571429,1.961538,"[['SORREL', 'ARUGULA (aka ROCKET)'], ['SORREL'...","[['SORREL', 'PEPPER, BLACK'], ['SORREL', 'VINE...","[['SPINACH', 'ARUGULA (aka ROCKET)'], ['SPINAC...","[['SORREL', 'SPINACH'], ['SORREL', 'OIL, OLIVE...",[],"['SORREL', 'SPINACH', 'ARUGULA (aka ROCKET)', ...","['SORREL', 'SPINACH', 'ARUGULA (aka ROCKET)']","['SORREL', 'CHEESE, FETA', 'GRAPEFRUIT', 'RAIS...","['OIL, OLIVE', 'VINEGAR, BALSAMIC', 'SALT, KOS..."
4,False,True,False,300,7.881930,4.061538,2.280000,0.0,0.750000,19.736842,0.593023,"[['ARUGULA (aka ROCKET)', 'NORI (aka LAVER)'],...","[['LETTUCE, LAMB’S (aka CORN SALAD or MCHE)', ...","[['ARUGULA (aka ROCKET)', 'LETTUCE, LAMB’S (ak...","[['ARUGULA (aka ROCKET)', 'CHEESE, FETA'], ['S...",[],"['ARUGULA (aka ROCKET)', 'LETTUCE, LAMB’S (aka...","['ARUGULA (aka ROCKET)', 'LETTUCE, LAMB’S (aka...","['ONIONS, PEARL', 'CHEESE, FETA', 'TOMATOES, T...","['OIL, COCONUT', 'VINEGAR, SHERRY', 'SALT, SMO..."
5,False,True,False,300,9.108077,4.600000,2.835000,0.0,0.673077,25.000000,0.750000,"[['SPINACH', 'OLIVES, PICHOLINE'], ['SPINACH',...","[['SPINACH', 'CUCUMBERS'], ['SPINACH', 'PEPPER...","[['CUCUMBERS', 'CHEESE, PARMESAN'], ['SALT, TR...","[['SPINACH', 'SORREL'], ['SPINACH', 'CHEESE, P...",[],"['SPINACH', 'SORREL', 'CUCUMBERS', 'OLIVES, PI...","['SPINACH', 'SORREL']","['SORREL', 'CUCUMBERS', 'OLIVES, PICHOLINE', '...","['OIL, OLIVE', 'VINEGAR, BALSAMIC', 'SALT, TRU..."
6,True,True,False,300,8.675069,4.933333,3.115385,0.0,0.101351,15.000000,0.375000,"[['LETTUCE, ROMAINE', 'CAULIFLOWER'], ['LETTUC...","[['LETTUCE, ROMAINE', 'PEPPER, BLACK'], ['CAUL...","[['LETTUCE, ROMAINE', 'BEETS'], ['SALT, TRUFFL...","[['LETTUCE, ROMAINE', 'OIL, OLIVE'], ['LETTUCE...",[],"['LETTUCE, ROMAINE', 'SPROUTS, PEA', 'CAULIFLO...","['LETTUCE, ROMAINE', 'SPROUTS, PEA']","['CAULIFLOWER', 'BEETS']","['OIL, OLIVE', 'VINEGAR, BALSAMIC', 'SALT, TRU..."
7,True,True,False,300,8.202715,3.972093,2.606897,0.0,0.833333,19.736842,0.593023,"[['SPROUTS, BROCCOLI', 'GREENS, DANDELION'], [...","[['GREENS, DANDELION', 'TOMATOES, TOMATO JUICE...","[['SALT, SMOKED', 'PEPPER, BLACK']]","[['GREENS, DANDELION', 'OIL, OLIVE'], ['GREENS...",[],"['SPROUTS, BROCCOLI', 'GREENS, DANDELION', 'AL...","['SPROUTS, BROCCOLI', 'GREENS, DANDELION']","['ALMONDS, MARCONA', 'TOMATOES, TOMATO JUICE, ...","['OIL, OLIVE', 'VINEGAR, RED WINE (see also VI..."
8,True,True,False,300,8.729441,3.227119,3.993750,0.0,0.265152,37.500000,0.868421,"[['SORREL', 'LETTUCE, BUTTER (aka BIBB or BOST...","[['SORREL', 'LEMONS'], ['SORREL', 'SHALLOTS'],...",[],"[['SORREL', 'OIL, OLIVE'], ['LETTUCE, ROMAINE'...",[],"['SORREL', 'LETTUCE, ROMAINE', 'LETTUCE, BUTTE...","['SORREL', 'LETTUCE, ROMAINE', 'LETTUCE, BUTTE...","['SORREL', 'DATES', 'SHALLOTS', 'LEMONS']","['OIL, OLIVE', 'VINEGAR, SHERRY', 'SALT, BLACK..."
9,False,True,False,300,8.828991,4.830769,2.181818,0.0,0.923913,19.736842,0.695122,"[['GREENS, DANDELION', 'SALT, TRUFFLE'], ['LET...","[['GREENS, DANDELION', 'CARROTS'], ['GREENS, D...","[['GREENS, DANDELION', 'CHEESE, PECORINO'], ['...","[['GREENS, DANDELION', 'VINEGAR, BALSAMIC'], [...",[],"['GREENS, DANDELION', 'LETTUCE, ROMAINE', 'CHE...","['GREENS, DANDELION', 'LETTUCE, ROMAINE']","['CHEESE, PECORINO', 'HAZELNUTS (aka FILBERTS)...","['OIL, COCONUT', 'VINEGAR, BALSAMIC', 'SALT, T..."


In [23]:
salad_greens = salad_data[salad_data['salad_green'] == 'y']

salad_extras = salad_data[salad_data['salad_extra'] == 'y']
salad_extra_veg = salad_data[(salad_data['veg'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_fruits = salad_data[(salad_data['fruit'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_nuts = salad_data[(salad_data['protein_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_seeds = salad_data[(salad_data['protein_nut'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_tomatoes = salad_data[salad_data['salad_extra_tomato'] == 'y']
salad_extra_olives = salad_data[salad_data['salad_extra_olive'] == 'y']
salad_extra_cheeses = salad_data[salad_data['salad_extra_cheese'] == 'y']
salad_extra_eggs = salad_data[salad_data['salad_extra_egg'] == 'y']
salad_extra_croutons = salad_data[salad_data['salad_extra_crouton'] == 'y']

salad_dressing_oils = salad_data[salad_data['salad_dressing_oil'] == 'y']
salad_dressing_vinegars = salad_data[salad_data['salad_dressing_vinegar'] == 'y']
salad_dressing_salts = salad_data[salad_data['salad_dressing_salt'] == 'y']
salad_dressing_peppers = salad_data[salad_data['salad_dressing_pepper'] == 'y']
salad_dressing_garlics = salad_data[salad_data['salad_dressing_garlic'] == 'y']

In [24]:
# !pip install networkx

import networkx as nx

In [25]:
best_of = 300
for recipe_count in range(100):
    top_score = 0
    for i in range(best_of):
        n_subgraphs = 2
        while n_subgraphs > 1: # keep shuffling until you get a well connected graph
            n_greens = random.randrange(2, 4)
            n_extras = random.randrange(2, 6)
            n_dressing_oils = 1
            n_dressing_vinegars = 1
            n_dressing_salts = 1
            n_dressing_peppers = 1
            # n_dressing_garlics = random.randrange(0, 2) # maybe make presence dependent on the rest. or, just leave out for now.

            selected_greens = salad_greens.sample(n_greens)
            selected_extras = salad_extras.sample(n_extras)
            selected_dressing_oils = salad_dressing_oils.sample(n_dressing_oils)
            selected_dressing_vinegars = salad_dressing_vinegars.sample(n_dressing_vinegars)
            selected_dressing_salts = salad_dressing_salts.sample(n_dressing_salts)
            selected_dressing_peppers = salad_dressing_peppers.sample(n_dressing_peppers)
            selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

            lower_category_pairs = []
            lower_direct_pairs = []
            upper_category_pairs = []
            upper_direct_pairs = []
            ingredients_list = selected_ingredients['name'].values.tolist()
            already_checked = []
            for ingredient_name in ingredients_list:
                for lc_name in pairing_data['lower_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if lc_name in ingredients_list and not lc_name in already_checked:
                        lower_category_pairs.append([ingredient_name, lc_name])
                for ld_name in pairing_data['lower_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if ld_name in ingredients_list and not ld_name in already_checked:
                        lower_direct_pairs.append([ingredient_name, ld_name])
                for uc_name in pairing_data['upper_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if uc_name in ingredients_list and not uc_name in already_checked:
                        upper_category_pairs.append([ingredient_name, uc_name])
                for ud_name in pairing_data['upper_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if ud_name in ingredients_list and not ud_name in already_checked:
                        upper_direct_pairs.append([ingredient_name, ud_name])
                already_checked.append(ingredient_name)

            lower_pairs = lower_category_pairs + lower_direct_pairs
            upper_pairs = upper_category_pairs + upper_direct_pairs
            all_pairs = lower_pairs + upper_pairs

    #         all_pairs_sp = [tuple(sorted(pair)) for pair in all_pairs]
    #         print(len(all_pairs_sp), len(list(set(all_pairs_sp))))

    #         print('INGREDIENTS', ingredients_list)
    #         print()
    #         print('LC PAIRS', lower_category_pairs)
    #         print()
    #         print()

            G = nx.Graph()
            G.add_nodes_from(selected_ingredients['name'].values.tolist())
            G.add_edges_from(all_pairs)
            n_subgraphs = len(list(nx.connected_component_subgraphs(G)))

        score = 0

    # SIMPLER ALTERNATIVE: bonus for proportion of actual pairs over possible pairs? could then combine with pair strength bonus?
    # PAIRING DENSITY BONUS ============================================================================================
        # ranges from roughly (.1 to 1) * 4
        average_shortest_path_length = nx.average_shortest_path_length(G)
        average_shortest_path_score = 2.5 / average_shortest_path_length - 1.1
    #     print(average_shortest_path_score)
        score += average_shortest_path_score * 4    

    # # UPPER PAIRING BONUS ==============================================================================================
    #     # ranges from roughly (.25 to 1) * 3
    #     upper_proportion_score = len(upper_pairs) / len(all_pairs) * 2.5 # messed with this, not sure if it still works
    # #     print(upper_proportion_score)
    # #     print(len(upper_pairs), len(lower_pairs), len(all_pairs))
    # #     print()
    #     score += upper_proportion_score * 3

    # PAIR STRENGTH BONUS ==============================================================================================
        # ranges from roughly (0 to 1) * 3

        # I'm thinking of 'lower category' as default, and awarding points for steps up from that
        ld_bonus = 1*len(lower_direct_pairs)
        uc_bonus = 2*len(upper_category_pairs)
        ud_bonus = 5*len(upper_direct_pairs)
        pair_strength_score = .6*(ld_bonus + uc_bonus + ud_bonus)/len(all_pairs) # otherwise would tend toward large salads
    #     print('LC', len(lower_category_pairs))
    #     print('LD', len(lower_direct_pairs))
    #     print('UC', len(upper_category_pairs))
    #     print('UD', len(upper_direct_pairs))
    #     print('SCORE', pair_strength_score)
    #     print()
        score += pair_strength_score * 3

    # important but easy to avoid, so not weighted too heavily
    # CLASH PENALTY ====================================================================================================
        # ranges from roughly (0 to 1) * -1.5
        all_clashing_pairs = []
        selected_ingredients_list = selected_ingredients['name'].values.tolist()
        for name in selected_ingredients_list:
            names_that_clash_with_name = clashes_with_data['all_clashes_with_names'][clashes_with_data['name'] == name].iloc[0]
            all_clashing_names = set(selected_ingredients_list).intersection(set(names_that_clash_with_name)) # selected names that clash with this selected name
            all_clashing_pairs += [tuple(sorted([name, all_clashing_name])) for all_clashing_name in all_clashing_names]

        all_clashing_pairs = list(set(all_clashing_pairs))
        all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
        score += len(all_clashing_pairs) * -1.5

    # # FRUIT BONUS ======================================================================================================
    #     # ranges from roughly (0 to 3) * .1
    #     n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
    #     score += n_fruit * .1

    # # NUT SEED BONUS ===================================================================================================    
    #     # ranges from roughly (0 to 2) * .15
    #     n_nut_seed = len(selected_ingredients[selected_ingredients['protein_nut_seed'] == 'y'])
    #     score += n_nut_seed * .15

    # FLAVOR BALANCE BONUS =============================================================================================
        # ranges from roughly (0 to 1) * 1
        n_sweet_lower = len(selected_ingredients[selected_ingredients['sweet'] == 'y'])
        n_sweet_upper = len(selected_ingredients[selected_ingredients['sweet'] == 'Y'])
        n_salty_lower = len(selected_ingredients[selected_ingredients['salty'] == 'y'])
        n_salty_upper = len(selected_ingredients[selected_ingredients['salty'] == 'Y'])
        n_sour_lower = len(selected_ingredients[selected_ingredients['sour'] == 'y'])
        n_sour_upper = len(selected_ingredients[selected_ingredients['sour'] == 'Y'])
        n_savory_lower = len(selected_ingredients[selected_ingredients['savory'] == 'y'])
        n_savory_upper = len(selected_ingredients[selected_ingredients['savory'] == 'Y'])
        n_bitter_lower = len(selected_ingredients[selected_ingredients['bitter'] == 'y'])
        n_bitter_upper = len(selected_ingredients[selected_ingredients['bitter'] == 'Y'])
        n_spicy_lower = len(selected_ingredients[selected_ingredients['spicy'] == 'y'])
        n_spicy_upper = len(selected_ingredients[selected_ingredients['spicy'] == 'Y'])

        # each varies from roughly .5 to 1
        sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
        salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
        sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
        savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
        bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
        spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5

        flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.25
    #     print(flavor_balance_score)

        score += flavor_balance_score
    #     print(sweet_score, salty_score, sour_score, savory_score, bitter_score, spicy_score)
    #     print()

    #     print(n_sweet_lower, n_sweet_upper)
    #     print(n_salty_lower, n_salty_upper)
    #     print(n_sour_lower, n_sour_upper)
    #     print(n_savory_lower, n_savory_upper)
    #     print(n_bitter_lower, n_bitter_upper)
    #     print(n_spicy_lower, n_spicy_upper)
    #     print()

    # TEXTURE BALANCE BONUS ============================================================================================
        # ranges from roughly (0 to 1) * .75
        n_crunchy_lower = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'y'])
        n_crunchy_upper = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'Y'])
        n_chewy_lower = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'y'])
        n_chewy_upper = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'Y'])
        n_juicy_lower = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'y'])
        n_juicy_upper = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'Y'])

        # each ranges from roughly 0 to 1
        crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
        chewy_score = (n_chewy_lower/2 + n_chewy_upper)
        juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3
    #     print(crunchy_score, chewy_score, juicy_score)

        texture_balance_score = 4 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1
    #     print(texture_balance_score)
    #     print()

        score += texture_balance_score * .75

    # seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
    # FOOD GROUP BALANCE BONUS =========================================================================================
        # ranges from roughly (25 to 1) * 2
        n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
        n_veg = len(selected_ingredients[selected_ingredients['veg'] == 'y'])
        n_protein = len(selected_ingredients[selected_ingredients['protein'] == 'y'])

        # each varies from roughly 0 to 1 (sometimes a little over)
        fruit_score = n_fruit / 3
        veg_score = n_veg / 5
        protein_score = n_protein / 3
    #     print(fruit_score, veg_score, protein_score)

        food_group_balance_score = 3 / (1 + abs(1-fruit_score) + abs(1-veg_score) + abs(1-protein_score)) - .75
    #     print(food_group_balance_score)
    #     print()

        score += food_group_balance_score * 2

        if score > top_score:
            top_score = score
            top_food_group_balance_score = food_group_balance_score
            top_average_shortest_path_score = average_shortest_path_score
            top_flavor_balance_score = flavor_balance_score
            top_texture_balance_score = texture_balance_score
            top_all_clashing_pairs_score = all_clashing_pairs_score
            top_pair_strength_score = pair_strength_score
    #         top_upper_pairs = upper_pairs
    #         top_lower_pairs = lower_pairs
            top_lc_pairs = lower_category_pairs
            top_ld_pairs = lower_direct_pairs
            top_uc_pairs = upper_category_pairs
            top_ud_pairs = upper_direct_pairs
            top_selected_ingredients = selected_ingredients
            top_average_shortest_path_length = average_shortest_path_length
            top_all_clashing_pairs = all_clashing_pairs
    #         top_upper_proportion = len(upper_pairs) / (len(upper_pairs) + len(lower_pairs))
    # print('TOP AVG SHORTEST PATH LENGTH', top_average_shortest_path_length)
    # print('TOP UPPER PROPORTION', top_upper_proportion)
#     print('TOP AVERAGE SHORTEST PATH SCORE', top_average_shortest_path_score * 4)
#     print('TOP PAIR STRENGTH SCORE', top_pair_strength_score * 3)
#     print('TOP ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score * -1.5)
#     print('TOP FLAVOR BALANCE SCORE', top_flavor_balance_score)
#     print('TOP TEXTURE BALANCE SCORE', top_texture_balance_score * .75)
#     print('TOP FOOD GROUP BALANCE SCORE', top_food_group_balance_score * 2)
#     print('TOP_SCORE', top_score)  
    
    vegan = top_selected_ingredients['not_vegan'].sum() == ''
    gluten_free = top_selected_ingredients['gluten'].sum() == ''
    
    recipe_greens = top_selected_ingredients[top_selected_ingredients['salad_green'] == 'y']
    recipe_extras = top_selected_ingredients[top_selected_ingredients['salad_extra'] == 'y']
    recipe_dressing_oils = top_selected_ingredients[top_selected_ingredients['salad_dressing_oil'] == 'y']
    recipe_dressing_vinegars = top_selected_ingredients[top_selected_ingredients['salad_dressing_vinegar'] == 'y']
    recipe_dressing_salts = top_selected_ingredients[top_selected_ingredients['salad_dressing_salt'] == 'y']
    recipe_dressing_peppers = top_selected_ingredients[top_selected_ingredients['salad_dressing_pepper'] == 'y']
    recipe_dressing_garlics = top_selected_ingredients[top_selected_ingredients['salad_dressing_garlic'] == 'y']
    # could just select 'salad_dressing', but this includes garlics
    recipe_dressing = recipe_dressing_oils.append(recipe_dressing_vinegars).append(recipe_dressing_salts).append(recipe_dressing_peppers)

    new_recipe = pd.DataFrame({
        'vegan': [vegan],
        'gluten_free': [gluten_free],
        'basic': [salad_data_is_basic],
        'best_of': [best_of],
        'score': [top_score],
        'pairing_density_bonus': [top_average_shortest_path_score * 4],
        'pair_strength_bonus': [top_pair_strength_score * 3],
        'clash_penalty': [top_all_clashing_pairs_score * 4],
        'flavor_balance_bonus': [top_flavor_balance_score],
        'texture_balance_bonus': [top_texture_balance_score * 75],
        'food_group_balance_bonus': [top_food_group_balance_score * 2],
        'lc_pairs': [top_lc_pairs],
        'ld_pairs': [top_ld_pairs],
        'uc_pairs': [top_uc_pairs],
        'ud_pairs':[top_ud_pairs],
        'clashing_pairs': [top_all_clashing_pairs],
        'ingredient_names': [top_selected_ingredients['name'].values.tolist()],
        'leafy_green_names': [recipe_greens['name'].values.tolist()],
        'extra_names': [recipe_extras['name'].values.tolist()],
        'dressing_names': [recipe_dressing['name'].values.tolist()],
    })
    salad_recipe_data = salad_recipe_data.append(new_recipe, sort=False)
    print('SALAD RECIPE RECORDED. SCORE:', top_score)

SALAD RECIPE RECORDED. SCORE: 8.14895871392035


KeyboardInterrupt: 

In [None]:
from datetime import date
today = date.today()
date_string = f'{str(today.year)}_{str(today.month)}_{str(today.day)}'

salad_recipe_data.to_csv(os.path.join(root_path, 'DATA/salad_recipe_data_latest.csv'), index=False)
salad_recipe_data.to_csv(os.path.join(root_path, f'DATA/salad_recipe_data_{date_string}.csv'), index=False)

### 4.1.3. Displaying records

In [26]:
# !pip install pyvis

from pyvis import network as net

In [27]:
recipe = salad_recipe_data[(salad_recipe_data['basic'] == True) & (salad_recipe_data['vegan'] == True) & salad_recipe_data['gluten_free'] == True].sample(1).iloc[0]
recipe_net = net.Network(notebook=True)

nodes = recipe['ingredient_names']

nodes_color = []
for name in recipe['ingredient_names']:
    ingredient = salad_data[salad_data['name'] == name].iloc[0]
    if ingredient['salad_green'] == 'y':
        nodes_color.append('lightgreen')
    elif ingredient['salad_extra'] == 'y':
        if ingredient['veg'] == 'y':
            nodes_color.append('green')
        elif ingredient['fruit'] == 'y':
            nodes_color.append('orange')
        elif ingredient['protein_nut_seed'] == 'y':
            nodes_color.append('brown')
        else:
            nodes_color.append('lightblue')
    elif ingredient['salad_dressing'] == 'y':
        nodes_color.append('lightgrey')  
        
recipe_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in recipe['lc_pairs']:
    recipe_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in recipe['ld_pairs']:
    recipe_net.add_edge(pair[0], pair[1], physics=False, color='grey')
    
for pair in recipe['uc_pairs']:
    recipe_net.add_edge(pair[0], pair[1], color='darkgrey')
    
for pair in recipe['ud_pairs']:
    recipe_net.add_edge(pair[0], pair[1], color='black')

if not recipe['vegan']:
    print('NOT VEGAN')
if not recipe['gluten_free']:
    print('CONTAINS GLUTEN')
print('SCORE:', recipe['score'])
recipe_net.show('recipe_net.html')

ValueError: a must be greater than 0

## 4.2. Random control

In [None]:
# ALGORITHM B (random control)
n_greens = random.randrange(1, 4)
n_extras = random.randrange(0, 10)
n_dressing_oils = 1
n_dressing_vinegars = 1
n_dressing_salts = 1
n_dressing_peppers = 1
# n_dressing_garlics = random.randrange(0, 2) # maybe make presence dependent on the rest. or, just leave out for now.

selected_greens = salad_greens.sample(n_greens)
selected_extras = salad_extras.sample(n_extras)
selected_dressing_oils = salad_dressing_oils.sample(n_dressing_oils)
selected_dressing_vinegars = salad_dressing_vinegars.sample(n_dressing_vinegars)
selected_dressing_salts = salad_dressing_salts.sample(n_dressing_salts)
selected_dressing_peppers = salad_dressing_peppers.sample(n_dressing_peppers)

selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

upper_pairs = []
lower_pairs = []
for ingredient_name in selected_ingredients['name']:
    for pairs_with_name in pairing_data['upper_pairs_with_names'][pairing_data['name'] == ingredient_name].iloc[0]:
        if pairs_with_name in selected_ingredients['name'].values.tolist():
#             print('UPPER PAIR:', ingredient_name, 'WITH', pairs_with_name)
            upper_pairs.append([ingredient_name, pairs_with_name])
    for pairs_with_name in pairing_data['lower_pairs_with_names'][pairing_data['name'] == ingredient_name].iloc[0]:
        if pairs_with_name in selected_ingredients['name'].values.tolist():
#             print('LOWER PAIR:', ingredient_name, 'WITH', pairs_with_name)
            lower_pairs.append([ingredient_name, pairs_with_name])

score = (len(upper_pairs) * 3 + len(lower_pairs)) / (n_greens + n_extras)
print('RANDOM SCORE', score)



random_net = net.Network(notebook=True)

nodes = selected_ingredients['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = selected_ingredients.apply(get_color, axis=1).tolist()

random_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)


for pair in upper_pairs:
    random_net.add_edge(pair[0], pair[1], color='black')

for pair in lower_pairs:
    random_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

random_net.show('selected_net.html')