# 0. Setup

In [1]:
# !pip install pandas==1.0.5

In [2]:
import pandas as pd
import os.path
import math
import re
import random

In [3]:
root_path = os.path.dirname(os.getcwd())
ingredients_data_raw = pd.read_csv(os.path.join(root_path, 'DATA/ingredients_data.csv'))
ingredients_data = ingredients_data_raw.replace(float('nan'), '')
salad_data_impractical = ingredients_data[ingredients_data['salad']=='y']
salad_data = salad_data_impractical[(salad_data_impractical['salad_umbrella'] != 'y') & (salad_data_impractical['redirect'] != 'y')]

salad_data_basic = salad_data[salad_data['salad_basic'] == 'y']
salad_data_current = salad_data[salad_data['2020_7_5'] == 'y']
salad_data_with_umbrella = ingredients_data[(ingredients_data['salad_umbrella'] != 'y') & (ingredients_data['salad'] == 'y')]
# salad_data = salad_data_basic
# salad_data = salad_data_current
# salad_data = salad_data_with_umbrella
salad_data_is_basic = False

salad_data.reset_index(inplace=True)

# 1. Matching "pairs with" terms to ingredient names

In [4]:
def get_terms_from_pairs_with(pairs_with):
    if str(pairs_with) == 'nan':
        return []
    else:
        return [term.strip() for term in pairs_with.split('\n\n') if term.strip() != '']

# break entries in column that has 'pairs with' strings into lists of ingredient terms
ingredient_pairs_with_terms = salad_data['pairs_with'].apply(get_terms_from_pairs_with)

In [5]:
# create list of all terms, ignoring case and excluding duplicates
all_terms = list(set(ingredient_pairs_with_terms.sum()))
all_terms_lower = list(set([term.lower() for term in ingredient_pairs_with_terms.sum()]))

In [6]:
# # !pip install inflect
# import inflect
# p = inflect.engine()

In [7]:
# def get_normal_tokens(phrase):
#     # should add and/or, but the data is good enough and I don't want to mess up the manual entry
#     tokens = [token.strip().lower() for token in re.split('\(|\)|,|e\.g\.|esp\.|and|—|or|aka|see|see also|;', phrase)]
# #     print(tokens)
#     tokens = [p.singular_noun(token) or token for token in tokens if token != '']
# #     print(tokens)
#     return tokens

# def get_score(name, term):
#     name_tokens = get_normal_tokens(name)
# #     print(name_tokens)
#     term_tokens = get_normal_tokens(term)
# #     print(term_tokens)
#     common_tokens = list(set(name_tokens).intersection(set(term_tokens)))
# #     print(common_tokens)
#     return len(common_tokens)

In [8]:
# # doing it this way so I can add 'print' to monitor progress
# score_data = []
# for name in salad_data['name']:
#     print(name)
#     score_data.append([get_score(name, term) for term in all_terms])

# term_name_scores = pd.DataFrame(score_data, columns = all_terms)
# term_name_scores['name'] = salad_data['name'].values

# term_name_scores.to_csv(os.path.join(root_path, 'DATA/term_name_scores_common.csv'), index=False)

# 2. Extracting "pairs with" data

In [9]:
# DELETE?
# term_name_matches_raw = pd.read_csv(os.path.join(root_path, 'DATA/term_name_matches.csv'))
# term_name_matches = term_name_matches_raw.replace(['0', '1', '2', '3', '4', '5', 0, 1, 2, 3, 4, 5, float('nan')], '')
# term_name_matches_lower = term_name_matches.replace('Y', 'y')

In [10]:
# DELETE?

# # create a dataframe with name and a list of pairing terms for each ingredient
# pairing_data = pd.DataFrame({
#     'name': salad_data['name'],
#     'pairs_with_terms': ingredient_pairs_with_terms
# })

# def get_pairs_with_names(row):
# #     print(row['name'], type(row['name']))
#     lower_names = []
#     upper_names = []
#     for term in row['pairs_with_terms']:
#         if term in term_name_matches.columns.values.tolist():
#             lower_names += term_name_matches[term_name_matches[term] == 'y']['name'].values.tolist()
#             upper_names += term_name_matches[term_name_matches[term] == 'Y']['name'].values.tolist()
#         else:
#             print('OH NO didnt find term:', term)
# #     for name in lower_names + upper_names:
# #         if name == row['name']:
# #             print('DUPLICATE')
# #     print(lower_names, upper_names)
#     row['lower_pairs_with_names'] = [lower_name for lower_name in lower_names if lower_name != row['name']]
#     row['upper_pairs_with_names'] = [upper_name for upper_name in upper_names if upper_name != row['name']]
#     row['all_pairs_with_names'] = lower_names + upper_names
#     return row

# pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)

In [11]:
term_name_matches_raw = pd.read_csv(os.path.join(root_path, 'DATA/term_name_matches_specific.csv'))
term_name_matches = term_name_matches_raw.replace(['0', '1', '2', '3', '4', '5', 0, 1, 2, 3, 4, 5, float('nan')], '')

In [12]:
# create a dataframe with name and a list of pairing terms for each ingredient
# takes a minute
pairing_data = pd.DataFrame({
    'name': salad_data['name'],
    'pairs_with_terms': ingredient_pairs_with_terms
})

def get_pairs_with_names(row):
#     print(row['name'], type(row['name']))
    lower_category_names = []
    lower_direct_names = []
    upper_category_names = []
    upper_direct_names = []
    for term in row['pairs_with_terms']:
        if term in term_name_matches.columns.values.tolist():
            lower_category_names += term_name_matches[term_name_matches[term] == 'c']['name'].values.tolist()
            lower_direct_names += term_name_matches[term_name_matches[term] == 'd']['name'].values.tolist()
            upper_category_names += term_name_matches[term_name_matches[term] == 'C']['name'].values.tolist()
            upper_direct_names += term_name_matches[term_name_matches[term] == 'D']['name'].values.tolist()
        else:
            pass
#             print('OH NO didnt find term:', term)

    # still needed?
    row['lower_category_pairs_with_names'] = list(set([lower_category_name for lower_category_name in lower_category_names if lower_category_name != row['name']]))
    row['lower_direct_pairs_with_names'] = list(set([lower_direct_name for lower_direct_name in lower_direct_names if lower_direct_name != row['name']]))
    row['upper_category_pairs_with_names'] = list(set([upper_category_name for upper_category_name in upper_category_names if upper_category_name != row['name']]))
    row['upper_direct_pairs_with_names'] = list(set([upper_direct_name for upper_direct_name in upper_direct_names if upper_direct_name != row['name']]))
    row['lower_pairs_with_names'] = list(set(row['lower_category_pairs_with_names'] + row['lower_direct_pairs_with_names']))
    row['upper_pairs_with_names'] = list(set(row['upper_category_pairs_with_names'] + row['upper_direct_pairs_with_names']))
    row['all_pairs_with_names'] = list(set(row['lower_pairs_with_names'] + row['upper_pairs_with_names']))
    
    # still needed?
    row['lc_sorted_pairs'] = tuple(sorted(row['lower_category_pairs_with_names']))
    row['ld_sorted_pairs'] = tuple(sorted(row['lower_direct_pairs_with_names']))
    row['uc_sorted_pairs'] = tuple(sorted(row['upper_category_pairs_with_names']))
    row['ud_sorted_pairs'] = tuple(sorted(row['upper_direct_pairs_with_names']))
    row['l_sorted_pairs'] = tuple(sorted(row['lower_pairs_with_names']))
    row['u_sorted_pairs'] = tuple(sorted(row['upper_pairs_with_names']))
    row['a_sorted_pairs'] = tuple(sorted(row['all_pairs_with_names']))
    return row

pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)

In [13]:
# CREATE PAIRING DATA MATRIX (names x names)
# takes a few minutes

pairing_data = pd.DataFrame({
    'name': salad_data['name'],
    'pairs_with_terms': ingredient_pairs_with_terms
})

for name in salad_data['name']:
    pairing_data[name] = pd.Series(['']*len(salad_data['name']))

def get_pairs_with_names(row):
#     print(row['name'], type(row['name']))
    lower_category_names = []
    lower_direct_names = []
    upper_category_names = []
    upper_direct_names = []
    for term in row['pairs_with_terms']:
        if term in term_name_matches.columns.values.tolist():
            lower_category_names += term_name_matches[term_name_matches[term] == 'c']['name'].values.tolist()
            lower_direct_names += term_name_matches[term_name_matches[term] == 'd']['name'].values.tolist()
            upper_category_names += term_name_matches[term_name_matches[term] == 'C']['name'].values.tolist()
            upper_direct_names += term_name_matches[term_name_matches[term] == 'D']['name'].values.tolist()
        else:
            pass
#             print('OH NO didnt find term:', term)
    
    for lower_category_name in lower_category_names:
        row[lower_category_name] = 'c'
    for lower_direct_name in lower_direct_names:
        row[lower_direct_name] = 'd'
    for upper_category_name in upper_category_names:
        row[upper_category_name] = 'C'
    for upper_direct_name in upper_direct_names:
        row[upper_direct_name] = 'D'

    return row

pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)
pairing_data.replace(float('nan'), '', inplace=True)

In [14]:
# SYNC PAIRING DATA MATRIX (make sure [a][b] agrees with [b][a])

for index_1, name_1 in enumerate(pairing_data['name'].values.tolist()):
    for index_2, name_2 in enumerate(pairing_data['name'].values.tolist()):
        value_1 = pairing_data[name_1][index_2]
        value_2 = pairing_data[name_2][index_1]
        
        if name_1 == name_2:
            proper_value = ''
        elif value_1 == 'D' or value_2 == 'D':
            proper_value = 'D'
        elif value_1 == 'C' or value_2 == 'C':
            proper_value = 'C'
        elif value_1 == 'd' or value_2 == 'd':
            proper_value = 'd'
        elif value_1 == 'c' or value_2 == 'c':
            proper_value = 'c'
        else:
            proper_value = ''
        
        pairing_data[name_1][index_2] = proper_value
        pairing_data[name_2][index_1] = proper_value

In [15]:
# REPRESENT PAIRING DATA AS LISTS

def get_pairs_with_names(row):
    lower_category_name = pairing_data[pairing_data[row['name']] == 'c']['name'].values.tolist()
    lower_direct_name = pairing_data[pairing_data[row['name']] == 'd']['name'].values.tolist()
    upper_category_name = pairing_data[pairing_data[row['name']] == 'C']['name'].values.tolist()
    upper_direct_name = pairing_data[pairing_data[row['name']] == 'D']['name'].values.tolist()
    
    row['lower_category_names'] = pairing_data[pairing_data[row['name']] == 'c']['name'].values.tolist()
    row['lower_direct_names'] = pairing_data[pairing_data[row['name']] == 'd']['name'].values.tolist()
    row['upper_category_names'] = pairing_data[pairing_data[row['name']] == 'C']['name'].values.tolist()
    row['upper_direct_names'] = pairing_data[pairing_data[row['name']] == 'D']['name'].values.tolist()
    row['lower_names'] = row['lower_category_names'] + row['lower_direct_names']
    row['upper_names'] = row['upper_category_names'] + row['upper_direct_names']
    row['all_names'] = row['lower_names'] + row['upper_names']
    
#     row['lower_category_pairs_with_names'] = list(set([lower_category_name for lower_category_name in row['lower_category_names'] if lower_category_name != row['name']]))
#     row['lower_direct_pairs_with_names'] = list(set([lower_direct_name for lower_direct_name in row['lower_direct_names'] if lower_direct_name != row['name']]))
#     row['upper_category_pairs_with_names'] = list(set([upper_category_name for upper_category_name in row['upper_category_names'] if upper_category_name != row['name']]))
#     row['upper_direct_pairs_with_names'] = list(set([upper_direct_name for upper_direct_name in row['upper_direct_names'] if upper_direct_name != row['name']]))
#     row['lower_pairs_with_names'] = list(set(row['lower_category_pairs_with_names'] + row['lower_direct_pairs_with_names']))
#     row['upper_pairs_with_names'] = list(set(row['upper_category_pairs_with_names'] + row['upper_direct_pairs_with_names']))
#     row['all_pairs_with_names'] = list(set(row['lower_pairs_with_names'] + row['upper_pairs_with_names']))
    
    row['lc_sorted_pairs'] = [tuple(sorted((row['name'], other_name,))) for other_name in row['lower_category_names']]
    row['ld_sorted_pairs'] = [tuple(sorted((row['name'], other_name,))) for other_name in row['lower_direct_names']]
    row['uc_sorted_pairs'] = [tuple(sorted((row['name'], other_name,))) for other_name in row['upper_category_names']]
    row['ud_sorted_pairs'] = [tuple(sorted((row['name'], other_name,))) for other_name in row['upper_direct_names']]
    row['l_sorted_pairs'] = [tuple(sorted((row['name'], other_name,))) for other_name in row['lower_names']]
    row['u_sorted_pairs'] = [tuple(sorted((row['name'], other_name,))) for other_name in row['upper_names']]
    row['a_sorted_pairs'] = [tuple(sorted((row['name'], other_name,))) for other_name in row['all_names']]
    
    return row

pairing_data = pairing_data.apply(get_pairs_with_names, axis=1)

In [16]:
pairing_data.to_pickle(os.path.join(root_path, 'DATA/pairing_data.pickle'))
# pairing_data = pd.read_pickle(os.path.join(root_path, 'DATA/pairing_data.pickle'))

# 3. Creating "clashes with" data

In [17]:
# name_name_clashes_blank = salad_data[['name', 'protein_cheese_sub', 'salad_allium', 'fruit', 'veg']].copy()
# names = salad_data['name'].values.tolist()

# for i, col_name in enumerate(names):
#     name_name_clashes_blank[col_name] = pd.Series(['x']*(i+1) + ['']*(len(names)-(i+1)))

# name_name_clashes_blank.to_csv(os.path.join(root_path, 'DATA/name_name_clashes_blank.csv'), index=False)

In [18]:
name_name_clashes_input = pd.read_csv(os.path.join(root_path, 'DATA/name_name_clashes_input.csv'))
name_name_clashes_input = name_name_clashes_input.replace([float('nan'), 'x'], '')
columns = ['name'] + name_name_clashes_input['name'].values.tolist()
name_name_clashes_input = name_name_clashes_input[columns]

def get_clashes_with_data(row):
    data = pd.Series([])
#     print(row.index.values.tolist())
    name = row['name']
    data['name'] = name
    
    lower_names = name_name_clashes_input['name'][name_name_clashes_input[name] == 'y'].values.tolist()
    upper_names = name_name_clashes_input['name'][name_name_clashes_input[name] == 'Y'].values.tolist()
    if 'y' in lower_names:
        print(lower_names)
    
    for name in name_name_clashes_input['name']:
#         if name in ['y', 'Y']:
#             print(name)
        if row[name] == 'y':
            lower_names.append(name)
        elif row[name] == 'Y':
            upper_names.append(name)
    
    lower_names = list(set(lower_names))
    upper_names = list(set(upper_names))
    
    data['lower_clashes_with_names'] = lower_names
    data['upper_clashes_with_names'] = upper_names
    data['all_clashes_with_names'] = list(set(lower_names + upper_names)) # shouldn't be overlap here, but hey
    
    data['lower_clashes_with_pairs'] = [tuple(sorted([name, lower_name])) for lower_name in data['lower_clashes_with_names']]
    data['upper_clashes_with_pairs'] = [tuple(sorted([name, lower_name])) for lower_name in data['upper_clashes_with_names']]
    data['all_clashes_with_pairs'] = [tuple(sorted([name, lower_name])) for lower_name in data['all_clashes_with_names']]
    
    return data

clashes_with_data = name_name_clashes_input.apply(get_clashes_with_data, axis=1)
            
# # for row in name_name_clashes_input.iterrows():
# clashes_with_data = pd.DataFrame({'name': salad_data['name']})
# for i, row in clashes_with_data.iterrows():
# #     print(row, type(row))
#     name = row['name']
#     row['lower_clashes_with'] = name_name_clashes_input[name][name_name_clashes_input[name] == 'y']
#     row['upper_clashes_with'] = name_name_clashes_input[name][name_name_clashes_input[name] == 'Y']
#     row['all_clashes_with'] = row['lower_clashes_with'] + row['upper_clashes_with']

  import sys


In [19]:
# SYNC PAIRING DATA MATRIX (make sure [a][b] agrees with [b][a])

name_name_clashes_synced = name_name_clashes_input.copy()

for index_1, name_1 in enumerate(name_name_clashes_input['name'].values.tolist()):
    for index_2, name_2 in enumerate(name_name_clashes_input['name'].values.tolist()):
        value_1 = name_name_clashes_input[name_1][index_2]
        value_2 = name_name_clashes_input[name_2][index_1]
        
        if name_1 == name_2:
            proper_value = ''
        elif value_1 == 'Y' or value_2 == 'Y':
            proper_value = 'Y'
        elif value_1 == 'y' or value_2 == 'y':
            proper_value = 'y'
        else:
            proper_value = ''
        
        name_name_clashes_synced[name_1][index_2] = proper_value
        name_name_clashes_synced[name_2][index_1] = proper_value
        


In [20]:
# add name_name_clashes_synced to clashes_with_data
clashes_with_data[name_name_clashes_synced['name'].tolist()] = name_name_clashes_synced[name_name_clashes_synced['name'].tolist()].copy()

In [21]:
# all_lower_clashes_with_pairs = list(set(clashes_with_data['lower_clashes_with_pairs'].sum()))
# all_upper_clashes_with_pairs = list(set(clashes_with_data['upper_clashes_with_pairs'].sum()))
# all_clashes_with_pairs = list(set(clashes_with_data['all_clashes_with_pairs'].sum()))

In [22]:
clashes_with_data.to_pickle(os.path.join(root_path, 'DATA/clashes_with_data.pickle'))
# clashes_with_data = pd.read_pickle(os.path.join(root_path, 'DATA/clashes_with_data.pickle'))

# 4. Creating connection data

In [23]:
# CREATING CONNECTION DATA AND ADDING IT TO SALAD DATA (& getting salad_data in order)
# takes a minute
salad_names = salad_data['name'].values.tolist()
# for name in salad_names:
#     thing[name] = pd.Series([''] * len(salad_data))
# thing

for col_name in salad_names:
    col_values = []
    for row_name in salad_names:
        value_1 = name_name_clashes_input[name_1][index_2]
        value_2 = name_name_clashes_input[name_2][index_1]
        pairing_value = pairing_data[col_name][pairing_data['name'] == row_name].iloc[0]
        clashing_value = name_name_clashes_synced[col_name][name_name_clashes_synced['name'] == row_name].iloc[0]

        if clashing_value == 'Y':
            col_values.append('N')
        elif clashing_value == 'y':
            col_values.append('n')
        elif pairing_value == 'D':
            col_values.append('D')
        elif pairing_value == 'C':
            col_values.append('C')
        elif pairing_value == 'd':
            col_values.append('d')
        elif pairing_value == 'c':
            col_values.append('c')
        else:
            col_values.append('')

    salad_data[col_name] = pd.Series(col_values)
    
# just in case
salad_data.sort_values('name', inplace=True)
salad_data.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# 5. Salad recipe generators

In [23]:
salad_greens = salad_data[salad_data['salad_green'] == 'y']

salad_extras = salad_data[salad_data['salad_extra'] == 'y']
salad_extra_veg = salad_data[(salad_data['veg'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_fruits = salad_data[(salad_data['fruit'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_nuts_seeds = salad_data[(salad_data['protein_nut_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_others = salad_data[(salad_data['protein_nut_seed'] != 'y') & (salad_data['fruit'] != 'y') & (salad_data['veg'] != 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_nuts = salad_data[(salad_data['protein_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_seeds = salad_data[(salad_data['protein_nut'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_tomatoes = salad_data[salad_data['salad_extra_tomato'] == 'y']
salad_extra_olives = salad_data[salad_data['salad_extra_olive'] == 'y']
salad_extra_cheeses = salad_data[salad_data['salad_extra_cheese'] == 'y']
salad_extra_eggs = salad_data[salad_data['salad_extra_egg'] == 'y']
salad_extra_croutons = salad_data[salad_data['salad_extra_crouton'] == 'y']

salad_dressing_oils = salad_data[salad_data['salad_dressing_oil'] == 'y']
salad_dressing_vinegars = salad_data[salad_data['salad_dressing_vinegar'] == 'y']
salad_dressing_salts = salad_data[salad_data['salad_dressing_salt'] == 'y']
salad_dressing_peppers = salad_data[salad_data['salad_dressing_pepper'] == 'y']
salad_dressing_garlics = salad_data[salad_data['salad_dressing_garlic'] == 'y']

In [24]:
salad_data[salad_data['2020_7_5'] == 'y']['name']

0                                   ARUGULA (aka ROCKET)
3           LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE)
17     ALMONDS (and UNSWEETENED ALMOND BUTTER; see al...
19     APPLES (and APPLE CIDER, APPLE JUICE and/or AP...
22                                               BANANAS
23                                                 BEETS
24                     BELL PEPPERS—IN GENERAL, or MIXED
29                                              BROCCOLI
30           BROCCOLI RABE (aka BROCCOLI RAAB or RAPINI)
31                                            BROCCOLINI
34                                          CABBAGE, RED
35                                               CARROTS
36                         CASHEWS and CASHEW NUT BUTTER
37                                           CAULIFLOWER
38                                                CELERY
48                                                CHIVES
50     CILANTRO (aka CHINESE PARSLEY or FRESH CORIAND...
56                             

In [25]:
print('GREENS', len(salad_greens))
print('EXTRAS', len(salad_extras))
print('VEG', len(salad_extra_veg))
print('FRUIT', len(salad_extra_fruits))
print('NUTS n SEEDS', len(salad_extra_nuts_seeds))
print('OTHERS', len(salad_extra_others))
print('OILS', len(salad_dressing_oils))
print('VINEGARS', len(salad_dressing_vinegars))
print('SALTS', len(salad_dressing_salts))
print('PEPPERS', len(salad_dressing_peppers))

GREENS 15
EXTRAS 114
VEG 47
FRUIT 38
NUTS n SEEDS 17
OTHERS 12
OILS 4
VINEGARS 7
SALTS 6
PEPPERS 4


In [26]:
salad_extra_others['name']

15                                      “CHEESE,” VEGAN
27                                  BREAD CRUMBS, PANKO
28                            BREAD CRUMBS, WHOLE-GRAIN
39                                      CHEESE, CHEDDAR
40                                         CHEESE, FETA
41                                         CHEESE, GOAT
42                                     CHEESE, PARMESAN
43                                     CHEESE, PECORINO
44                            CHEESE, SMOKED MOZZARELLA
51    COCONUT, COCONUT CREAM, and COCONUT MILK (see ...
57                                    EGGS, HARD-BOILED
86      NUTRITIONAL YEAST (aka affectionately as NOOCH)
Name: name, dtype: object

## 5.1. Best of many

### 5.1.1. Generator

In [390]:
# !pip install networkx
# !pip install pyvis

import networkx as nx
from pyvis import network as net

In [391]:
clashes_with_data

Unnamed: 0,name,lower_clashes_with_names,upper_clashes_with_names,all_clashes_with_names,lower_clashes_with_pairs,upper_clashes_with_pairs,all_clashes_with_pairs
0,CHIVES,"[GARLIC, CHERRIES, SOUR and SWEET, BANANAS, BL...","[PEARS—IN GENERAL, MELON, CANTALOUPE, GRAPEFRU...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[(GARLIC, WATERMELON), (CHERRIES, SOUR and SWE...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
1,"CHIVES, GARLIC (aka CHINESE CHIVES)","[GARLIC, CHERRIES, SOUR and SWEET, BANANAS, BL...","[PEARS—IN GENERAL, MELON, CANTALOUPE, GRAPEFRU...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[(GARLIC, WATERMELON), (CHERRIES, SOUR and SWE...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
2,GARLIC,"[CHIVES, GARLIC (aka CHINESE CHIVES), CHIVES]","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[(CHIVES, GARLIC (aka CHINESE CHIVES), WATERME...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
3,GARLIC SCAPES,[],"[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...",[],"[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
4,"GARLIC, BLACK",[],"[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...",[],"[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
5,"GARLIC, GREEN (aka BABY GARLIC or SPRING GARLIC)",[],"[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...",[],"[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
6,LEEKS,[],"[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...",[],"[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
7,"ONIONS, CIPOLLINI",[],"[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...",[],"[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
8,"ONIONS, PEARL",[],"[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...",[],"[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."
9,"ONIONS, RED (see also ONIONS)",[],"[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...","[PEARS—IN GENERAL, MELON, CANTALOUPE, BANANAS,...",[],"[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA...","[(PEARS—IN GENERAL, WATERMELON), (MELON, CANTA..."


In [399]:
#IDEAS
    # savory v sweet templates (avoid eggs/mushroom/hard veg w fruit...)
    # get eg working, weight it as 2
    # start out w 1 base salad, extra (randomly selected from vip list), then branch out
    # control for eg lots of types of citrus, alliums
    # mark strong flavors, treat them separately
    # select main ingredients from each category that go with each other, then branch out from each, weighting traditionally at the end
    # work off of pairs_with data for categories, while at same time picking categories then within categories (to account for eg allium bias)
    # *only* match eg.s with specifically named; also, consider not matching categories the weird way
    # add a bonus if category of ingredient pairs well with other ingredients in salad (meh)
    # add in dressing garlics, when applicable
    # CONSIDER
        # replacing average net dist with something like, edges/possible edges

# TODO
    # add dried cranberries to salad stuffs?
    # maybe some other dried fruit? maybe not.
    # create generator that selects main ingredient(s) then draws from strong connections
        # then ranks?
    # increase emphasis on connection density
    # make connection density dependent on strength of connection
    
# UI PRIORITIES
    # speed
    # translate names to common (e.g. ALLMONDS (AND UNSWEETENED ALMOND BUTTER) -> Almonds, sliced)
    # maybe consisder one of those spokes-of-wheel charts to display stats/scores
    
# RECIPE DETAILS
    # maybe suggest diluting e.g. sesame oil w regular olive oil
    
# ==================================================================================================================
# ==================================================================================================================

# ALGORITHM A (best of many)
top_score = 0
for i in range(300):
#     print('ITERATION', i)
    n_subgraphs = 2
    while n_subgraphs > 1: # keep shuffling until you get a well connected graph
#         print('TRYING FOR NO EXTRA SUBGRAPHS')
        n_greens = 2#random.randrange(2, 4)
        n_extras = random.randrange(2, 6)
        n_dressing_oils = 1
        n_dressing_vinegars = 1
        n_dressing_salts = 1
        n_dressing_peppers = 1
        # n_dressing_garlics = random.randrange(0, 2) # maybe make presence dependent on the rest. or, just leave out for now.
        
        selected_greens = salad_greens.sample(n_greens)
        selected_extras = salad_extras.sample(n_extras)
        selected_dressing_oils = salad_dressing_oils.sample(n_dressing_oils)
        selected_dressing_vinegars = salad_dressing_vinegars.sample(n_dressing_vinegars)
        selected_dressing_salts = salad_dressing_salts.sample(n_dressing_salts)
        selected_dressing_peppers = salad_dressing_peppers.sample(n_dressing_peppers)
        selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

        lower_category_pairs = []
        lower_direct_pairs = []
        upper_category_pairs = []
        upper_direct_pairs = []
        ingredients_list = selected_ingredients['name'].values.tolist()
        already_checked = []
        for ingredient_name in ingredients_list:
#             print('ingredient_name', ingredient_name)
            for lc_name in pairing_data['lower_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
#                 print(lc_name)
                if lc_name in ingredients_list and not lc_name in already_checked:
                    lower_category_pairs.append([ingredient_name, lc_name])
            for ld_name in pairing_data['lower_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ld_name in ingredients_list and not ld_name in already_checked:
                    lower_direct_pairs.append([ingredient_name, ld_name])
            for uc_name in pairing_data['upper_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if uc_name in ingredients_list and not uc_name in already_checked:
                    upper_category_pairs.append([ingredient_name, uc_name])
            for ud_name in pairing_data['upper_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ud_name in ingredients_list and not ud_name in already_checked:
                    upper_direct_pairs.append([ingredient_name, ud_name])
            already_checked.append(ingredient_name)

        lower_pairs = lower_category_pairs + lower_direct_pairs
        upper_pairs = upper_category_pairs + upper_direct_pairs
        all_pairs = lower_pairs + upper_pairs
#         print(lower_pairs, upper_pairs, all_pairs)
        
#         all_pairs_sp = [tuple(sorted(pair)) for pair in all_pairs]
#         print(len(all_pairs_sp), len(list(set(all_pairs_sp))))

#         print('INGREDIENTS', ingredients_list)
#         print()
#         print('LC PAIRS', lower_category_pairs)
#         print()
#         print()

        G = nx.Graph()
        G.add_nodes_from(selected_ingredients['name'].values.tolist())
        G.add_edges_from(all_pairs)
        n_subgraphs = len(list(nx.connected_component_subgraphs(G)))
#         print(G.edges())
#         print(n_subgraphs)
            
    score = 0

# SIMPLER ALTERNATIVE: bonus for proportion of actual pairs over possible pairs? could then combine with pair strength bonus?
# PAIRING DENSITY BONUS ============================================================================================
    # ranges from roughly (.1 to 1) * 4
    average_shortest_path_length = nx.average_shortest_path_length(G)
    average_shortest_path_score = 2.5 / average_shortest_path_length - 1.1
#     print(average_shortest_path_score)
    score += average_shortest_path_score * 4    
    
# # UPPER PAIRING BONUS ==============================================================================================
#     # ranges from roughly (.25 to 1) * 3
#     upper_proportion_score = len(upper_pairs) / len(all_pairs) * 2.5 # messed with this, not sure if it still works
# #     print(upper_proportion_score)
# #     print(len(upper_pairs), len(lower_pairs), len(all_pairs))
# #     print()
#     score += upper_proportion_score * 3

# PAIR STRENGTH BONUS ==============================================================================================
    # ranges from roughly (0 to 1) * 3

    # I'm thinking of 'lower category' as default, and awarding points for steps up from that
    ld_bonus = 1*len(lower_direct_pairs)
    uc_bonus = 2*len(upper_category_pairs)
    ud_bonus = 5*len(upper_direct_pairs)
    pair_strength_score = .6*(ld_bonus + uc_bonus + ud_bonus)/len(all_pairs) # otherwise would tend toward large salads
#     print('LC', len(lower_category_pairs))
#     print('LD', len(lower_direct_pairs))
#     print('UC', len(upper_category_pairs))
#     print('UD', len(upper_direct_pairs))
#     print('SCORE', pair_strength_score)
#     print()
    score += pair_strength_score * 3

# important but easy to avoid, so not weighted too heavily
# CLASH PENALTY ====================================================================================================
    # ranges from roughly (0 to 1) * -1.5
    all_clashing_pairs = []
    selected_ingredients_list = selected_ingredients['name'].values.tolist()
    for name in selected_ingredients_list:
        names_that_clash_with_name = clashes_with_data['all_clashes_with_names'][clashes_with_data['name'] == name].iloc[0]
        all_clashing_names = set(selected_ingredients_list).intersection(set(names_that_clash_with_name)) # selected names that clash with this selected name
        all_clashing_pairs += [tuple(sorted([name, all_clashing_name])) for all_clashing_name in all_clashing_names]

    all_clashing_pairs = list(set(all_clashing_pairs))
    all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
    score += len(all_clashing_pairs) * -1.5

# # FRUIT BONUS ======================================================================================================
#     # ranges from roughly (0 to 3) * .1
#     n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
#     score += n_fruit * .1
    
# # NUT SEED BONUS ===================================================================================================    
#     # ranges from roughly (0 to 2) * .15
#     n_nut_seed = len(selected_ingredients[selected_ingredients['protein_nut_seed'] == 'y'])
#     score += n_nut_seed * .15

# FLAVOR BALANCE BONUS =============================================================================================
    # ranges from roughly (0 to 1) * 1
    n_sweet_lower = len(selected_ingredients[selected_ingredients['sweet'] == 'y'])
    n_sweet_upper = len(selected_ingredients[selected_ingredients['sweet'] == 'Y'])
    n_salty_lower = len(selected_ingredients[selected_ingredients['salty'] == 'y'])
    n_salty_upper = len(selected_ingredients[selected_ingredients['salty'] == 'Y'])
    n_sour_lower = len(selected_ingredients[selected_ingredients['sour'] == 'y'])
    n_sour_upper = len(selected_ingredients[selected_ingredients['sour'] == 'Y'])
    n_savory_lower = len(selected_ingredients[selected_ingredients['savory'] == 'y'])
    n_savory_upper = len(selected_ingredients[selected_ingredients['savory'] == 'Y'])
    n_bitter_lower = len(selected_ingredients[selected_ingredients['bitter'] == 'y'])
    n_bitter_upper = len(selected_ingredients[selected_ingredients['bitter'] == 'Y'])
    n_spicy_lower = len(selected_ingredients[selected_ingredients['spicy'] == 'y'])
    n_spicy_upper = len(selected_ingredients[selected_ingredients['spicy'] == 'Y'])
    
    # each varies from roughly .5 to 1
    sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
    salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
    sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
    savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
    bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
    spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5
    
    flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.25
#     print(flavor_balance_score)
    
    score += flavor_balance_score
#     print(sweet_score, salty_score, sour_score, savory_score, bitter_score, spicy_score)
#     print()
    
#     print(n_sweet_lower, n_sweet_upper)
#     print(n_salty_lower, n_salty_upper)
#     print(n_sour_lower, n_sour_upper)
#     print(n_savory_lower, n_savory_upper)
#     print(n_bitter_lower, n_bitter_upper)
#     print(n_spicy_lower, n_spicy_upper)
#     print()

# TEXTURE BALANCE BONUS ============================================================================================
    # ranges from roughly (0 to 1) * .75
    n_crunchy_lower = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'y'])
    n_crunchy_upper = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'Y'])
    n_chewy_lower = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'y'])
    n_chewy_upper = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'Y'])
    n_juicy_lower = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'y'])
    n_juicy_upper = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'Y'])
    
    # each ranges from roughly 0 to 1
    crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
    chewy_score = (n_chewy_lower/2 + n_chewy_upper)
    juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3
#     print(crunchy_score, chewy_score, juicy_score)
    
    texture_balance_score = 4 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1
#     print(texture_balance_score)
#     print()
    
    score += texture_balance_score * .75

# seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
# FOOD GROUP BALANCE BONUS =========================================================================================
    # ranges from roughly (25 to 1) * 2
    n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
    n_veg = len(selected_ingredients[selected_ingredients['veg'] == 'y'])
    n_protein = len(selected_ingredients[selected_ingredients['protein'] == 'y'])
    
    # each varies from roughly 0 to 1 (sometimes a little over)
    fruit_score = n_fruit / 3
    veg_score = n_veg / 5
    protein_score = n_protein / 3
#     print(fruit_score, veg_score, protein_score)
    
    food_group_balance_score = 3 / (1 + abs(1-fruit_score) + abs(1-veg_score) + abs(1-protein_score)) - .75
#     print(food_group_balance_score)
#     print()
    
    score += food_group_balance_score * 2
    
    if score > top_score:
        top_score = score
        top_food_group_balance_score = food_group_balance_score
        top_average_shortest_path_score = average_shortest_path_score
        top_flavor_balance_score = flavor_balance_score
        top_texture_balance_score = texture_balance_score
        top_all_clashing_pairs_score = all_clashing_pairs_score
        top_pair_strength_score = pair_strength_score
#         top_upper_pairs = upper_pairs
#         top_lower_pairs = lower_pairs
        top_lc_pairs = lower_category_pairs
        top_ld_pairs = lower_direct_pairs
        top_uc_pairs = upper_category_pairs
        top_ud_pairs = upper_direct_pairs
        top_selected_ingredients = selected_ingredients
        top_average_shortest_path_length = average_shortest_path_length
#         top_upper_proportion = len(upper_pairs) / (len(upper_pairs) + len(lower_pairs))
# print('TOP AVG SHORTEST PATH LENGTH', top_average_shortest_path_length)
# print('TOP UPPER PROPORTION', top_upper_proportion)
print('RAW AVERAGE SHORTEST PATH SCORE', top_average_shortest_path_score)
print('RAW PAIR STRENGTH SCORE', top_pair_strength_score)
print('RAW ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score)
print('RAW FLAVOR BALANCE SCORE', top_flavor_balance_score)
print('RAW TEXTURE BALANCE SCORE', top_texture_balance_score)
print('RAW FOOD GROUP BALANCE SCORE', top_food_group_balance_score)
print('TOTAL SCORE', top_score)            
    
top_net = net.Network(notebook=True)

nodes = top_selected_ingredients['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = top_selected_ingredients.apply(get_color, axis=1).tolist()

top_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in top_lc_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in top_ld_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='grey')
    
for pair in top_uc_pairs:
    top_net.add_edge(pair[0], pair[1], color='darkgrey')
    
for pair in top_ud_pairs:
    top_net.add_edge(pair[0], pair[1], color='black')

vegan = top_selected_ingredients['not_vegan'].sum() == ''
gluten_free = top_selected_ingredients['gluten'].sum() == ''
# print(vegan, gluten_free)

top_net.show('top_net.html')

RAW AVERAGE SHORTEST PATH SCORE 1.0825396825396822
RAW PAIR STRENGTH SCORE 0.9957446808510638
RAW ALL CLASHING PAIRS SCORE 0.0
RAW FLAVOR BALANCE SCORE 1.2500000000000004
RAW TEXTURE BALANCE SCORE 0.6000000000000001
RAW FOOD GROUP BALANCE SCORE 0.6136363636363635
TOTAL SCORE 10.244665499984647


### 5.1.2. Recording recipes

In [31]:
try:
    salad_recipe_data = pd.read_pickle(os.path.join(root_path, 'DATA/salad_recipe_data_latest.pickle'))
    print('SALAD RECIPE DATA IMPORT SUCCESSFUL')
except:
    salad_recipe_data = pd.DataFrame({
        'vegan': [],
        'gluten_free': [],
        'basic': [],
        'best_of': [],
        'score': [],
        'pairing_density_bonus': [],
        'pair_strength_bonus': [],
        'clash_penalty': [],
        'flavor_balance_bonus': [],
        'texture_balance_bonus': [],
        'food_group_balance_bonus': [],
        'lc_pairs': [],
        'ld_pairs': [],
        'uc_pairs': [],
        'ud_pairs':[],
        'clashing_pairs': [],
        'ingredient_names': [],
        'leafy_green_names': [],
        'extra_names': [],
        'dressing_names': [],
    })
    print('IMPORT FAILED, CREATING NEW SALAD RECIPE DATAFRAME')

SALAD RECIPE DATA IMPORT SUCCESSFUL


In [33]:
salad_recipe_data.head()

Unnamed: 0,vegan,gluten_free,basic,best_of,score,pairing_density_bonus,pair_strength_bonus,clash_penalty,flavor_balance_bonus,texture_balance_bonus,food_group_balance_bonus,lc_pairs,ld_pairs,uc_pairs,ud_pairs,clashing_pairs,ingredient_names,leafy_green_names,extra_names,dressing_names


In [34]:
salad_greens = salad_data[salad_data['salad_green'] == 'y']

salad_extras = salad_data[salad_data['salad_extra'] == 'y']
salad_extra_veg = salad_data[(salad_data['veg'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_fruits = salad_data[(salad_data['fruit'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_nuts = salad_data[(salad_data['protein_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_seeds = salad_data[(salad_data['protein_nut'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_tomatoes = salad_data[salad_data['salad_extra_tomato'] == 'y']
salad_extra_olives = salad_data[salad_data['salad_extra_olive'] == 'y']
salad_extra_cheeses = salad_data[salad_data['salad_extra_cheese'] == 'y']
salad_extra_eggs = salad_data[salad_data['salad_extra_egg'] == 'y']
salad_extra_croutons = salad_data[salad_data['salad_extra_crouton'] == 'y']

salad_dressing_oils = salad_data[salad_data['salad_dressing_oil'] == 'y']
salad_dressing_vinegars = salad_data[salad_data['salad_dressing_vinegar'] == 'y']
salad_dressing_salts = salad_data[salad_data['salad_dressing_salt'] == 'y']
salad_dressing_peppers = salad_data[salad_data['salad_dressing_pepper'] == 'y']
salad_dressing_garlics = salad_data[salad_data['salad_dressing_garlic'] == 'y']

In [35]:
# !pip install networkx

import networkx as nx

In [38]:
best_of = 300
for recipe_count in range(500):
    top_score = 0
    for i in range(best_of):
        n_subgraphs = 2
        while n_subgraphs > 1: # keep shuffling until you get a well connected graph
            n_greens = random.randrange(2, 4)
            n_extras = random.randrange(2, 6)
            n_dressing_oils = 1
            n_dressing_vinegars = 1
            n_dressing_salts = 1
            n_dressing_peppers = 1
            # n_dressing_garlics = random.randrange(0, 2) # maybe make presence dependent on the rest. or, just leave out for now.

            selected_greens = salad_greens.sample(n_greens)
            selected_extras = salad_extras.sample(n_extras)
            selected_dressing_oils = salad_dressing_oils.sample(n_dressing_oils)
            selected_dressing_vinegars = salad_dressing_vinegars.sample(n_dressing_vinegars)
            selected_dressing_salts = salad_dressing_salts.sample(n_dressing_salts)
            selected_dressing_peppers = salad_dressing_peppers.sample(n_dressing_peppers)
            selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

            lower_category_pairs = []
            lower_direct_pairs = []
            upper_category_pairs = []
            upper_direct_pairs = []
            ingredients_list = selected_ingredients['name'].values.tolist()
            already_checked = []
            for ingredient_name in ingredients_list:
                for lc_name in pairing_data['lower_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if lc_name in ingredients_list and not lc_name in already_checked:
                        lower_category_pairs.append([ingredient_name, lc_name])
                for ld_name in pairing_data['lower_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if ld_name in ingredients_list and not ld_name in already_checked:
                        lower_direct_pairs.append([ingredient_name, ld_name])
                for uc_name in pairing_data['upper_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if uc_name in ingredients_list and not uc_name in already_checked:
                        upper_category_pairs.append([ingredient_name, uc_name])
                for ud_name in pairing_data['upper_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                    if ud_name in ingredients_list and not ud_name in already_checked:
                        upper_direct_pairs.append([ingredient_name, ud_name])
                already_checked.append(ingredient_name)

            lower_pairs = lower_category_pairs + lower_direct_pairs
            upper_pairs = upper_category_pairs + upper_direct_pairs
            all_pairs = lower_pairs + upper_pairs

    #         all_pairs_sp = [tuple(sorted(pair)) for pair in all_pairs]
    #         print(len(all_pairs_sp), len(list(set(all_pairs_sp))))

    #         print('INGREDIENTS', ingredients_list)
    #         print()
    #         print('LC PAIRS', lower_category_pairs)
    #         print()
    #         print()

            G = nx.Graph()
            G.add_nodes_from(selected_ingredients['name'].values.tolist())
            G.add_edges_from(all_pairs)
            n_subgraphs = len(list(nx.connected_component_subgraphs(G)))

        score = 0

    # SIMPLER ALTERNATIVE: bonus for proportion of actual pairs over possible pairs? could then combine with pair strength bonus?
    # PAIRING DENSITY BONUS ============================================================================================
        # ranges from roughly (.1 to 1) * 4
        average_shortest_path_length = nx.average_shortest_path_length(G)
        average_shortest_path_score = 2.5 / average_shortest_path_length - 1.1
    #     print(average_shortest_path_score)
        score += average_shortest_path_score * 4    

    # # UPPER PAIRING BONUS ==============================================================================================
    #     # ranges from roughly (.25 to 1) * 3
    #     upper_proportion_score = len(upper_pairs) / len(all_pairs) * 2.5 # messed with this, not sure if it still works
    # #     print(upper_proportion_score)
    # #     print(len(upper_pairs), len(lower_pairs), len(all_pairs))
    # #     print()
    #     score += upper_proportion_score * 3

    # PAIR STRENGTH BONUS ==============================================================================================
        # ranges from roughly (0 to 1) * 3

        # I'm thinking of 'lower category' as default, and awarding points for steps up from that
        ld_bonus = 1*len(lower_direct_pairs)
        uc_bonus = 2*len(upper_category_pairs)
        ud_bonus = 5*len(upper_direct_pairs)
        pair_strength_score = .6*(ld_bonus + uc_bonus + ud_bonus)/len(all_pairs) # otherwise would tend toward large salads
    #     print('LC', len(lower_category_pairs))
    #     print('LD', len(lower_direct_pairs))
    #     print('UC', len(upper_category_pairs))
    #     print('UD', len(upper_direct_pairs))
    #     print('SCORE', pair_strength_score)
    #     print()
        score += pair_strength_score * 3

    # important but easy to avoid, so not weighted too heavily
    # CLASH PENALTY ====================================================================================================
        # ranges from roughly (0 to 1) * -1.5
        all_clashing_pairs = []
        selected_ingredients_list = selected_ingredients['name'].values.tolist()
        for name in selected_ingredients_list:
            names_that_clash_with_name = clashes_with_data['all_clashes_with_names'][clashes_with_data['name'] == name].iloc[0]
            all_clashing_names = set(selected_ingredients_list).intersection(set(names_that_clash_with_name)) # selected names that clash with this selected name
            all_clashing_pairs += [tuple(sorted([name, all_clashing_name])) for all_clashing_name in all_clashing_names]

        all_clashing_pairs = list(set(all_clashing_pairs))
        all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
        score += len(all_clashing_pairs) * -1.5

    # # FRUIT BONUS ======================================================================================================
    #     # ranges from roughly (0 to 3) * .1
    #     n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
    #     score += n_fruit * .1

    # # NUT SEED BONUS ===================================================================================================    
    #     # ranges from roughly (0 to 2) * .15
    #     n_nut_seed = len(selected_ingredients[selected_ingredients['protein_nut_seed'] == 'y'])
    #     score += n_nut_seed * .15

    # FLAVOR BALANCE BONUS =============================================================================================
        # ranges from roughly (0 to 1) * 1
        n_sweet_lower = len(selected_ingredients[selected_ingredients['sweet'] == 'y'])
        n_sweet_upper = len(selected_ingredients[selected_ingredients['sweet'] == 'Y'])
        n_salty_lower = len(selected_ingredients[selected_ingredients['salty'] == 'y'])
        n_salty_upper = len(selected_ingredients[selected_ingredients['salty'] == 'Y'])
        n_sour_lower = len(selected_ingredients[selected_ingredients['sour'] == 'y'])
        n_sour_upper = len(selected_ingredients[selected_ingredients['sour'] == 'Y'])
        n_savory_lower = len(selected_ingredients[selected_ingredients['savory'] == 'y'])
        n_savory_upper = len(selected_ingredients[selected_ingredients['savory'] == 'Y'])
        n_bitter_lower = len(selected_ingredients[selected_ingredients['bitter'] == 'y'])
        n_bitter_upper = len(selected_ingredients[selected_ingredients['bitter'] == 'Y'])
        n_spicy_lower = len(selected_ingredients[selected_ingredients['spicy'] == 'y'])
        n_spicy_upper = len(selected_ingredients[selected_ingredients['spicy'] == 'Y'])

        # each varies from roughly .5 to 1
        sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
        salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
        sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
        savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
        bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
        spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5

        flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.25
    #     print(flavor_balance_score)

        score += flavor_balance_score
    #     print(sweet_score, salty_score, sour_score, savory_score, bitter_score, spicy_score)
    #     print()

    #     print(n_sweet_lower, n_sweet_upper)
    #     print(n_salty_lower, n_salty_upper)
    #     print(n_sour_lower, n_sour_upper)
    #     print(n_savory_lower, n_savory_upper)
    #     print(n_bitter_lower, n_bitter_upper)
    #     print(n_spicy_lower, n_spicy_upper)
    #     print()

    # TEXTURE BALANCE BONUS ============================================================================================
        # ranges from roughly (0 to 1) * .75
        n_crunchy_lower = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'y'])
        n_crunchy_upper = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'Y'])
        n_chewy_lower = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'y'])
        n_chewy_upper = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'Y'])
        n_juicy_lower = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'y'])
        n_juicy_upper = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'Y'])

        # each ranges from roughly 0 to 1
        crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
        chewy_score = (n_chewy_lower/2 + n_chewy_upper)
        juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3
    #     print(crunchy_score, chewy_score, juicy_score)

        texture_balance_score = 4 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1
    #     print(texture_balance_score)
    #     print()

        score += texture_balance_score * .75

    # seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
    # FOOD GROUP BALANCE BONUS =========================================================================================
        # ranges from roughly (25 to 1) * 2
        n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
        n_veg = len(selected_ingredients[selected_ingredients['veg'] == 'y'])
        n_protein = len(selected_ingredients[selected_ingredients['protein'] == 'y'])

        # each varies from roughly 0 to 1 (sometimes a little over)
        fruit_score = n_fruit / 3
        veg_score = n_veg / 5
        protein_score = n_protein / 3
    #     print(fruit_score, veg_score, protein_score)

        food_group_balance_score = 3 / (1 + abs(1-fruit_score) + abs(1-veg_score) + abs(1-protein_score)) - .75
    #     print(food_group_balance_score)
    #     print()

        score += food_group_balance_score * 2

        if score > top_score:
            top_score = score
            top_food_group_balance_score = food_group_balance_score
            top_average_shortest_path_score = average_shortest_path_score
            top_flavor_balance_score = flavor_balance_score
            top_texture_balance_score = texture_balance_score
            top_all_clashing_pairs_score = all_clashing_pairs_score
            top_pair_strength_score = pair_strength_score
    #         top_upper_pairs = upper_pairs
    #         top_lower_pairs = lower_pairs
            top_lc_pairs = lower_category_pairs
            top_ld_pairs = lower_direct_pairs
            top_uc_pairs = upper_category_pairs
            top_ud_pairs = upper_direct_pairs
            top_selected_ingredients = selected_ingredients
            top_average_shortest_path_length = average_shortest_path_length
            top_all_clashing_pairs = all_clashing_pairs
    #         top_upper_proportion = len(upper_pairs) / (len(upper_pairs) + len(lower_pairs))
    # print('TOP AVG SHORTEST PATH LENGTH', top_average_shortest_path_length)
    # print('TOP UPPER PROPORTION', top_upper_proportion)
#     print('TOP AVERAGE SHORTEST PATH SCORE', top_average_shortest_path_score * 4)
#     print('TOP PAIR STRENGTH SCORE', top_pair_strength_score * 3)
#     print('TOP ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score * -1.5)
#     print('TOP FLAVOR BALANCE SCORE', top_flavor_balance_score)
#     print('TOP TEXTURE BALANCE SCORE', top_texture_balance_score * .75)
#     print('TOP FOOD GROUP BALANCE SCORE', top_food_group_balance_score * 2)
#     print('TOP_SCORE', top_score)  
    
    vegan = top_selected_ingredients['not_vegan'].sum() == ''
    gluten_free = top_selected_ingredients['gluten'].sum() == ''
    
    recipe_greens = top_selected_ingredients[top_selected_ingredients['salad_green'] == 'y']
    recipe_extras = top_selected_ingredients[top_selected_ingredients['salad_extra'] == 'y']
    recipe_dressing_oils = top_selected_ingredients[top_selected_ingredients['salad_dressing_oil'] == 'y']
    recipe_dressing_vinegars = top_selected_ingredients[top_selected_ingredients['salad_dressing_vinegar'] == 'y']
    recipe_dressing_salts = top_selected_ingredients[top_selected_ingredients['salad_dressing_salt'] == 'y']
    recipe_dressing_peppers = top_selected_ingredients[top_selected_ingredients['salad_dressing_pepper'] == 'y']
    recipe_dressing_garlics = top_selected_ingredients[top_selected_ingredients['salad_dressing_garlic'] == 'y']
    # could just select 'salad_dressing', but this includes garlics
    recipe_dressing = recipe_dressing_oils.append(recipe_dressing_vinegars).append(recipe_dressing_salts).append(recipe_dressing_peppers)

    new_recipe = pd.DataFrame({
        'vegan': [vegan],
        'gluten_free': [gluten_free],
        'basic': [salad_data_is_basic],
        'best_of': [best_of],
        'score': [top_score],
        'pairing_density_bonus': [top_average_shortest_path_score * 4],
        'pair_strength_bonus': [top_pair_strength_score * 3],
        'clash_penalty': [top_all_clashing_pairs_score * 4],
        'flavor_balance_bonus': [top_flavor_balance_score],
        'texture_balance_bonus': [top_texture_balance_score * 75],
        'food_group_balance_bonus': [top_food_group_balance_score * 2],
        'lc_pairs': [top_lc_pairs],
        'ld_pairs': [top_ld_pairs],
        'uc_pairs': [top_uc_pairs],
        'ud_pairs':[top_ud_pairs],
        'clashing_pairs': [top_all_clashing_pairs],
        'ingredient_names': [top_selected_ingredients['name'].values.tolist()],
        'leafy_green_names': [recipe_greens['name'].values.tolist()],
        'extra_names': [recipe_extras['name'].values.tolist()],
        'dressing_names': [recipe_dressing['name'].values.tolist()],
    })
    salad_recipe_data = salad_recipe_data.append(new_recipe, sort=False)
    print('SALAD RECIPE RECORDED. SCORE:', top_score)

SALAD RECIPE RECORDED. SCORE: 9.247372687282732
SALAD RECIPE RECORDED. SCORE: 8.632772250740025
SALAD RECIPE RECORDED. SCORE: 8.734544902673036
SALAD RECIPE RECORDED. SCORE: 8.584865424430642
SALAD RECIPE RECORDED. SCORE: 8.90463054187192
SALAD RECIPE RECORDED. SCORE: 7.923617723169311
SALAD RECIPE RECORDED. SCORE: 7.862569886254096
SALAD RECIPE RECORDED. SCORE: 9.661852805638382
SALAD RECIPE RECORDED. SCORE: 8.157793448212688
SALAD RECIPE RECORDED. SCORE: 8.835729028453486
SALAD RECIPE RECORDED. SCORE: 8.589238394302942
SALAD RECIPE RECORDED. SCORE: 8.30968992248062
SALAD RECIPE RECORDED. SCORE: 8.72170163259812
SALAD RECIPE RECORDED. SCORE: 8.260653934166521
SALAD RECIPE RECORDED. SCORE: 8.410684211500378
SALAD RECIPE RECORDED. SCORE: 9.851507555330862
SALAD RECIPE RECORDED. SCORE: 9.7671304967093
SALAD RECIPE RECORDED. SCORE: 8.297193362193362
SALAD RECIPE RECORDED. SCORE: 8.391835900061707
SALAD RECIPE RECORDED. SCORE: 9.44700568752091
SALAD RECIPE RECORDED. SCORE: 8.43590189018194

SALAD RECIPE RECORDED. SCORE: 8.746269859173086
SALAD RECIPE RECORDED. SCORE: 8.316503496503497
SALAD RECIPE RECORDED. SCORE: 8.836790620696569
SALAD RECIPE RECORDED. SCORE: 8.445871740662438
SALAD RECIPE RECORDED. SCORE: 8.803529279115663
SALAD RECIPE RECORDED. SCORE: 8.963244650086756
SALAD RECIPE RECORDED. SCORE: 8.11221664873824
SALAD RECIPE RECORDED. SCORE: 10.433539417160105
SALAD RECIPE RECORDED. SCORE: 8.398377171154765
SALAD RECIPE RECORDED. SCORE: 8.278488925934747
SALAD RECIPE RECORDED. SCORE: 8.63701923076923
SALAD RECIPE RECORDED. SCORE: 8.983115027387473
SALAD RECIPE RECORDED. SCORE: 8.734297281665702
SALAD RECIPE RECORDED. SCORE: 8.55887739711269
SALAD RECIPE RECORDED. SCORE: 7.98492277992278
SALAD RECIPE RECORDED. SCORE: 9.06570465798643
SALAD RECIPE RECORDED. SCORE: 9.857935251853498
SALAD RECIPE RECORDED. SCORE: 8.570588235294117
SALAD RECIPE RECORDED. SCORE: 8.570498571637092
SALAD RECIPE RECORDED. SCORE: 7.780317460317461
SALAD RECIPE RECORDED. SCORE: 8.610860296236

SALAD RECIPE RECORDED. SCORE: 9.457168354338165
SALAD RECIPE RECORDED. SCORE: 9.928940357020513
SALAD RECIPE RECORDED. SCORE: 8.069424964936887
SALAD RECIPE RECORDED. SCORE: 8.414912280701753
SALAD RECIPE RECORDED. SCORE: 8.811904761904762
SALAD RECIPE RECORDED. SCORE: 8.953361344537813
SALAD RECIPE RECORDED. SCORE: 9.033312732212345
SALAD RECIPE RECORDED. SCORE: 8.545622119815668
SALAD RECIPE RECORDED. SCORE: 8.159387439186201
SALAD RECIPE RECORDED. SCORE: 8.491138273491215
SALAD RECIPE RECORDED. SCORE: 8.914080148188306
SALAD RECIPE RECORDED. SCORE: 8.651848151848151
SALAD RECIPE RECORDED. SCORE: 8.727368421052631
SALAD RECIPE RECORDED. SCORE: 8.390669987839798
SALAD RECIPE RECORDED. SCORE: 8.662643239113827
SALAD RECIPE RECORDED. SCORE: 8.648301698301697
SALAD RECIPE RECORDED. SCORE: 8.628899835796389
SALAD RECIPE RECORDED. SCORE: 8.299355029341397
SALAD RECIPE RECORDED. SCORE: 8.773524720893143
SALAD RECIPE RECORDED. SCORE: 8.29190517670483
SALAD RECIPE RECORDED. SCORE: 8.332376540

In [39]:
from datetime import date
today = date.today()
date_string = f'{str(today.year)}_{str(today.month)}_{str(today.day)}'

salad_recipe_data.to_pickle(os.path.join(root_path, 'DATA/salad_recipe_data_latest.pickle'))
salad_recipe_data.to_pickle(os.path.join(root_path, f'DATA/salad_recipe_data_{date_string}.pickle'))

In [28]:
salad_recipe_data

Unnamed: 0,vegan,gluten_free,basic,best_of,score,pairing_density_bonus,pair_strength_bonus,clash_penalty,flavor_balance_bonus,texture_balance_bonus,food_group_balance_bonus,lc_pairs,ld_pairs,uc_pairs,ud_pairs,clashing_pairs,ingredient_names,leafy_green_names,extra_names,dressing_names


### 5.1.3. Displaying records

In [None]:
# !pip install pyvis

from pyvis import network as net

In [None]:
recipe = salad_recipe_data[(salad_recipe_data['basic'] == True) & (salad_recipe_data['vegan'] == True) & salad_recipe_data['gluten_free'] == True].sample(1).iloc[0]
recipe_net = net.Network(notebook=True)

nodes = recipe['ingredient_names']

nodes_color = []
for name in recipe['ingredient_names']:
    ingredient = salad_data[salad_data['name'] == name].iloc[0]
    if ingredient['salad_green'] == 'y':
        nodes_color.append('lightgreen')
    elif ingredient['salad_extra'] == 'y':
        if ingredient['veg'] == 'y':
            nodes_color.append('green')
        elif ingredient['fruit'] == 'y':
            nodes_color.append('orange')
        elif ingredient['protein_nut_seed'] == 'y':
            nodes_color.append('brown')
        else:
            nodes_color.append('lightblue')
    elif ingredient['salad_dressing'] == 'y':
        nodes_color.append('lightgrey')  
        
recipe_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in recipe['lc_pairs']:
    recipe_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in recipe['ld_pairs']:
    recipe_net.add_edge(pair[0], pair[1], physics=False, color='grey')
    
for pair in recipe['uc_pairs']:
    recipe_net.add_edge(pair[0], pair[1], color='darkgrey')
    
for pair in recipe['ud_pairs']:
    recipe_net.add_edge(pair[0], pair[1], color='black')

if not recipe['vegan']:
    print('NOT VEGAN')
if not recipe['gluten_free']:
    print('CONTAINS GLUTEN')
print('SCORE:', recipe['score'])
recipe_net.show('recipe_net.html')

## 5.2. Successive picks

In [None]:
# !pip install networkx
# !pip install pyvis

import networkx as nx
from pyvis import network as net

In [402]:
successive_net = net.Network(notebook=True)
nodes = salad_data['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = salad_data.apply(get_color, axis=1).tolist()

successive_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

ud_pairs = list(set(pairing_data['ud_sorted_pairs'].sum()))
for pair in ud_pairs:
    try:
        successive_net.add_edge(pair[0], pair[1], color='black', physics=False)
    except:
        pass
print(len(successive_net.edges))
successive_net.show('successive_net.html')

110


In [260]:
successive_net = net.Network(notebook=True)
new_ingredients = [salad_data.sample(1)['name'].tolist()[0]]
ingredients = new_ingredients
ingredient_pairs = []
while new_ingredients != []:
    new_ingredient_uds = []
    for new_ingredient in new_ingredients:
        new_ingredient_uds += pairing_data[pairing_data['name'] == new_ingredient]['upper_direct_names'].iloc[0]
    new_ingredients = []
    for new_ingredient_ud in list(set(new_ingredient_uds)):
        if set(ingredients).issubset(set(pairing_data[pairing_data['name'] == new_ingredient_ud]['upper_direct_names'].iloc[0])):
            new_ingredients.append(new_ingredient_ud)
    ingredients += new_ingredients

print(ingredients)
            
# problem
    # when I go through new ingredients, I don't have anywhere to put potential new ingredients
    # we have old, new, new new (for the cycle)
#     ingredients += new_ingredients:
    
# for ud_name in pairing_data[pairing_data['name'] == ingredient_name]['upper_direct_names'].tolist()[0]:
#     print(ingredient_name, ud_name)
#     print()

# successive_net = net.Network(notebook=True)
# nodes = salad_data['name'].tolist()

# def get_color(row):
# #     print(type(row))
# #     print(row)
#     if row['salad_green'] == 'y':
#         return 'lightgreen'
#     elif row['salad_extra'] == 'y':
#         if row['veg'] == 'y':
#             return 'green'
#         elif row['fruit'] == 'y':
#             return 'orange'
#         elif row['protein_nut_seed'] == 'y':
#             return 'brown'
#         else:
#             return 'lightblue'
#     elif row['salad_dressing'] == 'y':
#         return 'lightgrey'
    
# nodes_color = salad_data.apply(get_color, axis=1).tolist()

# successive_net.add_nodes(
#     nodes=nodes,
#     color=nodes_color
# )

# ud_pairs = list(set(pairing_data['ud_sorted_pairs'].sum()))
# for pair in ud_pairs:
#     successive_net.add_edge(pair[0], pair[1], color='black')
    
# successive_net.show('successive_net.html')

['PINE NUTS (aka PIGNOLI)', 'GARLIC', 'DATES', 'ZUCCHINI (see also SQUASH, SUMMER)', 'ORANGES, ORANGE JUICE, and ORANGE ZEST', 'BROCCOLI', 'SPINACH', 'TOMATOES, TOMATO JUICE, TOMATO PASTE, and TOMATO SAUCE', 'CHEESE, GOAT', 'BEETS', 'RAISINS', 'CHEESE, PARMESAN', 'CHEESE, FETA', 'TOMATOES, SUN-DRIED (or OVEN-DRIED TOMATOES)']


In [322]:
def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'

successive_net = net.Network(notebook=True)
for ingredient_name in salad_data['name'].tolist():
    new_ingredient_names = [ingredient_name]
    ingredient_names = new_ingredient_names
    while new_ingredient_names != []:
        new_ingredient_ud_names = []
        for new_ingredient_name in new_ingredient_names:
            new_ingredient_ud_names += pairing_data[pairing_data['name'] == new_ingredient_name]['upper_direct_names'].iloc[0]
        new_ingredient_names = []
        for new_ingredient_ud_name in list(set(new_ingredient_ud_names)):
            if set(ingredient_names).issubset(set(pairing_data[pairing_data['name'] == new_ingredient_ud_name]['upper_direct_names'].iloc[0])):
#                 print('ADDING', new_ingredient_ud_name)
                new_ingredient_names.append(new_ingredient_ud_name)
        ingredient_names += new_ingredient_names
    
    nodes = ingredient_names
    
    if len(ingredient_names) > 30:
        ingredients = salad_data[salad_data['name'].isin(ingredient_names)]
        nodes_color = ingredients.apply(get_color, axis=1).tolist()
        successive_net.add_nodes(
            nodes=nodes,
            color=nodes_color
        )

        for i, name_1 in enumerate(ingredient_names[:-1]):
            for j, name_2 in enumerate(ingredient_names[i+1:]):
#                 print(name_1, name_2)
#                 print()
                successive_net.add_edge(name_1, name_2, color='black')
    
# problem
    # when I go through new ingredients, I don't have anywhere to put potential new ingredients
    # we have old, new, new new (for the cycle)
#     ingredients += new_ingredients:
    
# for ud_name in pairing_data[pairing_data['name'] == ingredient_name]['upper_direct_names'].tolist()[0]:
#     print(ingredient_name, ud_name)
#     print()

# successive_net = net.Network(notebook=True)
# nodes = salad_data['name'].tolist()


    
# nodes_color = salad_data.apply(get_color, axis=1).tolist()

# successive_net.add_nodes(
#     nodes=nodes,
#     color=nodes_color
# )

# ud_pairs = list(set(pairing_data['ud_sorted_pairs'].sum()))
# for pair in ud_pairs:
#     successive_net.add_edge(pair[0], pair[1], color='black')
print(len(successive_net.edges))
successive_net.show('successive_net.html')

995


In [314]:
for name in pairing_data[pairing_data['name'] == 'BEETS']['upper_direct_names']:
    print(name)

['APPLES (and APPLE CIDER, APPLE JUICE and/or APPLESAUCE)', 'CARROTS', 'CHEESE, PARMESAN', 'CHIVES', 'FENNEL', 'GARLIC', 'GREENS, DANDELION', 'LEMONS', 'OIL, OLIVE', 'ONIONS, RED (see also ONIONS)', 'ORANGES, ORANGE JUICE, and ORANGE ZEST', 'PINE NUTS (aka PIGNOLI)', 'PISTACHIOS', 'SHALLOTS', 'VINEGAR, BALSAMIC', 'VINEGAR, RED WINE (see also VINEGAR, WINE–IN GENERAL)', 'WALNUTS', 'ENDIVE (aka BELGIAN ENDIVE)', 'FENNEL FRONDS (or LEAVES)', 'HAZELNUTS (aka FILBERTS)', 'NUTS, MACADAMIA', 'PECANS', 'VINEGAR, SHERRY', 'CHEESE, FETA', 'CHEESE, GOAT', 'LETTUCE, LAMB’S (aka CORN SALAD or MCHE)']


In [315]:
print(salad_data[salad_data['name'] == 'BEETS']['pairs_with'].iloc[0])

agave nectar

allspice

anise hyssop

anise seeds

APPLES and apple juice

arame

ARUGULA

asparagus

avocado

baked goods, e.g., cakes

basil

bay leaf

beans, e.g., fava, green

bell peppers, e.g., green, red, yellow

blackberries

black-eyed peas

breads, e.g., dark, rye

butter

buttermilk

cabbage, e.g., green, red, savoy

capers

CARAWAY SEEDS

cardamom

“carpaccio”

CARROTS

cauliflower

celery and celery leaves

celery root

chard, Swiss

CHEESE, e.g., blue, Cambozola, cashew, cream, FETA, GOAT, Gorgonzola, Gouda, Havarti, Monterey Jack, Parmesan, queso blanco, ricotta, ricotta salata, Roquefort, salty

chervil

chickpeas

chicory

chiles and chili pepper flakes

chips, e.g., fried

CHIVES

chocolate and cocoa

chutneys

cilantro

cinnamon

citrus, e.g., juice

cloves

coriander

couscous

cranberries

cream

crème fraîche

crudités

cucumbers

cumin

curry powder

desserts, e.g., “red velvet”

DILL

edamame

eggs, esp. hard-boiled

endive

escarole hearts

falafel

FENNEL, fen

In [118]:
# ud_pairs = list(set(pairing_data['ud_sorted_pairs'].tolist()))
ud_sorted_pairs = list(set(pairing_data['ud_sorted_pairs'].sum()))
print(len(ud_sorted_pairs))

523


In [95]:
ud_sorted_pairs

['RADISHES—IN GENERAL (see also DAIKON)',
 'FIGS (see also FIGS, DRIED)',
 'CHIVES',
 'LETTUCE, LAMB’S (aka CORN SALAD or MCHE)',
 'GARLIC SCAPES',
 'CHEESE, PECORINO',
 'TOMATOES, SUN-DRIED (or OVEN-DRIED TOMATOES)',
 'POMEGRANATES and POMEGRANATE JUICE (see also POMEGRANATE MOLASSES)',
 'WATERMELON',
 'PLUMS',
 'COCONUT, COCONUT CREAM, and COCONUT MILK (see also COCONUT BUTTER, COCONUT NECTAR, COCONUT WATER, and MILK, COCONUT)',
 'TOMATOES, TOMATO JUICE, TOMATO PASTE, and TOMATO SAUCE',
 'WALNUTS',
 'SEEDS, PUMPKIN',
 'SUGAR SNAP PEAS (aka SNAP PEAS)',
 'GREENS, DANDELION',
 'KIWI (aka KIWIFRUIT)',
 'SORREL',
 'MANGOES',
 'RAISINS',
 'AVOCADO',
 'PEARS—IN GENERAL',
 'CHEESE, SMOKED MOZZARELLA',
 'VINEGAR, SHERRY',
 '“CHEESE,” VEGAN',
 'LEMONS',
 'RASPBERRIES',
 'CAULIFLOWER',
 'NUTS, MACADAMIA',
 'LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE)',
 'STRAWBERRIES',
 'CARROTS',
 'PECANS',
 'CHEESE, CHEDDAR',
 'LEEKS',
 'CELERY',
 'APRICOTS (see also APRICOTS, DRIED)',
 'VINEGAR, BROWN RICE

In [None]:
#IDEAS
    # savory v sweet templates (avoid eggs/mushroom/hard veg w fruit...)
    # get eg working, weight it as 2
    # start out w 1 base salad, extra (randomly selected from vip list), then branch out
    # control for eg lots of types of citrus, alliums
    # mark strong flavors, treat them separately
    # select main ingredients from each category that go with each other, then branch out from each, weighting traditionally at the end
    # work off of pairs_with data for categories, while at same time picking categories then within categories (to account for eg allium bias)
    # *only* match eg.s with specifically named; also, consider not matching categories the weird way
    # add a bonus if category of ingredient pairs well with other ingredients in salad (meh)
    # add in dressing garlics, when applicable
    # CONSIDER
        # replacing average net dist with something like, edges/possible edges

# TODO
    # add dried cranberries to salad stuffs?
    # maybe some other dried fruit? maybe not.
    # create generator that selects main ingredient(s) then draws from strong connections
        # then ranks?
    # increase emphasis on connection density
    # make connection density dependent on strength of connection
    
# UI PRIORITIES
    # speed
    # translate names to common (e.g. ALLMONDS (AND UNSWEETENED ALMOND BUTTER) -> Almonds, sliced)
    # maybe consisder one of those spokes-of-wheel charts to display stats/scores
    
# RECIPE DETAILS
    # maybe suggest diluting e.g. sesame oil w regular olive oil
    
# ==================================================================================================================
# ==================================================================================================================

# ALGORITHM A (best of many)
top_score = 0
for i in range(300):
#     print('ITERATION', i)
    n_subgraphs = 2
    while n_subgraphs > 1: # keep shuffling until you get a well connected graph
#         print('TRYING FOR NO EXTRA SUBGRAPHS')
        n_greens = random.randrange(2, 4)
        n_extras = random.randrange(2, 6)
        n_dressing_oils = 1
        n_dressing_vinegars = 1
        n_dressing_salts = 1
        n_dressing_peppers = 1
        # n_dressing_garlics = random.randrange(0, 2) # maybe make presence dependent on the rest. or, just leave out for now.
        
        selected_greens = salad_greens.sample(n_greens)
        selected_extras = salad_extras.sample(n_extras)
        selected_dressing_oils = salad_dressing_oils.sample(n_dressing_oils)
        selected_dressing_vinegars = salad_dressing_vinegars.sample(n_dressing_vinegars)
        selected_dressing_salts = salad_dressing_salts.sample(n_dressing_salts)
        selected_dressing_peppers = salad_dressing_peppers.sample(n_dressing_peppers)
        selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

        lower_category_pairs = []
        lower_direct_pairs = []
        upper_category_pairs = []
        upper_direct_pairs = []
        ingredients_list = selected_ingredients['name'].values.tolist()
        already_checked = []
        for ingredient_name in ingredients_list:
#             print('ingredient_name', ingredient_name)
            for lc_name in pairing_data['lower_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
#                 print(lc_name)
                if lc_name in ingredients_list and not lc_name in already_checked:
                    lower_category_pairs.append([ingredient_name, lc_name])
            for ld_name in pairing_data['lower_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ld_name in ingredients_list and not ld_name in already_checked:
                    lower_direct_pairs.append([ingredient_name, ld_name])
            for uc_name in pairing_data['upper_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if uc_name in ingredients_list and not uc_name in already_checked:
                    upper_category_pairs.append([ingredient_name, uc_name])
            for ud_name in pairing_data['upper_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ud_name in ingredients_list and not ud_name in already_checked:
                    upper_direct_pairs.append([ingredient_name, ud_name])
            already_checked.append(ingredient_name)

        lower_pairs = lower_category_pairs + lower_direct_pairs
        upper_pairs = upper_category_pairs + upper_direct_pairs
        all_pairs = lower_pairs + upper_pairs
#         print(lower_pairs, upper_pairs, all_pairs)
        
#         all_pairs_sp = [tuple(sorted(pair)) for pair in all_pairs]
#         print(len(all_pairs_sp), len(list(set(all_pairs_sp))))

#         print('INGREDIENTS', ingredients_list)
#         print()
#         print('LC PAIRS', lower_category_pairs)
#         print()
#         print()

        G = nx.Graph()
        G.add_nodes_from(selected_ingredients['name'].values.tolist())
        G.add_edges_from(all_pairs)
        n_subgraphs = len(list(nx.connected_component_subgraphs(G)))
#         print(G.edges())
#         print(n_subgraphs)
            
    score = 0

# SIMPLER ALTERNATIVE: bonus for proportion of actual pairs over possible pairs? could then combine with pair strength bonus?
# PAIRING DENSITY BONUS ============================================================================================
    # ranges from roughly (.1 to 1) * 4
    average_shortest_path_length = nx.average_shortest_path_length(G)
    average_shortest_path_score = 2.5 / average_shortest_path_length - 1.1
#     print(average_shortest_path_score)
    score += average_shortest_path_score * 4    
    
# # UPPER PAIRING BONUS ==============================================================================================
#     # ranges from roughly (.25 to 1) * 3
#     upper_proportion_score = len(upper_pairs) / len(all_pairs) * 2.5 # messed with this, not sure if it still works
# #     print(upper_proportion_score)
# #     print(len(upper_pairs), len(lower_pairs), len(all_pairs))
# #     print()
#     score += upper_proportion_score * 3

# PAIR STRENGTH BONUS ==============================================================================================
    # ranges from roughly (0 to 1) * 3

    # I'm thinking of 'lower category' as default, and awarding points for steps up from that
    ld_bonus = 1*len(lower_direct_pairs)
    uc_bonus = 2*len(upper_category_pairs)
    ud_bonus = 5*len(upper_direct_pairs)
    pair_strength_score = .6*(ld_bonus + uc_bonus + ud_bonus)/len(all_pairs) # otherwise would tend toward large salads
#     print('LC', len(lower_category_pairs))
#     print('LD', len(lower_direct_pairs))
#     print('UC', len(upper_category_pairs))
#     print('UD', len(upper_direct_pairs))
#     print('SCORE', pair_strength_score)
#     print()
    score += pair_strength_score * 3

# important but easy to avoid, so not weighted too heavily
# CLASH PENALTY ====================================================================================================
    # ranges from roughly (0 to 1) * -1.5
    all_clashing_pairs = []
    selected_ingredients_list = selected_ingredients['name'].values.tolist()
    for name in selected_ingredients_list:
        names_that_clash_with_name = clashes_with_data['all_clashes_with_names'][clashes_with_data['name'] == name].iloc[0]
        all_clashing_names = set(selected_ingredients_list).intersection(set(names_that_clash_with_name)) # selected names that clash with this selected name
        all_clashing_pairs += [tuple(sorted([name, all_clashing_name])) for all_clashing_name in all_clashing_names]

    all_clashing_pairs = list(set(all_clashing_pairs))
    all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
    score += len(all_clashing_pairs) * -1.5

# # FRUIT BONUS ======================================================================================================
#     # ranges from roughly (0 to 3) * .1
#     n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
#     score += n_fruit * .1
    
# # NUT SEED BONUS ===================================================================================================    
#     # ranges from roughly (0 to 2) * .15
#     n_nut_seed = len(selected_ingredients[selected_ingredients['protein_nut_seed'] == 'y'])
#     score += n_nut_seed * .15

# FLAVOR BALANCE BONUS =============================================================================================
    # ranges from roughly (0 to 1) * 1
    n_sweet_lower = len(selected_ingredients[selected_ingredients['sweet'] == 'y'])
    n_sweet_upper = len(selected_ingredients[selected_ingredients['sweet'] == 'Y'])
    n_salty_lower = len(selected_ingredients[selected_ingredients['salty'] == 'y'])
    n_salty_upper = len(selected_ingredients[selected_ingredients['salty'] == 'Y'])
    n_sour_lower = len(selected_ingredients[selected_ingredients['sour'] == 'y'])
    n_sour_upper = len(selected_ingredients[selected_ingredients['sour'] == 'Y'])
    n_savory_lower = len(selected_ingredients[selected_ingredients['savory'] == 'y'])
    n_savory_upper = len(selected_ingredients[selected_ingredients['savory'] == 'Y'])
    n_bitter_lower = len(selected_ingredients[selected_ingredients['bitter'] == 'y'])
    n_bitter_upper = len(selected_ingredients[selected_ingredients['bitter'] == 'Y'])
    n_spicy_lower = len(selected_ingredients[selected_ingredients['spicy'] == 'y'])
    n_spicy_upper = len(selected_ingredients[selected_ingredients['spicy'] == 'Y'])
    
    # each varies from roughly .5 to 1
    sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
    salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
    sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
    savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
    bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
    spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5
    
    flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.25
#     print(flavor_balance_score)
    
    score += flavor_balance_score
#     print(sweet_score, salty_score, sour_score, savory_score, bitter_score, spicy_score)
#     print()
    
#     print(n_sweet_lower, n_sweet_upper)
#     print(n_salty_lower, n_salty_upper)
#     print(n_sour_lower, n_sour_upper)
#     print(n_savory_lower, n_savory_upper)
#     print(n_bitter_lower, n_bitter_upper)
#     print(n_spicy_lower, n_spicy_upper)
#     print()

# TEXTURE BALANCE BONUS ============================================================================================
    # ranges from roughly (0 to 1) * .75
    n_crunchy_lower = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'y'])
    n_crunchy_upper = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'Y'])
    n_chewy_lower = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'y'])
    n_chewy_upper = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'Y'])
    n_juicy_lower = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'y'])
    n_juicy_upper = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'Y'])
    
    # each ranges from roughly 0 to 1
    crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
    chewy_score = (n_chewy_lower/2 + n_chewy_upper)
    juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3
#     print(crunchy_score, chewy_score, juicy_score)
    
    texture_balance_score = 4 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1
#     print(texture_balance_score)
#     print()
    
    score += texture_balance_score * .75

# seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
# FOOD GROUP BALANCE BONUS =========================================================================================
    # ranges from roughly (25 to 1) * 2
    n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
    n_veg = len(selected_ingredients[selected_ingredients['veg'] == 'y'])
    n_protein = len(selected_ingredients[selected_ingredients['protein'] == 'y'])
    
    # each varies from roughly 0 to 1 (sometimes a little over)
    fruit_score = n_fruit / 3
    veg_score = n_veg / 5
    protein_score = n_protein / 3
#     print(fruit_score, veg_score, protein_score)
    
    food_group_balance_score = 3 / (1 + abs(1-fruit_score) + abs(1-veg_score) + abs(1-protein_score)) - .75
#     print(food_group_balance_score)
#     print()
    
    score += food_group_balance_score * 2
    
    if score > top_score:
        top_score = score
        top_food_group_balance_score = food_group_balance_score
        top_average_shortest_path_score = average_shortest_path_score
        top_flavor_balance_score = flavor_balance_score
        top_texture_balance_score = texture_balance_score
        top_all_clashing_pairs_score = all_clashing_pairs_score
        top_pair_strength_score = pair_strength_score
#         top_upper_pairs = upper_pairs
#         top_lower_pairs = lower_pairs
        top_lc_pairs = lower_category_pairs
        top_ld_pairs = lower_direct_pairs
        top_uc_pairs = upper_category_pairs
        top_ud_pairs = upper_direct_pairs
        top_selected_ingredients = selected_ingredients
        top_average_shortest_path_length = average_shortest_path_length
#         top_upper_proportion = len(upper_pairs) / (len(upper_pairs) + len(lower_pairs))
# print('TOP AVG SHORTEST PATH LENGTH', top_average_shortest_path_length)
# print('TOP UPPER PROPORTION', top_upper_proportion)
print('RAW AVERAGE SHORTEST PATH SCORE', top_average_shortest_path_score)
print('RAW PAIR STRENGTH SCORE', top_pair_strength_score)
print('RAW ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score)
print('RAW FLAVOR BALANCE SCORE', top_flavor_balance_score)
print('RAW TEXTURE BALANCE SCORE', top_texture_balance_score)
print('RAW FOOD GROUP BALANCE SCORE', top_food_group_balance_score)
print('TOTAL SCORE', top_score)            
    
top_net = net.Network(notebook=True)

nodes = top_selected_ingredients['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = top_selected_ingredients.apply(get_color, axis=1).tolist()

top_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in top_lc_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in top_ld_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='grey')
    
for pair in top_uc_pairs:
    top_net.add_edge(pair[0], pair[1], color='darkgrey')
    
for pair in top_ud_pairs:
    top_net.add_edge(pair[0], pair[1], color='black')

vegan = top_selected_ingredients['not_vegan'].sum() == ''
gluten_free = top_selected_ingredients['gluten'].sum() == ''
# print(vegan, gluten_free)

top_net.show('top_net.html')

## 5.3. Random control

In [339]:
# ALGORITHM B (random control)
n_greens = random.randrange(1, 4)
n_extras = random.randrange(0, 10)
n_dressing_oils = 1
n_dressing_vinegars = 1
n_dressing_salts = 1
n_dressing_peppers = 1
# n_dressing_garlics = random.randrange(0, 2) # maybe make presence dependent on the rest. or, just leave out for now.

selected_greens = salad_greens.sample(n_greens)
selected_extras = salad_extras.sample(n_extras)
selected_dressing_oils = salad_dressing_oils.sample(n_dressing_oils)
selected_dressing_vinegars = salad_dressing_vinegars.sample(n_dressing_vinegars)
selected_dressing_salts = salad_dressing_salts.sample(n_dressing_salts)
selected_dressing_peppers = salad_dressing_peppers.sample(n_dressing_peppers)

selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

upper_pairs = []
lower_pairs = []
for ingredient_name in selected_ingredients['name']:
    for pairs_with_name in pairing_data['upper_pairs_with_names'][pairing_data['name'] == ingredient_name].iloc[0]:
        if pairs_with_name in selected_ingredients['name'].values.tolist():
#             print('UPPER PAIR:', ingredient_name, 'WITH', pairs_with_name)
            upper_pairs.append([ingredient_name, pairs_with_name])
    for pairs_with_name in pairing_data['lower_pairs_with_names'][pairing_data['name'] == ingredient_name].iloc[0]:
        if pairs_with_name in selected_ingredients['name'].values.tolist():
#             print('LOWER PAIR:', ingredient_name, 'WITH', pairs_with_name)
            lower_pairs.append([ingredient_name, pairs_with_name])

score = (len(upper_pairs) * 3 + len(lower_pairs)) / (n_greens + n_extras)
print('RANDOM SCORE', score)



random_net = net.Network(notebook=True)

nodes = selected_ingredients['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = selected_ingredients.apply(get_color, axis=1).tolist()

random_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)


for pair in upper_pairs:
    random_net.add_edge(pair[0], pair[1], color='black')

for pair in lower_pairs:
    random_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

random_net.show('selected_net.html')

RANDOM SCORE 11.2


### 5.4. Special ingredient

In [50]:
salad_flavor_data.index

Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            141, 142, 143, 144, 145, 146, 147, 148, 149, 150],
           dtype='int64', length=151)

## 5.5. Flavor tool generator

In [31]:
# !pip install networkx
# !pip install pyvis

import networkx as nx
from pyvis import network as net

In [178]:
salad_greens = salad_data[salad_data['salad_green'] == 'y']

salad_extras = salad_data[salad_data['salad_extra'] == 'y']
salad_extra_veg = salad_data[(salad_data['veg'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_fruits = salad_data[(salad_data['fruit'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_nuts_seeds = salad_data[(salad_data['protein_nut_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_others = salad_data[(salad_data['protein_nut_seed'] != 'y') & (salad_data['fruit'] != 'y') & (salad_data['veg'] != 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_nuts = salad_data[(salad_data['protein_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_seeds = salad_data[(salad_data['protein_nut'] == 'y') & (salad_data['salad_extra'] == 'y')]
salad_extra_tomatoes = salad_data[salad_data['salad_extra_tomato'] == 'y']
salad_extra_olives = salad_data[salad_data['salad_extra_olive'] == 'y']
salad_extra_cheeses = salad_data[salad_data['salad_extra_cheese'] == 'y']
salad_extra_eggs = salad_data[salad_data['salad_extra_egg'] == 'y']
salad_extra_croutons = salad_data[salad_data['salad_extra_crouton'] == 'y']

salad_dressing_oils = salad_data[salad_data['salad_dressing_oil'] == 'y']
salad_dressing_vinegars = salad_data[salad_data['salad_dressing_vinegar'] == 'y']
salad_dressing_salts = salad_data[salad_data['salad_dressing_salt'] == 'y']
salad_dressing_peppers = salad_data[salad_data['salad_dressing_pepper'] == 'y']
salad_dressing_garlics = salad_data[salad_data['salad_dressing_garlic'] == 'y']

In [184]:
n_locked = random.randrange(2, 6)
locked = salad_data.sample(n_locked)
print(locked['name'])

locked_greens = locked[locked['salad_green'] == 'y']

locked_extras = locked[locked['salad_extra'] == 'y']
# locked_extra_veg = salad_data[(salad_data['veg'] == 'y') & (salad_data['salad_extra'] == 'y')]
# locked_extra_fruits = salad_data[(salad_data['fruit'] == 'y') & (salad_data['salad_extra'] == 'y')]
# locked_extra_nuts_seeds = salad_data[(salad_data['protein_nut_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
# locked_extra_others = salad_data[(salad_data['protein_nut_seed'] != 'y') & (salad_data['fruit'] != 'y') & (salad_data['veg'] != 'y') & (salad_data['salad_extra'] == 'y')]
# locked_locked_greensextra_nuts = salad_data[(salad_data['protein_seed'] == 'y') & (salad_data['salad_extra'] == 'y')]
# locked_extra_seeds = salad_data[(salad_data['protein_nut'] == 'y') & (salad_data['salad_extra'] == 'y')]
# locked_extra_tomatoes = salad_data[salad_data['salad_extra_tomato'] == 'y']
# locked_extra_olives = salad_data[salad_data['salad_extra_olive'] == 'y']
# locked_extra_cheeses = salad_data[salad_data['salad_extra_cheese'] == 'y']
# locked_extra_eggs = salad_data[salad_data['salad_extra_egg'] == 'y']
# locked_extra_croutons = salad_data[salad_data['salad_extra_crouton'] == 'y']

locked_dressing_oils = locked[locked['salad_dressing_oil'] == 'y']
locked_dressing_vinegars = locked[locked['salad_dressing_vinegar'] == 'y']
locked_dressing_salts = locked[locked['salad_dressing_salt'] == 'y']
locked_dressing_peppers = locked[locked['salad_dressing_pepper'] == 'y']
# locked_dressing_garlics = salad_data[salad_data['salad_dressing_garlic'] == 'y']

the_rest = salad_data[~salad_data['name'].isin(locked['name'])]

34                    CABBAGE, RED
109                         PECANS
127                  SALT, TRUFFLE
8                 SPROUTS, ALFALFA
61     FIGS (see also FIGS, DRIED)
Name: name, dtype: object


In [217]:
n_gen_greens_min = max(2-len(locked_greens), 0)
n_gen_greens_max = max(3-len(locked_greens), 0)
n_gen_extras_min = max(2-len(locked_extras), 0)
n_gen_extras_max = max(4-len(locked_extras), 0)


n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)
# print('n_locked_greens', len(locked_greens))
# print('n_gen_greens', n_gen_greens)
# print('n_greens', n_gen_greens + len(locked_greens))
n_gen_extras = random.randrange(n_gen_extras_min, n_gen_extras_max+1)
# print('n_locked_extras', len(locked_extras))
# print('n_gen_extras', n_gen_extras)
# print('n_extras', n_gen_extras + len(locked_extras))

top_score = 0
for i in range(100):
#     print('ITERATION', i)
    n_subgraphs = 2
    while n_subgraphs > 1: # keep shuffling until you get a well connected graph
        n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)
        
        n_gen_extras = random.randrange(n_gen_extras_min, n_gen_extras_max+1)
        
        n_gen_dressing_oils = max(1-len(locked_dressing_oils), 0)
        n_gen_dressing_vinegars = max(1-len(locked_dressing_vinegars), 0)
        n_gen_dressing_salts = max(1-len(locked_dressing_salts), 0)
        n_gen_dressing_peppers = max(1-len(locked_dressing_oils), 0)
        
        selected_greens = locked_greens.append(the_rest.sample(n_gen_greens))
        selected_extras = locked_extras.append(the_rest.sample(n_gen_extras))
        selected_dressing_oils = locked_dressing_oils.append(the_rest.sample(n_gen_dressing_oils))
        selected_dressing_vinegars = locked_dressing_vinegars.append(the_rest.sample(n_gen_dressing_vinegars))
        selected_dressing_salts = locked_dressing_salts.append(the_rest.sample(n_gen_dressing_salts))
        selected_dressing_peppers = locked_dressing_peppers.append(the_rest.sample(n_gen_dressing_peppers))
        selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

        lower_category_pairs = []
        lower_direct_pairs = []
        upper_category_pairs = []
        upper_direct_pairs = []
        ingredients_list = selected_ingredients['name'].values.tolist()
        already_checked = []
        for ingredient_name in ingredients_list:
#             print('ingredient_name', ingredient_name)
            for lc_name in pairing_data['lower_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
#                 print(lc_name)
                if lc_name in ingredients_list and not lc_name in already_checked:
                    lower_category_pairs.append([ingredient_name, lc_name])
            for ld_name in pairing_data['lower_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ld_name in ingredients_list and not ld_name in already_checked:
                    lower_direct_pairs.append([ingredient_name, ld_name])
            for uc_name in pairing_data['upper_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if uc_name in ingredients_list and not uc_name in already_checked:
                    upper_category_pairs.append([ingredient_name, uc_name])
            for ud_name in pairing_data['upper_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
                if ud_name in ingredients_list and not ud_name in already_checked:
                    upper_direct_pairs.append([ingredient_name, ud_name])
            already_checked.append(ingredient_name)

        lower_pairs = lower_category_pairs + lower_direct_pairs
        upper_pairs = upper_category_pairs + upper_direct_pairs
        all_pairs = lower_pairs + upper_pairs
#         print(lower_pairs, upper_pairs, all_pairs)
        
#         all_pairs_sp = [tuple(sorted(pair)) for pair in all_pairs]
#         print(len(all_pairs_sp), len(list(set(all_pairs_sp))))

#         print('INGREDIENTS', ingredients_list)
#         print()
#         print('LC PAIRS', lower_category_pairs)
#         print()
#         print()

        G = nx.Graph()
        G.add_nodes_from(selected_ingredients['name'].values.tolist())
        G.add_edges_from(all_pairs)
        n_subgraphs = len(list(nx.connected_component_subgraphs(G)))
#         print(G.edges())
#         print(n_subgraphs)
            
    score = 0

# SIMPLER ALTERNATIVE: bonus for proportion of actual pairs over possible pairs? could then combine with pair strength bonus?
# PAIRING DENSITY BONUS ============================================================================================
    # ranges from roughly (.1 to 1) * 4
    average_shortest_path_length = nx.average_shortest_path_length(G)
    average_shortest_path_score = 2.5 / average_shortest_path_length - 1.1
#     print(average_shortest_path_score)
    score += average_shortest_path_score * 4    
    
# # UPPER PAIRING BONUS ==============================================================================================
#     # ranges from roughly (.25 to 1) * 3
#     upper_proportion_score = len(upper_pairs) / len(all_pairs) * 2.5 # messed with this, not sure if it still works
# #     print(upper_proportion_score)
# #     print(len(upper_pairs), len(lower_pairs), len(all_pairs))
# #     print()
#     score += upper_proportion_score * 3

# PAIR STRENGTH BONUS ==============================================================================================
    # ranges from roughly (0 to 1) * 3

    # I'm thinking of 'lower category' as default, and awarding points for steps up from that
    ld_bonus = 1*len(lower_direct_pairs)
    uc_bonus = 2*len(upper_category_pairs)
    ud_bonus = 5*len(upper_direct_pairs)
    pair_strength_score = .6*(ld_bonus + uc_bonus + ud_bonus)/len(all_pairs) # otherwise would tend toward large salads
#     print('LC', len(lower_category_pairs))
#     print('LD', len(lower_direct_pairs))
#     print('UC', len(upper_category_pairs))
#     print('UD', len(upper_direct_pairs))
#     print('SCORE', pair_strength_score)
#     print()
    score += pair_strength_score * 3

# important but easy to avoid, so not weighted too heavily
# CLASH PENALTY ====================================================================================================
    # ranges from roughly (0 to 1) * -1.5
    all_clashing_pairs = []
    selected_ingredients_list = selected_ingredients['name'].values.tolist()
    for name in selected_ingredients_list:
        names_that_clash_with_name = clashes_with_data['all_clashes_with_names'][clashes_with_data['name'] == name].iloc[0]
        all_clashing_names = set(selected_ingredients_list).intersection(set(names_that_clash_with_name)) # selected names that clash with this selected name
        all_clashing_pairs += [tuple(sorted([name, all_clashing_name])) for all_clashing_name in all_clashing_names]

    all_clashing_pairs = list(set(all_clashing_pairs))
    all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
    score += len(all_clashing_pairs) * -1.5

# # FRUIT BONUS ======================================================================================================
#     # ranges from roughly (0 to 3) * .1
#     n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
#     score += n_fruit * .1
    
# # NUT SEED BONUS ===================================================================================================    
#     # ranges from roughly (0 to 2) * .15
#     n_nut_seed = len(selected_ingredients[selected_ingredients['protein_nut_seed'] == 'y'])
#     score += n_nut_seed * .15

# FLAVOR BALANCE BONUS =============================================================================================
    # ranges from roughly (0 to 1) * 1
    n_sweet_lower = len(selected_ingredients[selected_ingredients['sweet'] == 'y'])
    n_sweet_upper = len(selected_ingredients[selected_ingredients['sweet'] == 'Y'])
    n_salty_lower = len(selected_ingredients[selected_ingredients['salty'] == 'y'])
    n_salty_upper = len(selected_ingredients[selected_ingredients['salty'] == 'Y'])
    n_sour_lower = len(selected_ingredients[selected_ingredients['sour'] == 'y'])
    n_sour_upper = len(selected_ingredients[selected_ingredients['sour'] == 'Y'])
    n_savory_lower = len(selected_ingredients[selected_ingredients['savory'] == 'y'])
    n_savory_upper = len(selected_ingredients[selected_ingredients['savory'] == 'Y'])
    n_bitter_lower = len(selected_ingredients[selected_ingredients['bitter'] == 'y'])
    n_bitter_upper = len(selected_ingredients[selected_ingredients['bitter'] == 'Y'])
    n_spicy_lower = len(selected_ingredients[selected_ingredients['spicy'] == 'y'])
    n_spicy_upper = len(selected_ingredients[selected_ingredients['spicy'] == 'Y'])
    
    # each varies from roughly .5 to 1
    sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
    salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
    sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
    savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
    bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
    spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5
    
    flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.25
#     print(flavor_balance_score)
    
    score += flavor_balance_score
#     print(sweet_score, salty_score, sour_score, savory_score, bitter_score, spicy_score)
#     print()
    
#     print(n_sweet_lower, n_sweet_upper)
#     print(n_salty_lower, n_salty_upper)
#     print(n_sour_lower, n_sour_upper)
#     print(n_savory_lower, n_savory_upper)
#     print(n_bitter_lower, n_bitter_upper)
#     print(n_spicy_lower, n_spicy_upper)
#     print()

# TEXTURE BALANCE BONUS ============================================================================================
    # ranges from roughly (0 to 1) * .75
    n_crunchy_lower = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'y'])
    n_crunchy_upper = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'Y'])
    n_chewy_lower = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'y'])
    n_chewy_upper = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'Y'])
    n_juicy_lower = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'y'])
    n_juicy_upper = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'Y'])
    
    # each ranges from roughly 0 to 1
    crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
    chewy_score = (n_chewy_lower/2 + n_chewy_upper)
    juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3
#     print(crunchy_score, chewy_score, juicy_score)
    
    texture_balance_score = 4 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1
#     print(texture_balance_score)
#     print()
    
    score += texture_balance_score * .75

# seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
# FOOD GROUP BALANCE BONUS =========================================================================================
    # ranges from roughly (25 to 1) * 2
    n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
    n_veg = len(selected_ingredients[selected_ingredients['veg'] == 'y'])
    n_protein = len(selected_ingredients[selected_ingredients['protein'] == 'y'])
    
    # each varies from roughly 0 to 1 (sometimes a little over)
    fruit_score = n_fruit / 3
    veg_score = n_veg / 5
    protein_score = n_protein / 3
#     print(fruit_score, veg_score, protein_score)
    
    food_group_balance_score = 3 / (1 + abs(1-fruit_score) + abs(1-veg_score) + abs(1-protein_score)) - .75
#     print(food_group_balance_score)
#     print()
    
    score += food_group_balance_score * 2
    
    if score > top_score:
        top_score = score
        top_food_group_balance_score = food_group_balance_score
        top_average_shortest_path_score = average_shortest_path_score
        top_flavor_balance_score = flavor_balance_score
        top_texture_balance_score = texture_balance_score
        top_all_clashing_pairs_score = all_clashing_pairs_score
        top_pair_strength_score = pair_strength_score
#         top_upper_pairs = upper_pairs
#         top_lower_pairs = lower_pairs
        top_lc_pairs = lower_category_pairs
        top_ld_pairs = lower_direct_pairs
        top_uc_pairs = upper_category_pairs
        top_ud_pairs = upper_direct_pairs
        top_selected_ingredients = selected_ingredients
        top_average_shortest_path_length = average_shortest_path_length
#         top_upper_proportion = len(upper_pairs) / (len(upper_pairs) + len(lower_pairs))
# print('TOP AVG SHORTEST PATH LENGTH', top_average_shortest_path_length)
# print('TOP UPPER PROPORTION', top_upper_proportion)
print('RAW AVERAGE SHORTEST PATH SCORE', top_average_shortest_path_score)
print('RAW PAIR STRENGTH SCORE', top_pair_strength_score)
print('RAW ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score)
print('RAW FLAVOR BALANCE SCORE', top_flavor_balance_score)
print('RAW TEXTURE BALANCE SCORE', top_texture_balance_score)
print('RAW FOOD GROUP BALANCE SCORE', top_food_group_balance_score)
print('TOTAL SCORE', top_score)            
    
top_net = net.Network(notebook=True)

nodes = top_selected_ingredients['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = top_selected_ingredients.apply(get_color, axis=1).tolist()

top_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in top_lc_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in top_ld_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='grey')
    
for pair in top_uc_pairs:
    top_net.add_edge(pair[0], pair[1], color='darkgrey')
    
for pair in top_ud_pairs:
    top_net.add_edge(pair[0], pair[1], color='black')

vegan = top_selected_ingredients['not_vegan'].sum() == ''
gluten_free = top_selected_ingredients['gluten'].sum() == ''
# print(vegan, gluten_free)

top_net.show('top_net.html')

RAW AVERAGE SHORTEST PATH SCORE 0.6857142857142857
RAW PAIR STRENGTH SCORE 0.6444444444444444
RAW ALL CLASHING PAIRS SCORE 0.25
RAW FLAVOR BALANCE SCORE 0.8333333333333335
RAW TEXTURE BALANCE SCORE 1.0
RAW FOOD GROUP BALANCE SCORE 1.3928571428571428
TOTAL SCORE 7.545238095238095


In [290]:
n_gen_greens_min = max(2-len(locked_greens), 0)
n_gen_greens_max = max(3-len(locked_greens), 0)
n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)

n_gen_extras_min = max(2-len(locked_extras), 0)
n_gen_extras_max = max(4-len(locked_extras), 0)
n_gen_extras = random.randrange(n_gen_extras_min, n_gen_extras_max+1)

top_score = 0
for i in range(100):
    n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)

    n_gen_extras = random.randrange(n_gen_extras_min, n_gen_extras_max+1)

    n_gen_dressing_oils = max(1-len(locked_dressing_oils), 0)
    n_gen_dressing_vinegars = max(1-len(locked_dressing_vinegars), 0)
    n_gen_dressing_salts = max(1-len(locked_dressing_salts), 0)
    n_gen_dressing_peppers = max(1-len(locked_dressing_oils), 0)

    selected_greens = locked_greens.append(the_rest.sample(n_gen_greens))
    selected_extras = locked_extras.append(the_rest.sample(n_gen_extras))
    selected_dressing_oils = locked_dressing_oils.append(the_rest.sample(n_gen_dressing_oils))
    selected_dressing_vinegars = locked_dressing_vinegars.append(the_rest.sample(n_gen_dressing_vinegars))
    selected_dressing_salts = locked_dressing_salts.append(the_rest.sample(n_gen_dressing_salts))
    selected_dressing_peppers = locked_dressing_peppers.append(the_rest.sample(n_gen_dressing_peppers))
    selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)

    lower_category_pairs = []
    lower_direct_pairs = []
    upper_category_pairs = []
    upper_direct_pairs = []
    ingredients_list = selected_ingredients['name'].values.tolist()
    already_checked = []
    for ingredient_name in ingredients_list:
#             print('ingredient_name', ingredient_name)
        for lc_name in pairing_data['lower_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
#                 print(lc_name)
            if lc_name in ingredients_list and not lc_name in already_checked:
                lower_category_pairs.append([ingredient_name, lc_name])
        for ld_name in pairing_data['lower_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
            if ld_name in ingredients_list and not ld_name in already_checked:
                lower_direct_pairs.append([ingredient_name, ld_name])
        for uc_name in pairing_data['upper_category_names'][pairing_data['name'] == ingredient_name].iloc[0]:
            if uc_name in ingredients_list and not uc_name in already_checked:
                upper_category_pairs.append([ingredient_name, uc_name])
        for ud_name in pairing_data['upper_direct_names'][pairing_data['name'] == ingredient_name].iloc[0]:
            if ud_name in ingredients_list and not ud_name in already_checked:
                upper_direct_pairs.append([ingredient_name, ud_name])
        already_checked.append(ingredient_name)

    lower_pairs = lower_category_pairs + lower_direct_pairs
    upper_pairs = upper_category_pairs + upper_direct_pairs
    all_pairs = lower_pairs + upper_pairs
            
    score = 0

# SIMPLER ALTERNATIVE: bonus for proportion of actual pairs over possible pairs? could then combine with pair strength bonus?
# PAIRING DENSITY BONUS ============================================================================================
    # ranges from roughly (.1 to 1) * 4
#     average_shortest_path_length = nx.average_shortest_path_length(G)
#     average_shortest_path_score = 2.5 / average_shortest_path_length - 1.1
# #     print(average_shortest_path_score)
#     score += average_shortest_path_score * 4    
    
# # UPPER PAIRING BONUS ==============================================================================================
#     # ranges from roughly (.25 to 1) * 3
#     upper_proportion_score = len(upper_pairs) / len(all_pairs) * 2.5 # messed with this, not sure if it still works
# #     print(upper_proportion_score)
# #     print(len(upper_pairs), len(lower_pairs), len(all_pairs))
# #     print()
#     score += upper_proportion_score * 3

# PAIR STRENGTH BONUS ==============================================================================================
    # ranges from roughly (0 to 1) * 3

    # I'm thinking of 'lower category' as default, and awarding points for steps up from that
    ld_bonus = 1*len(lower_direct_pairs)
    uc_bonus = 2*len(upper_category_pairs)
    ud_bonus = 5*len(upper_direct_pairs)
    pair_strength_score = .6*(ld_bonus + uc_bonus + ud_bonus)/len(all_pairs) # otherwise would tend toward large salads
#     print('LC', len(lower_category_pairs))
#     print('LD', len(lower_direct_pairs))
#     print('UC', len(upper_category_pairs))
#     print('UD', len(upper_direct_pairs))
#     print('SCORE', pair_strength_score)
#     print()
    score += pair_strength_score * 3

# important but easy to avoid, so not weighted too heavily
# CLASH PENALTY ====================================================================================================
    # ranges from roughly (0 to 1) * -1.5
    all_clashing_pairs = []
    selected_ingredients_list = selected_ingredients['name'].values.tolist()
    for name in selected_ingredients_list:
        names_that_clash_with_name = clashes_with_data['all_clashes_with_names'][clashes_with_data['name'] == name].iloc[0]
        all_clashing_names = set(selected_ingredients_list).intersection(set(names_that_clash_with_name)) # selected names that clash with this selected name
        all_clashing_pairs += [tuple(sorted([name, all_clashing_name])) for all_clashing_name in all_clashing_names]

    all_clashing_pairs = list(set(all_clashing_pairs))
    all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
    score += len(all_clashing_pairs) * -1.5

# # FRUIT BONUS ======================================================================================================
#     # ranges from roughly (0 to 3) * .1
#     n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
#     score += n_fruit * .1
    
# # NUT SEED BONUS ===================================================================================================    
#     # ranges from roughly (0 to 2) * .15
#     n_nut_seed = len(selected_ingredients[selected_ingredients['protein_nut_seed'] == 'y'])
#     score += n_nut_seed * .15

# FLAVOR BALANCE BONUS =============================================================================================
    # ranges from roughly (0 to 1) * 1
    n_sweet_lower = len(selected_ingredients[selected_ingredients['sweet'] == 'y'])
    n_sweet_upper = len(selected_ingredients[selected_ingredients['sweet'] == 'Y'])
    n_salty_lower = len(selected_ingredients[selected_ingredients['salty'] == 'y'])
    n_salty_upper = len(selected_ingredients[selected_ingredients['salty'] == 'Y'])
    n_sour_lower = len(selected_ingredients[selected_ingredients['sour'] == 'y'])
    n_sour_upper = len(selected_ingredients[selected_ingredients['sour'] == 'Y'])
    n_savory_lower = len(selected_ingredients[selected_ingredients['savory'] == 'y'])
    n_savory_upper = len(selected_ingredients[selected_ingredients['savory'] == 'Y'])
    n_bitter_lower = len(selected_ingredients[selected_ingredients['bitter'] == 'y'])
    n_bitter_upper = len(selected_ingredients[selected_ingredients['bitter'] == 'Y'])
    n_spicy_lower = len(selected_ingredients[selected_ingredients['spicy'] == 'y'])
    n_spicy_upper = len(selected_ingredients[selected_ingredients['spicy'] == 'Y'])
    
    # each varies from roughly .5 to 1
    sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
    salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
    sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
    savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
    bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
    spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5
    
    flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.25
#     print(flavor_balance_score)
    
    score += flavor_balance_score
#     print(sweet_score, salty_score, sour_score, savory_score, bitter_score, spicy_score)
#     print()
    
#     print(n_sweet_lower, n_sweet_upper)
#     print(n_salty_lower, n_salty_upper)
#     print(n_sour_lower, n_sour_upper)
#     print(n_savory_lower, n_savory_upper)
#     print(n_bitter_lower, n_bitter_upper)
#     print(n_spicy_lower, n_spicy_upper)
#     print()

# TEXTURE BALANCE BONUS ============================================================================================
    # ranges from roughly (0 to 1) * .75
    n_crunchy_lower = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'y'])
    n_crunchy_upper = len(selected_ingredients[selected_ingredients['salad_crunchy'] == 'Y'])
    n_chewy_lower = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'y'])
    n_chewy_upper = len(selected_ingredients[selected_ingredients['salad_chewy'] == 'Y'])
    n_juicy_lower = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'y'])
    n_juicy_upper = len(selected_ingredients[selected_ingredients['salad_juicy'] == 'Y'])
    
    # each ranges from roughly 0 to 1
    crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
    chewy_score = (n_chewy_lower/2 + n_chewy_upper)
    juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3
#     print(crunchy_score, chewy_score, juicy_score)
    
    texture_balance_score = 4 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1
#     print(texture_balance_score)
#     print()
    
    score += texture_balance_score * .75

# seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
# FOOD GROUP BALANCE BONUS =========================================================================================
    # ranges from roughly (25 to 1) * 2
    n_fruit = len(selected_ingredients[selected_ingredients['fruit'] == 'y'])
    n_veg = len(selected_ingredients[selected_ingredients['veg'] == 'y'])
    n_protein = len(selected_ingredients[selected_ingredients['protein'] == 'y'])
    
    # each varies from roughly 0 to 1 (sometimes a little over)
    fruit_score = n_fruit / 3
    veg_score = n_veg / 5
    protein_score = n_protein / 3
#     print(fruit_score, veg_score, protein_score)
    
    food_group_balance_score = 3 / (1 + abs(1-fruit_score) + abs(1-veg_score) + abs(1-protein_score)) - .75
#     print(food_group_balance_score)
#     print()
    
    score += food_group_balance_score * 2
    
    if score > top_score:
        top_score = score
        top_food_group_balance_score = food_group_balance_score
        top_average_shortest_path_score = average_shortest_path_score
        top_flavor_balance_score = flavor_balance_score
        top_texture_balance_score = texture_balance_score
        top_all_clashing_pairs_score = all_clashing_pairs_score
        top_pair_strength_score = pair_strength_score
#         top_upper_pairs = upper_pairs
#         top_lower_pairs = lower_pairs
        top_lc_pairs = lower_category_pairs
        top_ld_pairs = lower_direct_pairs
        top_uc_pairs = upper_category_pairs
        top_ud_pairs = upper_direct_pairs
        top_selected_ingredients = selected_ingredients
        top_average_shortest_path_length = average_shortest_path_length
#         top_upper_proportion = len(upper_pairs) / (len(upper_pairs) + len(lower_pairs))
# print('TOP AVG SHORTEST PATH LENGTH', top_average_shortest_path_length)
# print('TOP UPPER PROPORTION', top_upper_proportion)
print('RAW AVERAGE SHORTEST PATH SCORE', top_average_shortest_path_score)
print('RAW PAIR STRENGTH SCORE', top_pair_strength_score)
print('RAW ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score)
print('RAW FLAVOR BALANCE SCORE', top_flavor_balance_score)
print('RAW TEXTURE BALANCE SCORE', top_texture_balance_score)
print('RAW FOOD GROUP BALANCE SCORE', top_food_group_balance_score)
print('TOTAL SCORE', top_score)            
    
top_net = net.Network(notebook=True)

nodes = top_selected_ingredients['name'].tolist()

def get_color(row):
#     print(type(row))
#     print(row)
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'
    
nodes_color = top_selected_ingredients.apply(get_color, axis=1).tolist()

top_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in top_lc_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in top_ld_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='grey')
    
for pair in top_uc_pairs:
    top_net.add_edge(pair[0], pair[1], color='darkgrey')
    
for pair in top_ud_pairs:
    top_net.add_edge(pair[0], pair[1], color='black')

vegan = top_selected_ingredients['not_vegan'].sum() == ''
gluten_free = top_selected_ingredients['gluten'].sum() == ''
# print(vegan, gluten_free)

top_net.show('top_net.html')

RAW AVERAGE SHORTEST PATH SCORE 0.6307692307692307
RAW PAIR STRENGTH SCORE 0.5172413793103449
RAW ALL CLASHING PAIRS SCORE 0.5
RAW FLAVOR BALANCE SCORE 0.3125000000000002
RAW TEXTURE BALANCE SCORE 1.9999999999999996
RAW FOOD GROUP BALANCE SCORE 1.206521739130435
TOTAL SCORE 2.7772676161919048


In [28]:
n_locked = random.randrange(0, 1)
locked = salad_data.sample(n_locked)
locked_greens = locked[locked['salad_green'] == 'y']
locked_extras = locked[locked['salad_extra'] == 'y']
locked_dressing_oils = locked[locked['salad_dressing_oil'] == 'y']
locked_dressing_vinegars = locked[locked['salad_dressing_vinegar'] == 'y']
locked_dressing_salts = locked[locked['salad_dressing_salt'] == 'y']
locked_dressing_peppers = locked[locked['salad_dressing_pepper'] == 'y']

the_rest = salad_data[~salad_data['name'].isin(locked['name'])]
the_rest_greens = the_rest[the_rest['salad_green'] == 'y']
the_rest_extras = the_rest[the_rest['salad_extra'] == 'y']
the_rest_dressing_oils = the_rest[the_rest['salad_dressing_oil'] == 'y']
the_rest_dressing_vinegars = the_rest[the_rest['salad_dressing_vinegar'] == 'y']
the_rest_dressing_salts = the_rest[the_rest['salad_dressing_salt'] == 'y']
the_rest_dressing_peppers = the_rest[the_rest['salad_dressing_pepper'] == 'y']

n_gen_greens_min = max(2-len(locked_greens), 0)
n_gen_greens_max = max(3-len(locked_greens), 0)
n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)

n_gen_extras_min = max(2-len(locked_extras), 0)
n_gen_extras_max = max(4-len(locked_extras), 0)

In [29]:
locked['name']

Series([], Name: name, dtype: object)

In [33]:
# account for if connected subgraph is impossible
# naming conventions (e.g. for clashing, pairing pairs) kinda wonky

top_score = 0
for try_i in range(100):
    n_subgraphs = 2
    while n_subgraphs > 1: # keep shuffling until you get a well connected graph
        
        n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)

        n_gen_extras = random.randrange(n_gen_extras_min, n_gen_extras_max+1)

        n_gen_dressing_oils = max(1-len(locked_dressing_oils), 0)
        n_gen_dressing_vinegars = max(1-len(locked_dressing_vinegars), 0)
        n_gen_dressing_salts = max(1-len(locked_dressing_salts), 0)
        n_gen_dressing_peppers = max(1-len(locked_dressing_oils), 0)

        selected_greens = locked_greens.append(the_rest_greens.sample(n_gen_greens))
        selected_extras = locked_extras.append(the_rest_extras.sample(n_gen_extras))
        selected_dressing_oils = locked_dressing_oils.append(the_rest_dressing_oils.sample(n_gen_dressing_oils))
        selected_dressing_vinegars = locked_dressing_vinegars.append(the_rest_dressing_vinegars.sample(n_gen_dressing_vinegars))
        selected_dressing_salts = locked_dressing_salts.append(the_rest_dressing_salts.sample(n_gen_dressing_salts))
        selected_dressing_peppers = locked_dressing_peppers.append(the_rest_dressing_peppers.sample(n_gen_dressing_peppers))
        selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)
        selected_names = selected_ingredients['name'].values.tolist()

        lower_category_pairs = []
        lower_direct_pairs = []
        upper_category_pairs = []
        upper_direct_pairs = []
        lower_clashing_pairs = []
        upper_clashing_pairs = []

        # finicky but pretty fast
        for i, col_name in enumerate(selected_names):
            for j, row_name in enumerate(selected_names[i+1:]):
                connection = selected_ingredients[col_name].tolist()[i+1+j] # this is what is finicky
                if connection == 'c':
                    lower_category_pairs.append((col_name, row_name,))
                elif connection == 'd':
                    lower_direct_pairs.append((col_name, row_name,))
                elif connection == 'C':
                    upper_category_pairs.append((col_name, row_name,))
                elif connection == 'D':
                    upper_direct_pairs.append((col_name, row_name,))
                elif connection == 'n':
                    lower_clashing_pairs.append((col_name, row_name,))
                elif connection == 'N':
                    upper_clashing_pairs.append((col_name, row_name,))
        lower_pairs = lower_category_pairs + lower_direct_pairs
        upper_pairs = upper_category_pairs + upper_direct_pairs
        all_pairs = lower_pairs + upper_pairs
        all_clashing_pairs = lower_clashing_pairs + upper_clashing_pairs

        G = nx.Graph()
        G.add_nodes_from(selected_names)
        G.add_edges_from(lower_category_pairs, length=2)
        G.add_edges_from(lower_direct_pairs, length=1.5)
        G.add_edges_from(upper_category_pairs, length=1.2)
        G.add_edges_from(upper_direct_pairs, length=1)
        n_subgraphs = len(list(nx.connected_component_subgraphs(G)))

    score = 0

# PAIRING BONUS ============================================================================================
    # ranges from roughly (0 to 1) * 3, tho could be a lil over or under that range
    average_shortest_path_length = nx.average_shortest_path_length(G, weight='length')
#     print(average_shortest_path_length)
    average_shortest_path_score = 1 / average_shortest_path_length * 4 - 1
#     print(average_shortest_path_length, average_shortest_path_score)
    score += average_shortest_path_score * 3

# important but easy to avoid, so not weighted too heavily
# CLASH PENALTY ====================================================================================================
    # ranges from roughly (0 to 1) * -1.5
#     all_clashing_pairs_score = len(list(all_clashing_pairs)) / 4
    clashing_pairs_score = len(all_clashing_pairs)
#     print(clashing_pairs_score)
    score += clashing_pairs_score * -1.5

# FLAVOR BALANCE BONUS =============================================================================================
    # ranges from roughly (0 to 1) * 1
    n_sweet_lower = (selected_ingredients['sweet'] == 'y').sum()
    n_sweet_upper = (selected_ingredients['sweet'] == 'Y').sum()
    n_salty_lower = (selected_ingredients['salty'] == 'y').sum()
    n_salty_upper = (selected_ingredients['salty'] == 'Y').sum()
    n_sour_lower = (selected_ingredients['sour'] == 'y').sum()
    n_sour_upper = (selected_ingredients['sour'] == 'Y').sum()
    n_savory_lower = (selected_ingredients['savory'] == 'y').sum()
    n_savory_upper = (selected_ingredients['savory'] == 'Y').sum()
    n_bitter_lower = (selected_ingredients['bitter'] == 'y').sum()
    n_bitter_upper = (selected_ingredients['bitter'] == 'Y').sum()
    n_spicy_lower = (selected_ingredients['spicy'] == 'y').sum()
    n_spicy_upper = (selected_ingredients['spicy'] == 'Y').sum()

    # each varies from roughly .5 to 1
    sweet_score = (n_sweet_lower/2 + n_sweet_upper)/5
    salty_score = (n_salty_lower/2 + n_salty_upper)*2/5
    sour_score = (n_sour_lower/2 + n_sour_upper)*2/5
    savory_score = (n_savory_lower/2 + n_savory_upper)*3/5
    bitter_score = (n_bitter_lower/2 + n_bitter_upper)*3/5
    spicy_score = (n_spicy_lower/2 + n_spicy_upper)*2/5

    flavor_balance_score = 5 / (1 + abs(1-sweet_score) + abs(1-salty_score) + abs(1-sour_score) + abs(1-savory_score) + abs(1-spicy_score)) - 1.2
#     print(flavor_balance_score)
    score += flavor_balance_score

# TEXTURE BALANCE BONUS ============================================================================================
    # ranges from roughly (0 to 1) * .75
    n_crunchy_lower = (selected_ingredients['salad_crunchy'] == 'y').sum()
    n_crunchy_upper = (selected_ingredients['salad_crunchy'] == 'Y').sum()
    n_chewy_lower = (selected_ingredients['salad_chewy'] == 'y').sum()
    n_chewy_upper = (selected_ingredients['salad_chewy'] == 'Y').sum()
    n_juicy_lower = (selected_ingredients['salad_juicy'] == 'y').sum()
    n_juicy_upper = (selected_ingredients['salad_juicy'] == 'Y').sum()

    # each ranges from roughly 0 to 1
    crunchy_score = (n_crunchy_lower/2 + n_crunchy_upper)/3
    chewy_score = (n_chewy_lower/2 + n_chewy_upper)
    juicy_score = (n_juicy_lower/2 + n_juicy_upper)/3

    texture_balance_score = 5 / (1 + abs(1-crunchy_score) + abs(1-chewy_score) + abs(1-juicy_score)) - 1.25
#     print(texture_balance_score)
    score += texture_balance_score * .75

# will bias toward larger salads, slightly
# seems like it's hard to balance food groups on top of everything else. pity the scores aren't more independent
# FOOD GROUP BALANCE BONUS =========================================================================================
    # ranges from roughly (0 to 1) * 2
    n_fruit = (selected_ingredients['fruit'] == 'y').sum()
    n_veg = (selected_ingredients['veg'] == 'y').sum()
    n_protein = (selected_ingredients['protein'] == 'y').sum()

    # highest possible score is with 3 fruit, 4 veg, 3 protein (= 2.25). Fruit matters the most, followed by protein, then veg. Usually there are fewer of each than this, tho, so basically rewards more stuff of any?
#     food_group_balance_score = 10 / (1 + 3*abs(3-n_fruit) + abs(4-n_veg) + 2*abs(2-n_protein)) -.7
#     print(n_fruit, n_veg, n_protein, food_group_balance_score)
    
    # /2 for steep diminishing returns (?)
    fruit_score = (n_fruit/2)**.5
    veg_score = (n_veg/2)**.5
    protein_score = (n_protein/2)**.5 #(0->0, 1->1, 4->2, 9->3)
#     print(n_fruit, n_veg, n_protein)
#     print(fruit_score, veg_score, protein_score)
#     print(fruit_score + veg_score + protein_score)
    food_group_balance_score = (3*fruit_score + veg_score + 2*protein_score) * .22 - .33
#     print(food_group_balance_score)
#     print()
    score += food_group_balance_score * 2
    
    if score > top_score:
        top_score = score
        top_food_group_balance_score = food_group_balance_score
        top_flavor_balance_score = flavor_balance_score
        top_texture_balance_score = texture_balance_score
        top_clashing_pairs_score = clashing_pairs_score
        top_lc_pairs = lower_category_pairs
        top_ld_pairs = lower_direct_pairs
        top_uc_pairs = upper_category_pairs
        top_ud_pairs = upper_direct_pairs
        top_selected_ingredients = selected_ingredients
        top_average_shortest_path_score = average_shortest_path_score
print('AVG SHORTEST PATH SCORE', top_average_shortest_path_score)
print('RAW ALL CLASHING PAIRS SCORE', top_all_clashing_pairs_score)
print('RAW FLAVOR BALANCE SCORE', top_flavor_balance_score)
print('RAW TEXTURE BALANCE SCORE', top_texture_balance_score)
print('RAW FOOD GROUP BALANCE SCORE', top_food_group_balance_score)
print('TOTAL SCORE', top_score)         

top_net = net.Network(notebook=True)

nodes = top_selected_ingredients['name'].tolist()

def get_color(row):
    if row['salad_green'] == 'y':
        return 'lightgreen'
    elif row['salad_extra'] == 'y':
        if row['veg'] == 'y':
            return 'green'
        elif row['fruit'] == 'y':
            return 'orange'
        elif row['protein_nut_seed'] == 'y':
            return 'brown'
        else:
            return 'lightblue'
    elif row['salad_dressing'] == 'y':
        return 'lightgrey'

nodes_color = top_selected_ingredients.apply(get_color, axis=1).tolist()

top_net.add_nodes(
    nodes=nodes,
    color=nodes_color
)

for pair in top_lc_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='lightgrey')

for pair in top_ld_pairs:
    top_net.add_edge(pair[0], pair[1], physics=False, color='grey')

for pair in top_uc_pairs:
    top_net.add_edge(pair[0], pair[1], color='darkgrey')

for pair in top_ud_pairs:
    top_net.add_edge(pair[0], pair[1], color='black')

vegan = top_selected_ingredients['not_vegan'].sum() == ''
gluten_free = top_selected_ingredients['gluten'].sum() == ''

top_net.show('top_net.html')

-0.0
-0.0
-0.0
-0.0
-1.5
-0.0
-1.5
-0.0
-0.0
-0.0
-3.0
-0.0
-0.0
-0.0
-6.0
-0.0
-1.5
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-3.0
-3.0
-0.0
-0.0
-3.0
-0.0
-0.0
-3.0
-0.0
-0.0
-1.5
-0.0
-0.0
-0.0
-3.0
-0.0
-1.5
-0.0
-0.0
-1.5
-0.0
-0.0
-0.0
-0.0
-0.0
-1.5
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-3.0
-1.5
-3.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-3.0
-3.0
-0.0
-0.0
-0.0
-1.5
-0.0
-0.0
-3.0
-0.0
-0.0
-1.5
-0.0
-0.0
-1.5
-6.0
-0.0
-0.0
-1.5
-0.0
-3.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-0.0
-1.5
-0.0
-0.0
-1.5
-0.0
-1.5
AVG SHORTEST PATH SCORE 0.882845188284519


NameError: name 'top_all_clashing_pairs_score' is not defined

In [944]:
lengths = nx.all_shortest_paths(G, weight='weight')
# print('all pairs dijkstra path length', nx.all_pairs_dijkstra_path_length(G))
for i, name_1 in enumerate(selected_names):
    for j, name_2 in enumerate(selected_names[i+1:]):
        print(name_1, name_2, lengths[name_1][name_2], G.get_edge_data(name_1, name_2))

TypeError: all_shortest_paths() missing 2 required positional arguments: 'source' and 'target'

In [705]:
for i, col_name in enumerate(selected_names):
    for j, row_name in enumerate(selected_names[i+1:]):
        match_value = selected_ingredients[col_name].tolist()[j]
        print(col_name, row_name, match_value)
#     print(i, 'COL NAME', col_name)
#     for match_value in selected_ingredients[col_name].tolist()[i+1:]:
#         print
#         print('MATCH VALUE', )

SPROUTS, ALFALFA SPROUTS, LENTIL 
SPROUTS, ALFALFA PECANS c
SPROUTS, ALFALFA LEEKS 
SPROUTS, ALFALFA TOMATOES, SUN-DRIED (or OVEN-DRIED TOMATOES) 
SPROUTS, ALFALFA ONIONS, VIDALIA 
SPROUTS, ALFALFA OIL, OLIVE c
SPROUTS, ALFALFA VINEGAR, BALSAMIC c
SPROUTS, ALFALFA SALT, HIMALAYAN c
SPROUTS, ALFALFA PEPPER, BLACK 
SPROUTS, LENTIL PECANS c
SPROUTS, LENTIL LEEKS 
SPROUTS, LENTIL TOMATOES, SUN-DRIED (or OVEN-DRIED TOMATOES) 
SPROUTS, LENTIL ONIONS, VIDALIA 
SPROUTS, LENTIL OIL, OLIVE 
SPROUTS, LENTIL VINEGAR, BALSAMIC c
SPROUTS, LENTIL SALT, HIMALAYAN c
SPROUTS, LENTIL PEPPER, BLACK c
PECANS LEEKS 
PECANS TOMATOES, SUN-DRIED (or OVEN-DRIED TOMATOES) 
PECANS ONIONS, VIDALIA 
PECANS OIL, OLIVE 
PECANS VINEGAR, BALSAMIC 
PECANS SALT, HIMALAYAN 
PECANS PEPPER, BLACK 
LEEKS TOMATOES, SUN-DRIED (or OVEN-DRIED TOMATOES) 
LEEKS ONIONS, VIDALIA 
LEEKS OIL, OLIVE 
LEEKS VINEGAR, BALSAMIC 
LEEKS SALT, HIMALAYAN 
LEEKS PEPPER, BLACK c
TOMATOES, SUN-DRIED (or OVEN-DRIED TOMATOES) ONIONS, VIDALIA 
TOMAT

In [265]:
n_locked = random.randrange(2, 6)
locked = salad_data.sample(n_locked)
locked_greens = locked[locked['salad_green'] == 'y']
locked_extras = locked[locked['salad_extra'] == 'y']
locked_dressing_oils = locked[locked['salad_dressing_oil'] == 'y']
locked_dressing_vinegars = locked[locked['salad_dressing_vinegar'] == 'y']
locked_dressing_salts = locked[locked['salad_dressing_salt'] == 'y']
locked_dressing_peppers = locked[locked['salad_dressing_pepper'] == 'y']

the_rest = salad_data[~salad_data['name'].isin(locked['name'])]
the_rest_greens = the_rest[the_rest['salad_green'] == 'y']
the_rest_extras = the_rest[the_rest['salad_extra'] == 'y']
the_rest_dressing_oils = the_rest[the_rest['salad_dressing_oil'] == 'y']
the_rest_dressing_vinegars = the_rest[the_rest['salad_dressing_vinegar'] == 'y']
the_rest_dressing_salts = the_rest[the_rest['salad_dressing_salt'] == 'y']
the_rest_dressing_peppers = the_rest[the_rest['salad_dressing_pepper'] == 'y']

n_gen_greens_min = max(2-len(locked_greens), 0)
n_gen_greens_max = max(3-len(locked_greens), 0)
n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)

n_gen_extras_min = max(2-len(locked_extras), 0)
n_gen_extras_max = max(4-len(locked_extras), 0)

n_gen_greens = random.randrange(n_gen_greens_min, n_gen_greens_max+1)

n_gen_extras = random.randrange(n_gen_extras_min, n_gen_extras_max+1)

n_gen_dressing_oils = max(1-len(locked_dressing_oils), 0)
n_gen_dressing_vinegars = max(1-len(locked_dressing_vinegars), 0)
n_gen_dressing_salts = max(1-len(locked_dressing_salts), 0)
n_gen_dressing_peppers = max(1-len(locked_dressing_oils), 0)

selected_greens = locked_greens.append(the_rest_greens.sample(n_gen_greens))
selected_extras = locked_extras.append(the_rest_extras.sample(n_gen_extras))
selected_dressing_oils = locked_dressing_oils.append(the_rest_dressing_oils.sample(n_gen_dressing_oils))
selected_dressing_vinegars = locked_dressing_vinegars.append(the_rest_dressing_vinegars.sample(n_gen_dressing_vinegars))
selected_dressing_salts = locked_dressing_salts.append(the_rest_dressing_salts.sample(n_gen_dressing_salts))
selected_dressing_peppers = locked_dressing_peppers.append(the_rest_dressing_peppers.sample(n_gen_dressing_peppers))
selected_ingredients = selected_greens.append(selected_extras).append(selected_dressing_oils).append(selected_dressing_vinegars).append(selected_dressing_salts).append(selected_dressing_peppers)
selected_names = selected_ingredients['name'].values.tolist()
print()
for name in selected_names:
    print(name)


SPINACH
SPROUTS, PEA
PINEAPPLE
OLIVES, PICHOLINE
BROCCOLI RABE (aka BROCCOLI RAAB or RAPINI)
APPLES (and APPLE CIDER, APPLE JUICE and/or APPLESAUCE)
OIL, OLIVE
VINEGAR, BROWN RICE (aka CHINESE BLACK VINEGAR)
SALT, HIMALAYAN
PEPPER, WHITE


In [391]:
G = nx.Graph()
G.add_nodes_from([1, 2, 3])
G.add_edge(1, 2, length=1)
G.add_edge(2, 3, length=2)
# G.add_weighted_edges_from([(1, 2, 1), (2, 3,)])
# subgraph = list(nx.connected_component_subgraphs(G))[0]
# subgraph.edges()
nx.average_shortest_path_length(G, weight='length')

2.0

In [234]:
pairing_data[selected_names]

Unnamed: 0,"SPROUTS, ALFALFA",WALNUTS,"PEPPER, BLACK","CABBAGE, RED",PECANS,"FIGS (see also FIGS, DRIED)",FLAXSEEDS,PISTACHIOS,"SALT, TRUFFLE",DATES
0,,C,c,,,D,c,c,c,d
1,,,c,,,,c,c,c,
2,c,C,d,c,c,,c,c,c,
3,,C,d,c,d,d,c,d,c,
4,,C,c,c,c,,c,c,c,
...,...,...,...,...,...,...,...,...,...,...
145,,d,c,D,,D,,c,c,c
146,,,,,,c,,,,c
147,d,,c,d,c,D,d,d,c,D
148,,c,d,c,,c,,d,c,


## 6. Creating flavor tool data

In [24]:
salad_flavor_data = salad_data[['staple', 'not_vegan', 'gluten', 'flavoring_sweet', 'flavoring_fresh', 'flavoring_wet', 'flavoring_concentrate', 'flavoring', 'flavoring_dry', 'protein', 'protein_cheese_sub', 'protein_milk_sub', 'protein_meat_sub', 'protein_bean', 'veg', 'veg_leafy', 'grain', 'grain_flour', 'fat_oil', 'oil', 'fat', 'fruit', 'fruit_berry', 'salad_green', 'salad_extra', 'salad_dressing', 'salad_dressing_oil', 'salad_dressing_salt', 'salad_dressing_pepper', 'salad_dressing_garlic', 'salad_dressing_vinegar', 'protein_nut_seed', 'protein_nut', 'protein_seed', 'salad_extra_tomato', 'salad_extra_olive', 'salad_extra_cheese', 'salad_extra_crouton', 'salad_extra_egg', 'salad_extra_other', 'salad_starter', 'salad_allium', 'salty', 'sour', 'spicy', 'bitter', 'savory', 'sweet', 'salad_crunchy', 'salad_chewy', 'salad_juicy', 'salad_basic', 'redirect', 'salad_umbrella', 'name', 'flavor', 'volume', 'pairs_with', 'phonetic', 'techniques', 'dishes', 'tip', 'possible_substitutes', 'flavor_affinities', 'nutritional_profile', 'season', 'botanical_relatives', 'protein_content', 'what_they_are', 'brands', 'vegan_substitutes', 'vegan_brands'] + salad_data['name'].tolist()].copy()
salad_flavor_data['id'] = salad_data.index

selected_pairing_data = pairing_data[['name', 'pairs_with_terms', 'lower_category_names', 'lower_direct_names', 'upper_category_names', 'upper_direct_names', 'lower_names', 'upper_names', 'all_names', 'lc_sorted_pairs', 'ld_sorted_pairs', 'uc_sorted_pairs', 'ud_sorted_pairs', 'l_sorted_pairs', 'u_sorted_pairs', 'a_sorted_pairs']]
salad_flavor_data = salad_flavor_data.merge(selected_pairing_data, how='inner', on='name')

selected_clashes_with_data = clashes_with_data[['name', 'lower_clashes_with_names', 'upper_clashes_with_names', 'all_clashes_with_names', 'lower_clashes_with_pairs', 'upper_clashes_with_pairs', 'all_clashes_with_pairs']]
salad_flavor_data = salad_flavor_data.merge(selected_clashes_with_data, how='inner', on='name')

# just in case
salad_flavor_data = salad_flavor_data.replace(float('nan'), '')

In [25]:
salad_flavor_data.to_pickle(os.path.join(root_path, 'DATA/salad_flavor_data.pickle'))
salad_flavor_data.to_pickle(os.path.join(root_path, '../flavor_tool/data/salad_flavor_data.pickle'))

In [26]:
salad_flavor_data.columns.tolist()

['staple',
 'not_vegan',
 'gluten',
 'flavoring_sweet',
 'flavoring_fresh',
 'flavoring_wet',
 'flavoring_concentrate',
 'flavoring',
 'flavoring_dry',
 'protein',
 'protein_cheese_sub',
 'protein_milk_sub',
 'protein_meat_sub',
 'protein_bean',
 'veg',
 'veg_leafy',
 'grain',
 'grain_flour',
 'fat_oil',
 'oil',
 'fat',
 'fruit',
 'fruit_berry',
 'salad_green',
 'salad_extra',
 'salad_dressing',
 'salad_dressing_oil',
 'salad_dressing_salt',
 'salad_dressing_pepper',
 'salad_dressing_garlic',
 'salad_dressing_vinegar',
 'protein_nut_seed',
 'protein_nut',
 'protein_seed',
 'salad_extra_tomato',
 'salad_extra_olive',
 'salad_extra_cheese',
 'salad_extra_crouton',
 'salad_extra_egg',
 'salad_extra_other',
 'salad_starter',
 'salad_allium',
 'salty',
 'sour',
 'spicy',
 'bitter',
 'savory',
 'sweet',
 'salad_crunchy',
 'salad_chewy',
 'salad_juicy',
 'salad_basic',
 'redirect',
 'salad_umbrella',
 'name',
 'flavor',
 'volume',
 'pairs_with',
 'phonetic',
 'techniques',
 'dishes',
 'tip',


In [67]:
for name in salad_flavor_data[salad_flavor_data['veg'] == 'y']['name']:
    print(name)

ARUGULA (aka ROCKET)
FLOWERS, EDIBLE
GREENS, DANDELION
LETTUCE, BUTTER (aka BIBB or BOSTON LETTUCE)
LETTUCE, LAMB’S (aka CORN SALAD or MCHE)
LETTUCE, ROMAINE
SORREL
SPINACH
SPROUTS, ALFALFA
SPROUTS, BROCCOLI
SPROUTS, CHICKPEA
SPROUTS, CLOVER
SPROUTS, DAIKON (see also SPROUTS, RADISH)
SPROUTS, LENTIL
SPROUTS, PEA
AVOCADO
BEETS
BELL PEPPERS—IN GENERAL, or MIXED
BROCCOLI
BROCCOLI RABE (aka BROCCOLI RAAB or RAPINI)
BROCCOLINI
CABBAGE, CHINESE (aka NAPA CABBAGE; see also BOK CHOY)
CABBAGE, GREEN
CABBAGE, RED
CARROTS
CAULIFLOWER
CELERY
CHIVES
CHIVES, GARLIC (aka CHINESE CHIVES)
CILANTRO (aka CHINESE PARSLEY or FRESH CORIANDER LEAF)
CORN
CUCUMBERS
DAIKON
ENDIVE (aka BELGIAN ENDIVE)
FENNEL
FENNEL FRONDS (or LEAVES)
GARLIC
GARLIC SCAPES
GARLIC, BLACK
GARLIC, GREEN (aka BABY GARLIC or SPRING GARLIC)
KELP, KELP GRANULES, and KELP POWDER (see also ARAME, KOMBU, SEA VEGETABLES, and WAKAME)
LEEKS
MUSHROOMS, BUTTON (aka WHITE MUSHROOMS)
MUSHROOMS, PORTOBELLO
NORI (aka LAVER)
OLIVES, KALAMATA
OLIVES, 

In [28]:
salad_flavor_data.columns

Index(['staple', 'not_vegan', 'gluten', 'flavoring_sweet', 'flavoring_fresh',
       'flavoring_wet', 'flavoring_concentrate', 'flavoring', 'flavoring_dry',
       'protein', 'protein_cheese_sub', 'protein_milk_sub', 'protein_meat_sub',
       'protein_bean', 'veg', 'veg_leafy', 'grain', 'grain_flour', 'fat_oil',
       'oil', 'fat', 'fruit', 'fruit_berry', 'salad_green', 'salad_extra',
       'salad_dressing', 'salad_dressing_oil', 'salad_dressing_salt',
       'salad_dressing_pepper', 'salad_dressing_garlic',
       'salad_dressing_vinegar', 'protein_nut_seed', 'protein_nut',
       'protein_seed', 'salad_extra_tomato', 'salad_extra_olive',
       'salad_extra_cheese', 'salad_extra_crouton', 'salad_extra_egg',
       'salad_extra_other', 'salad_starter', 'salad_allium', 'salty', 'sour',
       'spicy', 'bitter', 'savory', 'sweet', 'salad_crunchy', 'salad_chewy',
       'salad_juicy', 'salad_basic', 'redirect', 'salad_umbrella', 'name',
       'flavor', 'volume', 'pairs_with', 'phonet

In [26]:
for colname in salad_flavor_data.columns:
    print(colname)

staple
not_vegan
gluten
flavoring_sweet
flavoring_fresh
flavoring_wet
flavoring_concentrate
flavoring
flavoring_dry
protein
protein_cheese_sub
protein_milk_sub
protein_meat_sub
protein_bean
veg
veg_leafy
grain
grain_flour
fat_oil
oil
fat
fruit
fruit_berry
salad_green
salad_extra
salad_dressing
salad_dressing_oil
salad_dressing_salt
salad_dressing_pepper
salad_dressing_garlic
salad_dressing_vinegar
protein_nut_seed
protein_nut
protein_seed
salad_extra_tomato
salad_extra_olive
salad_extra_cheese
salad_extra_crouton
salad_extra_egg
salad_extra_other
salad_starter
salad_allium
salty
sour
spicy
bitter
savory
sweet
salad_crunchy
salad_chewy
salad_juicy
salad_basic
redirect
salad_umbrella
name
flavor
volume
pairs_with
phonetic
techniques
dishes
tip
possible_substitutes
flavor_affinities
nutritional_profile
season
botanical_relatives
protein_content
what_they_are
brands
vegan_substitutes
vegan_brands


In [27]:
for i, row in salad_flavor_data.iterrows():
    for col_name in salad_flavor_data.columns.tolist():
        try:
            print(type(row[col_name]), col_name.upper(), row[col_name], row[col_name].index, row[col_name].values, len(row[col_name]))
        except:
            pass

In [32]:
for col_name in pairing_data.columns:
    print(col_name)

ALMONDS (and UNSWEETENED ALMOND BUTTER; see also MILK, ALMOND)
ALMONDS, MARCONA
APPLES (and APPLE CIDER, APPLE JUICE and/or APPLESAUCE)
APRICOTS (see also APRICOTS, DRIED)
ARUGULA (aka ROCKET)
AVOCADO
AÇAI
BANANAS
BEETS
BELL PEPPERS—IN GENERAL, or MIXED
BLACKBERRIES (see also BERRIES)
BLUEBERRIES
BOYSENBERRIES (see also BLACKBERRIES)
BREAD CRUMBS, PANKO
BREAD CRUMBS, WHOLE-GRAIN
BROCCOLI
BROCCOLI RABE (aka BROCCOLI RAAB or RAPINI)
BROCCOLINI
CABBAGE, CHINESE (aka NAPA CABBAGE; see also BOK CHOY)
CABBAGE, GREEN
CABBAGE, NAPA (see CABBAGE, CHINESE)
CABBAGE, RED
CARROTS
CASHEWS and CASHEW NUT BUTTER
CAULIFLOWER
CELERY
CHEESE, CHEDDAR
CHEESE, CHÈVRE (aka FRESH GOAT CHEESE; see CHEESE, GOAT)
CHEESE, COTIJA (see CHEESE, QUESO AÑEJO)
CHEESE, FETA
CHEESE, GOAT
CHEESE, PARMESAN
CHEESE, PECORINO
CHEESE, SMOKED MOZZARELLA
CHERRIES, SOUR and SWEET
CHESTNUTS
CHIA SEEDS
CHIVES
CHIVES, GARLIC (aka CHINESE CHIVES)
CILANTRO (aka CHINESE PARSLEY or FRESH CORIANDER LEAF)
COCONUT, COCONUT CREAM, and COCON

In [33]:
print(pd.__version__)

1.0.5


In [41]:
for name in pairing_data[pairing_data['name'] == 'SORREL']['upper_category_names']:
    print(name)

['CHIVES', 'ONIONS, PEARL', 'ONIONS, SPRING (see also SCALLIONS)', 'ONIONS, VIDALIA', 'SCALLIONS (aka GREEN ONIONS or SPRING ONIONS)', 'EGGS, HARD-BOILED', 'ONIONS, CIPOLLINI']
