In [1]:
import itertools
import re
import numpy as np
import pandas as pd

In [2]:
# Load the pairing table that is later passed to data_query, which reads its
# `ingredient1`, `ingredient2` and `scaled_col` columns.
df = pd.read_parquet("Halved-DF.parquet.gzip")

In [3]:
def data_query(df, ingredients_combinations):
    '''
    Look up the pairing score(s) for every ingredient combination.

    INPUT:
        df: DataFrame with the columns `ingredient1`, `ingredient2` and
            `scaled_col`. A pairing is matched in either orientation.
        ingredients_combinations: iterable whose items are either
            * a bare pair (fewer than 3 elements): a 2-tuple of ingredient names, or
            * a group (3 or more elements): a list of 2-tuples, one per pair
              inside a larger ingredient subset.

    OUTPUT: DataFrame with the columns `Combination` and `Score`.
        * Bare pair: `Score` is the array of every matching `scaled_col`
          value. A bare pair with no match is dropped (no penalty row).
        * Group: `Score` is a list with one entry per pair, the first matching
          `scaled_col` value or the penalty -5 when the pair is absent.
        The two columns are present even when no row was produced.
    '''
    missing_pair_penalty = -5
    # The same pair occurs in many groups, so each pair is looked up only once.
    pair_cache = {}

    def lookup(first, second):
        key = frozenset((first, second))
        if key not in pair_cache:
            # Boolean masks instead of an interpolated query string: names that
            # contain quotes can no longer break the lookup.
            forward = (df['ingredient1'] == first) & (df['ingredient2'] == second)
            backward = (df['ingredient1'] == second) & (df['ingredient2'] == first)
            pair_cache[key] = df.loc[forward | backward, 'scaled_col'].values
        return pair_cache[key]

    rows = []
    for combination in ingredients_combinations:
        if len(combination) < 3:
            ingredient1, ingredient2 = combination
            matches = lookup(ingredient1, ingredient2)
            if len(matches) > 0:
                rows.append({'Combination': combination, 'Score': matches})
        else:
            group_scores = []
            for ingredient1, ingredient2 in combination:
                matches = lookup(ingredient1, ingredient2)
                group_scores.append(matches[0] if len(matches) > 0 else missing_pair_penalty)
            rows.append({'Combination': combination, 'Score': group_scores})

    return pd.DataFrame(rows, columns=['Combination', 'Score'])

In [4]:
def combinations_of_two(ingredients_input):
    '''
    Expand a comma-separated ingredient string into scoreable combinations.

    INPUT: a string such as 'sugar, butter, flour'. Whitespace around names is
           ignored, duplicates are removed (first occurrence wins) and empty
           entries (e.g. from a trailing comma) are skipped.

    OUTPUT: a list that contains, in order of increasing subset size,
        * every 2-ingredient subset as a tuple (a, b), and
        * for every subset of 3 or more ingredients, a list of all the
          2-tuples that can be formed inside that subset.
        An input with fewer than two distinct ingredients yields [].
    '''
    stripped = (token.strip() for token in ingredients_input.split(','))
    # dict.fromkeys de-duplicates while keeping input order, so the result is
    # reproducible between runs (a set would reorder the names arbitrarily).
    ingredients_list = list(dict.fromkeys(name for name in stripped if name))

    powerset = []
    for size in range(2, len(ingredients_list) + 1):
        for comb in itertools.combinations(ingredients_list, size):
            if size == 2:
                powerset.append(comb)
            else:
                # Larger subsets are represented by the pairs they contain.
                powerset.append(list(itertools.combinations(comb, 2)))
    return powerset

In [23]:
def muse_comb(data_query_df, top_n=3):
    '''
     The function sums the "Score" values of every row and returns the
     ingredients of the `top_n` (default three) combinations with the largest sums.

     INPUT: a DataFrame with the columns "Combination" and "Score", where
            "Combination" is either a tuple of two ingredient names or a list
            of such tuples. A "Sum" column is added to the DataFrame that is
            passed in.

     OUTPUT: one list of unique ingredient names per selected combination,
             best first, names in first-seen order, e.g.
                 [['sugar', 'butter', 'flour', 'yeast'],
                  ['sugar', 'butter', 'flour'],
                  ['sugar', 'butter']]
             Fewer than `top_n` lists are returned when there are fewer rows;
             an empty DataFrame gives [].

     NOTE FOR FRONT-END: The return is a list of lists so access the values by indexing e.g. output[0]

                         The output of this function is the input for the recipe generator

                         We might need a function to convert each lists into strings if
                         the recipe generator doesn't do this automatically.
    '''
    if len(data_query_df) == 0:
        return []

    def calculate_sum(values):
        return sum(values)

    def unique_ingredients(combination):
        if all(isinstance(item, str) for item in combination):
            # A bare pair is already a flat tuple of names; indexing into its
            # elements would split the names into single characters.
            names = combination
        else:
            names = (name for pair in combination for name in pair)
        # dict.fromkeys removes duplicates but keeps a reproducible order.
        return list(dict.fromkeys(names))

    # One pass over the rows is enough to fill the whole column.
    data_query_df["Sum"] = data_query_df["Score"].apply(calculate_sum)

    best_combinations = data_query_df.nlargest(top_n, "Sum")["Combination"]

    return [unique_ingredients(combination) for combination in best_combinations]

In [6]:
def get_ingredients_combinations(ingredients: str, pair_scores=None):
    '''
    List every ingredient subset (size 2 and up) whose pairs all have a score.

    INPUT:
        ingredients: comma-separated ingredient names; surrounding whitespace
                     is stripped.
        pair_scores: optional mapping with a `.get((a, b), default)` lookup.
                     When omitted, the notebook-level `scores` object is used.
                     NOTE(review): `scores` is not defined in the visible
                     cells; presumably a dict keyed by (a, b) tuples. Confirm.

    OUTPUT: list of (subset_tuple, min_score) for all subset sizes. A subset is
            dropped as soon as one of its pairs is missing or scores 0.

    NOTE(review): each pair is looked up as (a, b) only, in input order. If
    the mapping stores just one orientation, reversed pairs count as missing.
    '''
    if pair_scores is None:
        pair_scores = scores

    names = [x.strip() for x in ingredients.split(',')]
    candidates = []
    for size in range(2, len(names) + 1):
        for group in itertools.combinations(names, size):
            # Start from infinity so genuine scores above any fixed cap survive.
            min_score = float('inf')
            for a, b in itertools.combinations(group, 2):
                min_score = min(pair_scores.get((a, b), 0), min_score)
                if min_score == 0:
                    break
            else:
                # No pair was missing or zero: keep the subset.
                candidates.append((group, min_score))
    # Return only after every subset size has been examined.
    return candidates

In [7]:
# Expand a sample ingredient string into every 2-ingredient pair plus, for each
# larger subset, the list of pairs it contains.
combinations = combinations_of_two('sugar, butter, flour, yeast, cabbage, pork, eggs, pepper')

In [8]:
# Look up the pairing scores of every combination in the table loaded into `df`.
result = data_query(df, combinations)

In [24]:
# Keep the best-scoring combinations as flat ingredient lists.
# Side effect: muse_comb adds a "Sum" column to `result`.
lists = muse_comb(result)

In [37]:
# Display the suggested ingredient lists (a list of lists, best first).
lists

[['yeast', 'butter', 'eggs', 'pepper', 'cabbage', 'pork', 'flour', 'sugar'],
 ['butter', 'eggs', 'pepper', 'cabbage', 'pork', 'flour', 'sugar'],
 ['yeast', 'butter', 'eggs', 'pepper', 'cabbage', 'flour', 'sugar']]