In [37]:

#https://towardsdatascience.com/genetic-algorithms-in-python-using-the-deap-library-e67f7ce4024c
import pandas as pd
import numpy as np
import random
from deap import base
from deap import creator
from deap import tools



In [38]:
#goal percentages
total_calories = 2500 * 7
percentage_prot = 0.3
percentage_carb = 0.5
percentage_fat = 0.2

# compute total calories per macro
cal_prot = round(percentage_prot * total_calories)
cal_carb = round(percentage_carb * total_calories)
cal_fat = round(percentage_fat * total_calories)
print(cal_prot, cal_carb, cal_fat)

# fixed info on macro nutriments: calories per gram of protein, carb and fat
prot_cal_p_gram = 4
carb_cal_p_gram = 4
fat_cal_p_gram = 9

#goal grams
gram_prot = cal_prot / prot_cal_p_gram
gram_carb = cal_carb / carb_cal_p_gram
gram_fat = cal_fat / fat_cal_p_gram
print(gram_prot, gram_carb, gram_fat)


5250 8750 3500
1312.5 2187.5 388.8888888888889


In [39]:
# per week: min, max, cal unit, prot g,  fat g, carb g
# products_table = pd.DataFrame.from_records([
#     ['Banana 1u', 0, 4, 89, 1, 0, 23],
#     ['Mandarin 1u', 0, 4, 40, 1, 0, 10],
#     ['Ananas 100g', 0, 7, 50, 1, 0, 13],
#     ['Grapes 100g', 0, 7, 76, 1, 0, 17],
#     ['Chocolate 1 bar', 0, 4, 230, 3, 13, 25],
    
#     ['Hard Cheese 100g', 0, 8, 350, 28, 26, 2],
#     ['Soft Cheese 100g', 0, 8, 374, 18, 33, 1],
#     ['Pesto 100g', 0, 8, 303, 3, 30, 4],
#     ['Hoummous 100g', 0, 8, 306, 7, 25, 11],
#     ['Aubergine Paste 100g', 0, 4, 228, 1, 20, 8],
    
#     ['Protein Shake', 0, 5, 160, 30, 3, 5],
#     ['Veggie Burger 1', 0, 5, 220, 21, 12, 3],
#     ['Veggie Burger 2', 0, 12, 165, 16, 9, 2],
#     ['Boiled Egg', 0, 8, 155, 13, 11, 1],
#     ['Backed Egg', 0, 16, 196, 14, 15, 1],
    
#     ['Baguette Bread Half', 0, 3, 274, 10, 0, 52],
#     ['Square Bread 1 slice', 0, 3, 97, 3, 1, 17],
#     ['Cheese Pizza 1u', 0, 3, 903, 36, 47, 81],
#     ['Veggie Pizza 1u', 0, 3, 766, 26, 35, 85],
    
#     ['Soy Milk 200ml', 0, 1, 115, 8, 4, 11],
#     ['Soy Chocolate Milk 250ml', 0, 3, 160, 7, 6,20],
    
# ])
# products_table.columns = ['Name', 'Min', 'Max', 'Calories', 'Gram_Prot', 'Gram_Fat', 'Gram_Carb']

products_table = pd.read_csv('../data/large_grocery.csv',delimiter=";")
products_table



Unnamed: 0,Name,Min,Max,Calories,Gram_Prot,Gram_Fat,Gram_Carb
0,"BUTTER,WITH SALT 1 cup",0,7,717,85,8111,6
1,"BUTTER,WHIPPED,WITH SALT 1 cup",0,15,717,85,8111,6
2,"BUTTER OIL,ANHYDROUS 1 cup",0,19,876,28,9948,0
3,"CHEESE,BLUE 1 oz",0,2,353,214,2874,234
4,"CHEESE,BRICK 1 cup, diced",0,13,371,2324,2968,279
...,...,...,...,...,...,...,...
7408,"FROG LEGS,RAW",0,15,73,164,3,0
7409,"MACKEREL,SALTED 1 cup, cooked",0,1,305,185,251,0
7410,"SCALLOP,(BAY&SEA),CKD,STMD",0,12,112,232,14,0
7411,"SNAIL,RAW",0,7,90,161,14,20


In [40]:
# extract the information of products in a format that is easier to use in the deap algorithms cost function
cal_data = products_table[['Gram_Prot', 'Gram_Fat', 'Gram_Carb']]

prot_data = list(cal_data['Gram_Prot'])
fat_data = list(cal_data['Gram_Fat'])
carb_data = list(cal_data['Gram_Carb'])

In [41]:


# the random initialization of the genetic algorithm is done here
# it gives a list of integers with for each products the number of times it is bought
def n_per_product():
    return random.choices( range(0, 10), k = 7413)



In [42]:
# this is the function used by the algorithm for evaluation
# I chose it to be the absolute difference of the number of calories in the planning and the goal of calories
def evaluate(individual):
    individual = individual[0]
    tot_prot = sum(x*y for x,y in zip(prot_data,individual))
    tot_fat = sum(x*y for x,y in zip(fat_data,individual))
    tot_carb = sum(x*y for x,y in zip(carb_data,individual))
    cals = prot_cal_p_gram * tot_prot + carb_cal_p_gram * tot_carb + fat_cal_p_gram * tot_fat
    return abs(cals - total_calories),

In [43]:
# this is the setup of the deap library: registering the different function into the toolbox
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

toolbox.register("n_per_product", n_per_product)

toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)



In [44]:


# as an example, this is what a population of 10 shopping lists looks like
toolbox.population(n=10)



[[[5,
   1,
   9,
   8,
   8,
   6,
   3,
   6,
   3,
   6,
   1,
   0,
   2,
   6,
   5,
   4,
   6,
   8,
   3,
   4,
   2,
   8,
   1,
   2,
   0,
   1,
   5,
   3,
   3,
   4,
   7,
   5,
   2,
   3,
   3,
   3,
   7,
   8,
   4,
   2,
   8,
   1,
   9,
   6,
   8,
   7,
   7,
   8,
   7,
   2,
   5,
   4,
   8,
   1,
   9,
   6,
   0,
   8,
   6,
   0,
   6,
   9,
   9,
   9,
   8,
   7,
   4,
   9,
   7,
   4,
   1,
   7,
   5,
   9,
   7,
   2,
   1,
   3,
   6,
   2,
   4,
   2,
   7,
   8,
   8,
   3,
   3,
   2,
   7,
   3,
   9,
   9,
   2,
   3,
   5,
   0,
   5,
   7,
   3,
   7,
   5,
   6,
   4,
   7,
   7,
   2,
   8,
   6,
   3,
   6,
   8,
   4,
   0,
   3,
   7,
   3,
   9,
   8,
   1,
   9,
   5,
   3,
   1,
   4,
   8,
   6,
   5,
   6,
   0,
   0,
   5,
   3,
   9,
   2,
   8,
   0,
   0,
   7,
   7,
   1,
   9,
   5,
   1,
   4,
   7,
   2,
   3,
   5,
   2,
   4,
   7,
   6,
   0,
   2,
   6,
   1,
   4,
   8,
   8,
   2,
   9,
   3,
   3,
   1,
   3,
   2,
   6

In [45]:
# this is the definition of the total genetic algorithm is executed, it is almost literally copied from the deap library
def main():
    pop = toolbox.population(n=300)
    
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # CXPB  is the probability with which two individuals
    #       are crossed
    #
    # MUTPB is the probability for mutating an individual
    CXPB, MUTPB = 0.5, 0.2
    
    # Extracting all the fitnesses of 
    fits = [ind.fitness.values[0] for ind in pop]
    
    # Variable keeping track of the number of generations
    g = 0
    
    # Begin the evolution
    while g < 5000:
        # A new generation
        g = g + 1
        #print("-- Generation %i --" % g)
        
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values
            
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        #print(min(fits), max(fits), mean, std)
    
    best = pop[np.argmin([toolbox.evaluate(x) for x in pop])]
    return best

In [46]:


best_solution = main()



In [47]:


products_table['univariate_choice'] = pd.Series(best_solution[0])
products_table.head()



Unnamed: 0,Name,Min,Max,Calories,Gram_Prot,Gram_Fat,Gram_Carb,univariate_choice
0,"BUTTER,WITH SALT 1 cup",0,7,717,85,8111,6,0
1,"BUTTER,WHIPPED,WITH SALT 1 cup",0,15,717,85,8111,6,0
2,"BUTTER OIL,ANHYDROUS 1 cup",0,19,876,28,9948,0,0
3,"CHEESE,BLUE 1 oz",0,2,353,214,2874,234,0
4,"CHEESE,BRICK 1 cup, diced",0,13,371,2324,2968,279,0


In [48]:


# in this second version, we optimize for the four components of the shopping list: calories, protein, fat and carbs
# if we need to make everything as important, we should add a weight to them
# we know that there are 30% protein calories, 20% fat and 50% carbs.
weights = (-1., -1. / 0.3, -1. / 0.2, -1./0.5)



In [49]:


creator.create("FitnessMin", base.Fitness, weights=weights)
creator.create("Individual", list, fitness=creator.FitnessMin)





In [50]:
def evaluate(individual):
    individual = individual[0]
    tot_prot = sum(x*y for x,y in zip(prot_data,individual))
    tot_fat = sum(x*y for x,y in zip(fat_data,individual))
    tot_carb = sum(x*y for x,y in zip(carb_data,individual))
    cals = prot_cal_p_gram * tot_prot + carb_cal_p_gram * tot_carb + fat_cal_p_gram * tot_fat
    
    
    return abs(cals - total_calories), abs(tot_prot - gram_prot), abs(tot_fat - gram_fat), abs(tot_carb - gram_carb)

In [51]:
# this is the setup of the deap library: registering the different function into the toolbox
toolbox = base.Toolbox()

toolbox.register("n_per_product", n_per_product)

toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

In [52]:
def main():
    pop = toolbox.population(n=300)
    
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # CXPB  is the probability with which two individuals
    #       are crossed
    #
    # MUTPB is the probability for mutating an individual
    CXPB, MUTPB = 0.5, 0.2
    
    # Extracting all the fitnesses of 
    fits = [ind.fitness.values[0] for ind in pop]
    
    # Variable keeping track of the number of generations
    g = 0
    
    # Begin the evolution
    while g < 5000:
        # A new generation
        g = g + 1
        #print("-- Generation %i --" % g)
        
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values
            
                
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        #print(min(fits), max(fits), mean, std)
    
    
    best = pop[np.argmin([sum(toolbox.evaluate(x)) for x in pop])]
    return best

In [53]:


best_solution = main()



In [54]:
products_table['multivariate_choice'] = pd.Series(best_solution[0])

In [55]:


products_table['univariate_gr_prot'] = products_table['univariate_choice'] * products_table['Gram_Prot']
products_table['univariate_gr_fat'] = products_table['univariate_choice'] * products_table['Gram_Fat']
products_table['univariate_gr_carb'] = products_table['univariate_choice'] * products_table['Gram_Carb']
products_table['univariate_cal'] = products_table['univariate_choice'] * products_table['Calories']

products_table['multivariate_gr_prot'] = products_table['multivariate_choice'] * products_table['Gram_Prot']
products_table['multivariate_gr_fat'] = products_table['multivariate_choice'] * products_table['Gram_Fat']
products_table['multivariate_gr_carb'] = products_table['multivariate_choice'] * products_table['Gram_Carb']
products_table['multivariate_cal'] = products_table['multivariate_choice'] * products_table['Calories']



In [56]:
summary = pd.DataFrame.from_records(
[
    [products_table['univariate_gr_prot'].sum(), products_table['multivariate_gr_prot'].sum(), gram_prot],
    [products_table['univariate_gr_fat'].sum(), products_table['multivariate_gr_fat'].sum(), gram_fat],
    [products_table['univariate_gr_carb'].sum(), products_table['multivariate_gr_carb'].sum(), gram_carb],
    [products_table['univariate_cal'].sum(), products_table['multivariate_cal'].sum(), sum((cal_prot, cal_carb, cal_fat))]
])
summary.columns = ['univariate', 'multivariate', 'goal']
summary.index = ['prot', 'fat', 'carb', 'cal']
summary["univ_error"] = (summary["goal"] - summary["univariate"]).apply(abs)
summary["multiv_error"] = (summary["goal"] - summary["multivariate"]).apply(abs)
summary

Unnamed: 0,univariate,multivariate,goal,univ_error,multiv_error
prot,1747773,1792877,1312.5,1746460.0,1791564.0
fat,987800,981548,388.888889,987411.1,981159.1
carb,2533900,2494202,2187.5,2531712.0,2492014.0
cal,528734,525259,17500.0,511234.0,507759.0


In [57]:
summary["univ_error"].sum(), summary["multiv_error"].sum()

(5776818.111111111, 5772497.111111111)

In [61]:
# Shopping list
pd.set_option('display.max_rows',7500)
products_table[['Name', 'multivariate_choice', 'univariate_choice']]


Unnamed: 0,Name,multivariate_choice,univariate_choice
0,"BUTTER,WITH SALT 1 cup",0,0
1,"BUTTER,WHIPPED,WITH SALT 1 cup",0,0
2,"BUTTER OIL,ANHYDROUS 1 cup",0,0
3,"CHEESE,BLUE 1 oz",0,0
4,"CHEESE,BRICK 1 cup, diced",1,0
5,"CHEESE,BRIE 1 cup, melted",0,1
6,"CHEESE,CAMEMBERT 1 cup",0,0
7,"CHEESE,CARAWAY 1 oz",0,1
8,"CHEESE,CHEDDAR 1 cup, diced",0,0
9,"CHEESE,CHESHIRE 1 oz",1,1
