# Diet Problem
This is another method of using software to compute optimal solutions of a linear programming problem. We will be using the ```scipy``` package, specifically ```scipy.optimize.linprog```.

In [1]:
# Necessary packages
import pandas as pd
import numpy as np
from scipy.optimize import linprog
np.set_printoptions(suppress=True)

## We will start by loading and cleaning the data.
We need to load our data using ```pandas```.

In [2]:
df = pd.read_csv('../dataset/cleaned_diet_data.csv')
df

Unnamed: 0,foods,price/_serving,serving_size,calories,cholesterol_mg,total_fat_g,sodium_mg,carbohydrates_g,dietary_fiber_g,protein_g,vit_a_iu,vit_c_iu,calcium_mg,iron_mg,price_per_serving
0,Frozen Broccoli,$0.16,10 Oz Pkg,73.8,0.0,0.8,68.2,13.6,8.5,8.0,5867.4,160.2,159.0,2.3,0.16
1,"Carrots,Raw",$0.07,1/2 Cup Shredded,23.7,0.0,0.1,19.2,5.6,1.6,0.6,15471.0,5.1,14.9,0.3,0.07
2,"Celery, Raw",$0.04,1 Stalk,6.4,0.0,0.1,34.8,1.5,0.7,0.3,53.6,2.8,16.0,0.2,0.04
3,Frozen Corn,$0.18,1/2 Cup,72.2,0.0,0.6,2.5,17.1,2.0,2.5,106.6,5.2,3.3,0.3,0.18
4,"Lettuce,Iceberg,Raw",$0.02,1 Leaf,2.6,0.0,0.0,1.8,0.4,0.3,0.2,66.0,0.8,3.8,0.1,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,"New E Clamchwd,W/Mlk",$0.99,1 C (8 Fl Oz),163.7,22.3,6.6,992.0,16.6,1.5,9.5,163.7,3.5,186.0,1.5,0.99
62,"Crm Mshrm Soup,W/Mlk",$0.65,1 C (8 Fl Oz),203.4,19.8,13.6,1076.3,15.0,0.5,6.1,153.8,2.2,178.6,0.6,0.65
63,"Beanbacn Soup,W/Watr",$0.67,1 C (8 Fl Oz),172.0,2.5,5.9,951.3,22.8,8.6,7.9,888.0,1.5,81.0,2.0,0.67
64,,,Minimum daily intake,1500.0,30.0,20.0,800.0,130.0,125.0,60.0,1000.0,400.0,700.0,10.0,


It looks there are some empty columns at the bottom that got picked up by the dataframe.

In [3]:
print(df.isnull().sum())

foods                2
price/_serving       2
serving_size         0
calories             0
cholesterol_mg       0
total_fat_g          0
sodium_mg            0
carbohydrates_g      0
dietary_fiber_g      0
protein_g            0
vit_a_iu             0
vit_c_iu             0
calcium_mg           0
iron_mg              0
price_per_serving    2
dtype: int64


There are some problematic entries in the dataset.

In [4]:
df_cleaned = df.dropna()
df_cleaned

Unnamed: 0,foods,price/_serving,serving_size,calories,cholesterol_mg,total_fat_g,sodium_mg,carbohydrates_g,dietary_fiber_g,protein_g,vit_a_iu,vit_c_iu,calcium_mg,iron_mg,price_per_serving
0,Frozen Broccoli,$0.16,10 Oz Pkg,73.8,0.0,0.8,68.2,13.6,8.5,8.0,5867.4,160.2,159.0,2.3,0.16
1,"Carrots,Raw",$0.07,1/2 Cup Shredded,23.7,0.0,0.1,19.2,5.6,1.6,0.6,15471.0,5.1,14.9,0.3,0.07
2,"Celery, Raw",$0.04,1 Stalk,6.4,0.0,0.1,34.8,1.5,0.7,0.3,53.6,2.8,16.0,0.2,0.04
3,Frozen Corn,$0.18,1/2 Cup,72.2,0.0,0.6,2.5,17.1,2.0,2.5,106.6,5.2,3.3,0.3,0.18
4,"Lettuce,Iceberg,Raw",$0.02,1 Leaf,2.6,0.0,0.0,1.8,0.4,0.3,0.2,66.0,0.8,3.8,0.1,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,Neweng Clamchwd,$0.75,1 C (8 Fl Oz),175.7,10.0,5.0,1864.9,21.8,1.5,10.9,20.1,4.8,82.8,2.8,0.75
60,Tomato Soup,$0.39,1 C (8 Fl Oz),170.7,0.0,3.8,1744.4,33.2,1.0,4.1,1393.0,133.0,27.6,3.5,0.39
61,"New E Clamchwd,W/Mlk",$0.99,1 C (8 Fl Oz),163.7,22.3,6.6,992.0,16.6,1.5,9.5,163.7,3.5,186.0,1.5,0.99
62,"Crm Mshrm Soup,W/Mlk",$0.65,1 C (8 Fl Oz),203.4,19.8,13.6,1076.3,15.0,0.5,6.1,153.8,2.2,178.6,0.6,0.65


In [5]:
df_cleaned.describe()

Unnamed: 0,calories,cholesterol_mg,total_fat_g,sodium_mg,carbohydrates_g,dietary_fiber_g,protein_g,vit_a_iu,vit_c_iu,calcium_mg,iron_mg,price_per_serving
count,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0
mean,134.384375,18.071875,5.185937,332.126562,16.4375,1.503125,6.071875,777.132812,11.498437,50.296875,1.5875,0.327188
std,120.790475,42.114117,9.68453,491.718977,18.257971,2.216817,6.706771,2173.534758,28.957448,75.257796,2.477998,0.254536
min,2.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9,0.0,0.02
25%,73.4,0.0,0.5,16.175,4.625,0.0,1.975,11.75,0.0,6.775,0.3,0.145
50%,109.4,0.0,2.65,137.25,15.05,0.7,4.05,95.45,1.55,20.4,0.7,0.27
75%,159.5,17.625,6.7,335.1,22.15,1.85,8.025,499.9,7.525,51.65,2.025,0.46
max,710.8,211.5,72.2,1915.1,128.2,11.6,42.2,15471.0,160.2,302.3,16.8,0.99


In [6]:
print(df_cleaned.isnull().sum())

foods                0
price/_serving       0
serving_size         0
calories             0
cholesterol_mg       0
total_fat_g          0
sodium_mg            0
carbohydrates_g      0
dietary_fiber_g      0
protein_g            0
vit_a_iu             0
vit_c_iu             0
calcium_mg           0
iron_mg              0
price_per_serving    0
dtype: int64


It is data. Very nice.

## Linear Programming 
Below is the table for our dietary goals.
| Diets | Damian | Tyler | Jacob | 
| --- | --- | --- | --- |
| Calories | 2174 | 2350 | 2125 |
| Carbs | 190 | 176 | 186 |
| Protein | 272 | 294 | 266 |
| Fats | 36 | 52 | 35 |

We can do something where we make a linear program for each person, with the given constraints.

First, let us make a simple method to read results nicely

In [7]:
def print_results(x_sol, obj_val, person_name):
    food_i = np.nonzero(x_sol > 0)[0] # All indices of a scipy x solution
    
    print(f"The optimal daily diet for {person_name} is:\n")
    for index in food_i:
        print(f"\t {x_sol[index]: .2f} servings ({df_cleaned.iloc[index, 2]}) of {df_cleaned.iloc[index, 0]}")
    
    print(f"\nwhich results in an optimal cost of ${obj_val: .2f}.")


In [8]:
c = df_cleaned["price_per_serving"].to_list()

A_columns = ["calories", "carbohydrates_g", "protein_g", "total_fat_g"]
A = df_cleaned[A_columns].values.transpose() * -1

damian_b = [2174, 190, 272, 36]  # [calories, carbs, protein, fats]
b = np.array(damian_b) * -1

res = linprog(c, A_ub=A, b_ub=b)

print_results(res.x, res.fun, "Damian")

The optimal daily diet for Damian is:

	  35.32 servings (2 Tbsp) of Peanut Butter

which results in an optimal cost of $ 2.47.


#### This solution is ridiculous of course, so we must add more constraints in this second trial. 
We are going to add more constraints for cholesterol, iron, and calcium.
We are also going to limit the number of peanut butter that we can have.

In [9]:
A_columns_2 = A_columns + ["iron_mg", "calcium_mg", "cholesterol_mg"]
# Now we have 9 constraints [calories, carbs, protein, 
#                            fats, iron_mg, calcium_mg, cholesterol_mg]
A_2 = df_cleaned[A_columns_2].values
A_2[:,:6] *= -1 # Multiplying all rows besides cholesterol row to flip constraints: >= ---> <= 
                # Cholesterol is a <= constraint by default
A_2 = A_2.transpose() # To get it back into an appropriate form

damian_b_2 = damian_b + [8, 1000, 300]# + [iron_mg, calcium_mg, cholesterol_mg]
b_2 = np.array(damian_b_2)
b_2[:6] *= -1 # Must multiply with the columns to flip inequality sign.

res_2 = linprog(c, A_ub=A_2, b_ub=b_2)
print_results(res_2.x, res_2.fun, "Damian")

The optimal daily diet for Damian is:

	  1.87 servings (1 C) of Skim Milk
	  33.29 servings (2 Tbsp) of Peanut Butter

which results in an optimal cost of $ 2.57.


## Time to make the complete LP
We are going to add more calorie constraints so that it falls within 300 calories of our daily intake.

In [10]:
# This can used twice for the two 
calorie_A = np.array(df_cleaned["calories"])

In [11]:
A_3 = np.vstack((A_2, (calorie_A*-1).tolist()))
A_3 = np.vstack((A_3, calorie_A.tolist()))
A_3 = np.vstack((A_3, np.identity(64)))
A_3

array([[ -73.8,  -23.7,   -6.4, ..., -163.7, -203.4, -172. ],
       [ -13.6,   -5.6,   -1.5, ...,  -16.6,  -15. ,  -22.8],
       [  -8. ,   -0.6,   -0.3, ...,   -9.5,   -6.1,   -7.9],
       ...,
       [   0. ,    0. ,    0. , ...,    1. ,    0. ,    0. ],
       [   0. ,    0. ,    0. , ...,    0. ,    1. ,    0. ],
       [   0. ,    0. ,    0. , ...,    0. ,    0. ,    1. ]])

In [12]:
damian_b_3 = np.array([-2174,  -190,  -272,   -36,    -8, -1000,   300, -1874,  2474] + [5 for i in range(64)])
tyler_b_3 = np.array([-2350,  -176,  -294,   -52,    -8, -1000,   300, -2150,  2750] + [5 for i in range(64)])
jacob_b_3 = np.array([-2125,  -186,  -266,   -35,    -8, -1000,   300, -1825,  2425] + [5 for i in range(64)])

In [13]:
res_dj = linprog(c, A_ub=A_3, b_ub=damian_b_3)
print_results(res_dj.x, res_dj.fun, "Damian")

The optimal daily diet for Damian is:

	  5.00 servings (10 Oz Pkg) of Frozen Broccoli
	  1.68 servings (1/4 block) of Tofu
	  0.77 servings (1 lb chicken) of Roasted Chicken
	  5.00 servings (1 C) of Skim Milk
	  2.91 servings (2 Tbsp) of Peanut Butter
	  5.00 servings (3 Oz) of White Tuna in Water
	  1.75 servings (1 Oz) of Popcorn,Air-Popped

which results in an optimal cost of $ 6.34.


In [14]:
res_tm = linprog(c, A_ub=A_3, b_ub=tyler_b_3)
print_results(res_tm.x, res_tm.fun, "Tyler")

The optimal daily diet for Tyler is:

	  5.00 servings (10 Oz Pkg) of Frozen Broccoli
	  3.31 servings (1/4 block) of Tofu
	  0.77 servings (1 lb chicken) of Roasted Chicken
	  5.00 servings (1 C) of Skim Milk
	  4.32 servings (2 Tbsp) of Peanut Butter
	  5.00 servings (3 Oz) of White Tuna in Water
	  0.51 servings (1 Oz) of Popcorn,Air-Popped

which results in an optimal cost of $ 6.89.


In [15]:
res_jm = linprog(c, A_ub=A_3, b_ub=jacob_b_3)
print_results(res_jm.x, res_jm.fun, "Jacob")

The optimal daily diet for Jacob is:

	  5.00 servings (10 Oz Pkg) of Frozen Broccoli
	  0.96 servings (1/4 block) of Tofu
	  0.77 servings (1 lb chicken) of Roasted Chicken
	  5.00 servings (1 C) of Skim Milk
	  3.08 servings (2 Tbsp) of Peanut Butter
	  5.00 servings (3 Oz) of White Tuna in Water
	  1.59 servings (1 Oz) of Popcorn,Air-Popped

which results in an optimal cost of $ 6.12.
