# Diet Problem
This is another method of using software to compute optimal solutions of a linear programming problem. We will be using the ```scipy``` package, specifically ```scipy.optimize.linprog```.

In [1]:
# Necessary packages
import pandas as pd
import numpy as np
from scipy.optimize import linprog

## We will start by loading and cleaning the data.
We need to load our data using ```pandas```.

In [2]:
# Read the diet table from disk and display it as the cell output.
diet_csv_path = '../dataset/cleaned_diet_data.csv'
df = pd.read_csv(diet_csv_path)
df

Unnamed: 0,foods,price/_serving,serving_size,calories,cholesterol_mg,total_fat_g,sodium_mg,carbohydrates_g,dietary_fiber_g,protein_g,vit_a_iu,vit_c_iu,calcium_mg,iron_mg,price_per_serving
0,Frozen Broccoli,$0.16,10 Oz Pkg,73.8,0.0,0.8,68.2,13.6,8.5,8.0,5867.4,160.2,159.0,2.3,0.16
1,"Carrots,Raw",$0.07,1/2 Cup Shredded,23.7,0.0,0.1,19.2,5.6,1.6,0.6,15471.0,5.1,14.9,0.3,0.07
2,"Celery, Raw",$0.04,1 Stalk,6.4,0.0,0.1,34.8,1.5,0.7,0.3,53.6,2.8,16.0,0.2,0.04
3,Frozen Corn,$0.18,1/2 Cup,72.2,0.0,0.6,2.5,17.1,2.0,2.5,106.6,5.2,3.3,0.3,0.18
4,"Lettuce,Iceberg,Raw",$0.02,1 Leaf,2.6,0.0,0.0,1.8,0.4,0.3,0.2,66.0,0.8,3.8,0.1,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,"New E Clamchwd,W/Mlk",$0.99,1 C (8 Fl Oz),163.7,22.3,6.6,992.0,16.6,1.5,9.5,163.7,3.5,186.0,1.5,0.99
62,"Crm Mshrm Soup,W/Mlk",$0.65,1 C (8 Fl Oz),203.4,19.8,13.6,1076.3,15.0,0.5,6.1,153.8,2.2,178.6,0.6,0.65
63,"Beanbacn Soup,W/Watr",$0.67,1 C (8 Fl Oz),172.0,2.5,5.9,951.3,22.8,8.6,7.9,888.0,1.5,81.0,2.0,0.67
64,,,Minimum daily intake,1500.0,30.0,20.0,800.0,130.0,125.0,60.0,1000.0,400.0,700.0,10.0,


It looks like there are some empty cells in the rows at the bottom that got picked up by the dataframe.

In [3]:
# Count the missing values in every column of the raw frame.
print(df.isna().sum())

foods                2
price/_serving       2
serving_size         0
calories             0
cholesterol_mg       0
total_fat_g          0
sodium_mg            0
carbohydrates_g      0
dietary_fiber_g      0
protein_g            0
vit_a_iu             0
vit_c_iu             0
calcium_mg           0
iron_mg              0
price_per_serving    2
dtype: int64


There are some problematic entries in the dataset.

In [4]:
# Keep only the rows that have no missing value in any column.
df_cleaned = df.dropna(axis=0, how="any")
df_cleaned

Unnamed: 0,foods,price/_serving,serving_size,calories,cholesterol_mg,total_fat_g,sodium_mg,carbohydrates_g,dietary_fiber_g,protein_g,vit_a_iu,vit_c_iu,calcium_mg,iron_mg,price_per_serving
0,Frozen Broccoli,$0.16,10 Oz Pkg,73.8,0.0,0.8,68.2,13.6,8.5,8.0,5867.4,160.2,159.0,2.3,0.16
1,"Carrots,Raw",$0.07,1/2 Cup Shredded,23.7,0.0,0.1,19.2,5.6,1.6,0.6,15471.0,5.1,14.9,0.3,0.07
2,"Celery, Raw",$0.04,1 Stalk,6.4,0.0,0.1,34.8,1.5,0.7,0.3,53.6,2.8,16.0,0.2,0.04
3,Frozen Corn,$0.18,1/2 Cup,72.2,0.0,0.6,2.5,17.1,2.0,2.5,106.6,5.2,3.3,0.3,0.18
4,"Lettuce,Iceberg,Raw",$0.02,1 Leaf,2.6,0.0,0.0,1.8,0.4,0.3,0.2,66.0,0.8,3.8,0.1,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,Neweng Clamchwd,$0.75,1 C (8 Fl Oz),175.7,10.0,5.0,1864.9,21.8,1.5,10.9,20.1,4.8,82.8,2.8,0.75
60,Tomato Soup,$0.39,1 C (8 Fl Oz),170.7,0.0,3.8,1744.4,33.2,1.0,4.1,1393.0,133.0,27.6,3.5,0.39
61,"New E Clamchwd,W/Mlk",$0.99,1 C (8 Fl Oz),163.7,22.3,6.6,992.0,16.6,1.5,9.5,163.7,3.5,186.0,1.5,0.99
62,"Crm Mshrm Soup,W/Mlk",$0.65,1 C (8 Fl Oz),203.4,19.8,13.6,1076.3,15.0,0.5,6.1,153.8,2.2,178.6,0.6,0.65


In [5]:
# Confirm that no missing values remain after dropping incomplete rows.
print(df_cleaned.isna().sum())

foods                0
price/_serving       0
serving_size         0
calories             0
cholesterol_mg       0
total_fat_g          0
sodium_mg            0
carbohydrates_g      0
dietary_fiber_g      0
protein_g            0
vit_a_iu             0
vit_c_iu             0
calcium_mg           0
iron_mg              0
price_per_serving    0
dtype: int64


It is data. Very nice.

## Linear Programming 
Below is the table for our dietary goals.
| Diets | Damian | Tyler | Jacob | 
| --- | --- | --- | --- |
| Calories | 2174 | 2350 | 2125 |
| Carbs | 190 | 176 | 186 |
| Protein | 272 | 294 | 266 |
| Fats | 36 | 52 | 35 |

We can do something where we make a linear program for each person, with the given constraints.

The prices in the `price/_serving` column are strings (e.g. `$0.16`), so we use the numeric `price_per_serving` column as the cost vector.

In [6]:
# Objective vector: the cost of one serving of each food.
c = df_cleaned["price_per_serving"].to_list()

# One constraint row per nutrient. linprog only takes "<=" rows, so the
# ">=" nutrient minimums are written by negating both sides.
A_columns = ["calories", "carbohydrates_g", "protein_g", "total_fat_g"]
A = -1 * df_cleaned[A_columns].values.transpose()

damian_b = [2174, 190, 272, 36]  # [calories, carbs, protein, fats]
b = -1 * np.array(damian_b)

res = linprog(c=c, A_ub=A, b_ub=b)

print(f"The daily feasible optimal cost is ${res.fun: .2f}")
# Position 48 is the peanut butter row (it is displayed as this cell's output).
print(f"The daily optimal food consumption is {res.x[48]: .2f} servings of peanut butter.")
df_cleaned.iloc[48]


The daily feasible optimal cost is $ 2.47
The daily optimal food consumption is  35.32 servings of peanut butter.


foods                Peanut Butter
price/_serving              $0.07 
serving_size                2 Tbsp
calories                     188.5
cholesterol_mg                 0.0
total_fat_g                   16.0
sodium_mg                    155.5
carbohydrates_g                6.9
dietary_fiber_g                2.1
protein_g                      7.7
vit_a_iu                       0.0
vit_c_iu                       0.0
calcium_mg                    13.1
iron_mg                        0.6
price_per_serving             0.07
Name: 48, dtype: object

#### This solution is ridiculous of course, so we must add more constraints in this second trial. 
We are going to add more constraints for cholesterol, iron, and calcium.
We are also going to limit the number of servings of peanut butter that we can have.

In [30]:
A_columns_2 = A_columns + ["iron_mg", "calcium_mg", "cholesterol_mg"]
# Now we have 7 constraints [calories, carbs, protein,
#                            fats, iron_mg, calcium_mg, cholesterol_mg]
# linprog only accepts "A_ub @ x <= b_ub" rows. Every nutrient except the
# trailing cholesterol entry is a minimum (>=), so those rows and their
# right-hand sides are negated; cholesterol is already a <= constraint.
# A single sign vector keeps the matrix and the right-hand side in step.
constraint_signs = np.ones(len(A_columns_2), dtype=int)
constraint_signs[:-1] = -1

# Build a fresh array instead of mutating the frame's values in place:
# the array returned by pandas may be read-only (Copy-on-Write), and a
# non-mutating expression gives the same result every time the cell runs.
# After the transpose there is one row per constraint and one column per food.
A_2 = df_cleaned[A_columns_2].to_numpy().transpose() * constraint_signs[:, np.newaxis]

damian_b_2 = damian_b + [8, 1000, 300]  # + [iron_mg, calcium_mg, cholesterol_mg]
b_2 = np.array(damian_b_2) * constraint_signs

print(b_2)
res_2 = linprog(c, A_ub=A_2, b_ub=b_2)
sol_2 = res_2.x
val_2 = res_2.fun
res_2

[-2174  -190  -272   -36    -8 -1000   300]


        message: Optimization terminated successfully. (HiGHS Status 7: Optimal)
        success: True
         status: 0
            fun: 2.572779538885407
              x: [ 0.000e+00  0.000e+00 ...  0.000e+00  0.000e+00]
            nit: 2
          lower:  residual: [ 0.000e+00  0.000e+00 ...  0.000e+00
                              0.000e+00]
                 marginals: [ 6.020e-02  6.196e-02 ...  5.632e-01
                              5.856e-01]
          upper:  residual: [       inf        inf ...        inf
                                    inf]
                 marginals: [ 0.000e+00  0.000e+00 ...  0.000e+00
                              0.000e+00]
          eqlin:  residual: []
                 marginals: []
        ineqlin:  residual: [ 4.261e+03  6.190e+01  0.000e+00  4.974e+02
                              1.216e+01  0.000e+00  2.918e+02]
                 marginals: [-0.000e+00 -0.000e+00 -8.774e-03 -0.000e+00
                             -0.000e+00 -1.862e-04 -0.000e

### Let's try to make this a little easier before solving for each group member. 

In [58]:
def print_results(x_sol, obj_val, person_name, food_names=None):
    """Print every food with a positive serving count, then the total cost.

    Parameters
    ----------
    x_sol : array-like of float
        Servings per food, e.g. the ``x`` attribute of a ``linprog`` result.
    obj_val : float
        Objective value (total daily cost) to report.
    person_name : str
        Name used in the header line.
    food_names : sequence of str, optional
        Food name for each position of ``x_sol``. When omitted, the first
        column of the notebook-level ``df_cleaned`` frame is used.
    """
    servings = np.asarray(x_sol, dtype=float)
    if food_names is None:
        food_names = df_cleaned.iloc[:, 0]
    # Plain list so that lookups below are positional, matching ``x_sol``.
    food_names = list(food_names)

    # Select from the solution that was passed in. The previous version read
    # the global ``sol_2`` here, so any other solution was reported with the
    # wrong set of foods (or failed if ``sol_2`` did not exist yet).
    food_i = np.nonzero(servings > 0)[0]

    print(f"The optimal daily diet for {person_name} is:\n")
    for index in food_i:
        print(f"\t {servings[index]: .2f} servings of {food_names[index]}")

    print(f"\nwhich results in an optimal cost of ${obj_val: .2f}")


In [59]:
# Report the second-trial solution for Damian.
print_results(x_sol=sol_2, obj_val=val_2, person_name="Damian")

The optimal daily diet for Damian is:

	  1.87 servings of Skim Milk
	  33.29 servings of Peanut Butter

which results in an optimal cost of $ 2.57
