### Obtaining three known constraints on protein, calories, and carbohydrates, plus the set of data points satisfying them all.

In [1]:
import itertools
import json

import numpy as np
import pandas as pd

In [2]:
# Observations to screen against the nutritional bounds. Food.feasibility reads
# columns 1-5 of this table by position and appends its y/n flag columns to it.
data = pd.read_csv(r'Data/test_F1959.csv')

'''
The nutrition values for one serving of each food group are based on data
published by the Government of Canada:
"Nutrient Value of Some Common Foods", 2008.
URL: https://www.canada.ca/en/health-canada/services/food-nutrition/healthy-eating/nutrient-data/nutrient-value-some-common-foods-2008.html
The following tables were gathered from the above link.
'''
# Food groups stored in a single CSV each.
Fruits, Vegetables, Dairy = (
    pd.read_csv(path)
    for path in (r'Data/Fruits.csv', r'Data/Vegetables.csv', r'Data/Dairy.csv')
)

# Grains and protein foods are split across several CSV files.
g1, g2 = (
    pd.read_csv(path)
    for path in (r'Data/grain1.csv', r'Data/grain2.csv')
)
p1, p2, p3, p4 = (
    pd.read_csv(path)
    for path in (r'Data/pro1.csv', r'Data/pro2.csv', r'Data/pro3.csv', r'Data/pro4.csv')
)

Grains = pd.concat([g1, g2])
# The protein files carry a misspelled fibre header; rename it so that it
# matches the 'Total Dietary Fibre (g)' label used for lookups later on.
Protein = pd.concat([p1, p2, p3, p4]).rename(
    columns={'Total Daietary Fibre (g)': 'Total Dietary Fibre (g)'}
)

# Replace missing values with 0 and tag every table with its food-group label;
# the label is read back later through the `.name` attribute.
for _label, _table in (
    ('Grains', Grains),
    ('Protein', Protein),
    ('Fruits', Fruits),
    ('Vegetables', Vegetables),
    ('Dairy', Dairy),
):
    _table.fillna(0, inplace=True)
    _table.name = _label
del _label, _table

In [6]:
''' 
Note: the serving size of each food category (the values of the `unit` dictionary below) is taken from the paper that provided the main dataset,
titled "An open-source dataset on dietary behaviors and dash eating plan optimization constraints".
'''
class Food:
    """Per-serving nutrient coefficients and bound checks for one nutrient.

    Parameters
    ----------
    data : pandas.DataFrame
        Observations to check. Columns 1-5 (by position) are multiplied with
        the coefficients in the order of ``dfs``. The frame is modified in
        place by :meth:`feasibility`.
    dfs : list of pandas.DataFrame
        One nutrient table per food group. Each table needs a ``name``
        attribute that is a key of ``unit`` and a ``"Weight (g)"`` column.
    content : str
        Nutrient column to evaluate, e.g. ``"Energy (kcal)"``.
    upper, lower : number, optional
        Bounds on the total amount of ``content``. A value of 0 means the
        bound is not checked.
    """

    def __init__(self, data, dfs, content, upper=0, lower=0):
        # Serving size per food group, used to scale the per-gram averages.
        self.unit = {"Fruits": 150, "Vegetables": 30, "Grains": 36, "Protein": 39, "Dairy": 240}  # f,v,g,p,d
        self.dfs = dfs
        self.content = content
        self.output = {}
        self.num_points = data.shape[0]
        self.num_vars = data.shape[1]
        self.upper = upper
        self.lower = lower
        self.data = data

    def func(self):
        """Return ``{food group: amount of content in one serving}``.

        For each table the per-gram amount of every food is averaged and then
        scaled by the group's serving size. A table without the ``content``
        column contributes 0. The result is also kept in ``self.output``.
        """
        for d in self.dfs:
            if self.content not in d.columns:
                self.output[d.name] = 0
                continue
            one_gram = d[self.content] / d["Weight (g)"]
            self.output[d.name] = np.average(one_gram) * self.unit[d.name]
        # NOTE: a fixed override (Dairy sodium = 250) was sketched here at one
        # point but is intentionally not applied.
        return self.output

    def feasibility(self, dic_coef):
        """Flag each row of ``self.data`` against the configured bounds.

        ``dic_coef`` holds one coefficient per food group (as returned by
        :meth:`func`); its values are used in insertion order against columns
        1-5 of ``self.data``. For every non-zero bound a column named
        ``<content>Upper`` / ``<content>Lower`` is added in place, holding
        ``"y"`` where the row satisfies the bound and ``"n"`` otherwise.

        Returns the same (mutated) ``self.data`` frame.
        """
        coef = np.array([float(value) for value in dic_coef.values()])
        if self.upper == 0 and self.lower == 0:
            return self.data

        servings = self.data.iloc[:, 1:6].values
        # Total amount of `content` per observation.
        totals = [np.dot(coef, row) for row in servings]

        # Use the instance's own table and label rather than module globals,
        # so the class also works outside the notebook's global namespace.
        if self.upper != 0:
            self.data[self.content + "Upper"] = [
                "y" if self.upper - total >= 0 else "n" for total in totals
            ]
        if self.lower != 0:
            self.data[self.content + "Lower"] = [
                "y" if total - self.lower >= 0 else "n" for total in totals
            ]

        return self.data
        

In [None]:
'''
Note: the upper and lower bounds of the nutritional requirements (Contents) are derived from available sources on the internet.
'''

# Nutrients to evaluate, with the bound values aligned to them by position.
Contents = [
    'Energy (kcal)', 'Protein (g)', 'Carbohydrate (g)', 'Total Fat (g)', 'Saturated Fat (g)',
    'Cholesterol (mg)', 'Sodium (mg)', 'Total Sugar (g)', 'Total Dietary Fibre (g)',
]

upperval = [2400, 175, 325, 81.66, 23.33, 200, 200, 30, 35]
lowerval = [1600, 50, 225, 0, 0, 0, 0, 0, 20]  # 0 -> no lower bound is checked

Upperbound = dict(zip(Contents, upperval))
Lowerbound = dict(zip(Contents, lowerval))
print(Upperbound)
print(Lowerbound)

# The order of the tables must match the order of the food-group columns in `data`.
dfs = [Fruits, Vegetables, Grains, Protein, Dairy]

# For every nutrient: compute the per-serving coefficients, then flag each
# observation against that nutrient's bounds. `data_updated` ends up holding
# the table returned by the last call.
for content in Upperbound:
    obj = Food(
        data,
        dfs,
        content,
        upper=Upperbound[content],
        lower=Lowerbound[content],
    )
    dic_coef = obj.func()
    print(dic_coef)
    data_updated = obj.feasibility(dic_coef)

'''
The loop above computes, for each item in the Contents list, the nutrient amount in one serving of each food category.
A dictionary is generated for each nutritional requirement (such as Energy, Protein, Carbohydrate), containing the coefficients specific to the different food categories.
'''

In [13]:
'''
We generate all possible combinations of nutritional requirements to determine the number of data points meeting diverse nutritional constraints. 
This exploration is needed for identifying a set of data points adhering to specific upper or lower bound constraints on nutritional requirements.
'''

# Flag columns appended by Food.feasibility; the first 6 columns are taken to
# be the original columns of the dataset.
y_n_cols = list(data_updated.columns)[6:]

# Evaluate every flag column once. Combining these boolean masks gives the same
# counts as filtering a fresh copy of the table per combination, without
# copying the whole DataFrame thousands of times.
passes = {col: (data_updated[col] == 'y').to_numpy() for col in y_n_cols}

# All subsets of at least two flag columns. The upper limit follows the number
# of flag columns so no combination size is skipped if more columns are added.
combinations = []
for num_cols in range(2, len(y_n_cols) + 1):
    combinations += itertools.combinations(y_n_cols, num_cols)

res = []
for col_combs in combinations:
    # A row matches when it is flagged 'y' in every column of the combination.
    keep = passes[col_combs[0]]
    for col in col_combs[1:]:
        keep = keep & passes[col]

    res.append(
        dict(
            cols=list(col_combs),
            # Plain int so the value is JSON serialisable.
            matching_rows=int(keep.sum()),
        )
    )

# Largest matching sets first; ties keep their generation order.
res.sort(key=lambda x: x['matching_rows'], reverse=True)

with open('output_1959.json', 'w') as fp:
    json.dump(res, fp)

In [12]:
# Keep only the three flags of interest (energy upper, protein lower,
# carbohydrate upper) by dropping the other flag columns by position, then
# retain the rows that satisfy all three.
data_test1 = (
    data_updated
    .drop(columns=data_updated.columns[[7, 8, 11, 12, 13, 14, 15, 16, 17, 18]])
    .loc[
        lambda frame: (
            frame[['Energy (kcal)Upper', 'Protein (g)Lower', 'Carbohydrate (g)Upper']] == 'y'
        ).all(axis=1)
    ]
)

### Only 30 data points from the above dataset are chosen as the input for the inverse optimization model.