In [1]:
# !pip install -r ../requirements.txt

In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Calculate the Point score

In [35]:
def get_points(bounds, value, fruit_flag=False):
    if fruit_flag:
        if value > 80:
            return 5
    for i in range(len(bounds) - 1):
        if bounds[i] < value <= bounds[i + 1]:
            return i
    return len(bounds) - 1 if value > bounds[-1] else 0


In [81]:
def CalculatePoints(product):
    # print(product)
    print(product['energy-kj_100g'])
    # define the bounds for each of the four categories for negative nutrients and positive nutrients

    # negative nutrients: energy, suger, sodium, saturated fatty acid
    energy_bounds = [0, 335, 670, 1005, 1340, 1675, 2010, 2345, 2680, 3015, 3350]
    suger_bounds = [0, 4.5, 9, 13.5, 18, 22.5, 27, 31.5, 36, 40.5, 45]
    sodium_bounds = [0, 90, 180, 270, 360, 450, 540, 630, 720, 810, 900]
    saturated_fatty_acid_bounds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    # positive nutrients: fruit, fiber, protein
    fruit_bounds = [0, 40, 60, 80, 100]
    fiber_bounds = [0, 0.9, 1.9, 2.8, 3.7, 4.7]
    protens_bounds = [0, 1.6, 3.2, 4.8, 6.4, 8.0]

    # Find the points for each of the four categories for negative nutrients
    energy_points = get_points(energy_bounds, product['energy-kj_100g'])
    suger_points = get_points(suger_bounds, product['sugars_100g'])
    sodium_points = get_points(sodium_bounds, product['sodium_100g'])
    saturated_fatty_acid_points = get_points(saturated_fatty_acid_bounds, product['saturated-fat_100g'])

    # find the points for the positive nutrients
    fruit_points = get_points(fruit_bounds, product['fruits-vegetables-nuts-estimate-from-ingredients_100g'], fruit_flag=True)
    fiber_points = get_points(fiber_bounds, product['fiber_100g'])
    protens_points = get_points(protens_bounds, product['proteins_100g'])

    # append the points to the dataframe
    product['energy_points'] = energy_points
    product['suger_points'] = suger_points
    product['sodium_points'] = sodium_points
    product['saturated_fatty_acid_points'] = saturated_fatty_acid_points
    product['fruit_points'] = fruit_points
    product['fiber_points'] = fiber_points
    product['protens_points'] = protens_points
    


### Defining criteria

In [3]:
en = "energy_points"
su = "sugars_points"
fa = "saturated_fat_points"
sa = "sodium_points"
pr = "proteins_points"
fi = "fiber_points"
fr = "fruit_points"

criteria_to_minimize = {en, su, fa, sa}
criteria_to_maximize = {pr, fi, fr}

### Defining Additive Models

In [4]:
# Additive model 1
def new_scale_transform(df):
    """
    Defines a new transformed scale for each criterion:
    1. x'_i = 10 - x_i for the criteria to be minimized i ∈ {en, su, fa, sa}
    2. x'_i = 2 * x_i for the criteria to be maximized i ∈ {pr, fi, fr}
    """
    for column in df.columns:
        if column in criteria_to_minimize:
            df[column] = 10 - df[column]
        elif column in criteria_to_maximize:
            df[column] = 2 * df[column]
    return df

In [5]:
def normal_model(df):
    # 40 − F(x) = 40 - F(x'_en, x'_su, x'_fa, x'_sa, x'_pr, x'_fi, x'_fr) = 40 - x'_en + x'_su + x'_fa + x'_sa + 1/2(x'_pr + x'_fi + x'fr)
    return (40 - (df[en] + df[su] + df[fa] + df[sa] + (1/2 * (df[pr] + df[fi] + df[fr])))).astype(int)

In [6]:
# nutrients = [en, su, fa, sa, pr, fi, fr]

# # Additive Model 2 (Utility Function 1)
# def utility_model_1(df):
#     total_value = 0    
#     utility_ranges = [(0, 5), (5, 10), (10, 15), (15, 20)]
#     utility_values = [0.2, 0.4, 0.6, 0.8, 1]  # Corresponding utility values for each range
    
#     for _, row in df.iterrows():
#         for nutrient in nutrients:
#             for i, (low, high) in enumerate(utility_ranges):
#                 if low < row[nutrient] <= high:
#                     total_value += utility_values[i]
#                 else:
#                     total_value += 0.2  # Minimum utility for quantities beyond the defined ranges
#     return total_value

# # Additive Model 3 (Utility Function 2)
# def utility_model_2(df):  
#     total_value = 0    
#     utility_ranges = [(0, 5), (5, 10), (10, 15), (15, 20)]
#     utility_values = [0.2, 0.4, 0.6, 0.8, 1][::-1]  # Corresponding utility values for each range
    
#     for _, row in df.iterrows():
#         for nutrient in nutrients:
#             for i, (low, high) in enumerate(utility_ranges):
#                 if low < row[nutrient] <= high:
#                     total_value += utility_values[i]
#                 else:
#                     total_value += 0.2  # Minimum utility for quantities beyond the defined ranges
#     return total_value

### Defining Labels for Additive Models

In [7]:
def normal_model_labels(df):
    score = df['normal_score']
    return np.where(score < 0, 'A',
        np.where(score < 3, 'B',
        np.where(score < 11, 'C',
        np.where(score < 19, 'D',
        np.where(score <= 40, 'E', 'ERROR')))))

In [8]:
# def utility_model_1_labels(df):
#     score = df['utility_score_1']
#     return np.where(score < 0.2, 'A',
#         np.where(score < 0.4, 'B',
#         np.where(score < 0.6, 'C',
#         np.where(score < 0.8, 'D',
#         np.where(score <= 1, 'E', 'ERROR')))))

# def utility_model_2_labels(df):
#     score = df['utility_score_2']
#     return np.where(score < 0.2, 'A',
#         np.where(score < 0.4, 'B',
#         np.where(score < 0.6, 'C',
#         np.where(score < 0.8, 'D',
#         np.where(score <= 1, 'E', 'ERROR')))))

### Working on data

In [82]:
df = pd.read_csv('../data/preprocessed_data_v4.csv')
# loop over the df, and call the function to calcualte the points for each product and append it to the df
points = []
for index, row in df.iterrows():
    CalculatePoints(row)

0


AttributeError: 'float' object has no attribute 'values'

In [10]:
df = new_scale_transform(df)
df.head(2)

Unnamed: 0,id,image_url,brands,pnns_groups_2,negative_points,positive_points,energy_points,sugars_points,sodium_points,saturated_fat_points,proteins_points,fiber_points,fruits_vegetables_nuts_colza_walnut_olive_oils_points,nutrition_score_fr
0,4000418000000.0,https://images.openfoodfacts.org/images/produc...,Ritter Sport,Chocolate products,22.0,0.0,3.0,5.0,10.0,0.0,0.0,0.0,0.0,22.0
1,4260402000000.0,https://images.openfoodfacts.org/images/produc...,Veganz,Chocolate products,25.0,0.0,3.0,2.0,10.0,0.0,0.0,0.0,0.0,25.0


In [11]:
# calculate score using normal model
df['normal_score'] = normal_model(df)

# calculate score using marginal utility models
# df['utility_score_1'] = utility_model_1(df)
# df['utility_score_2'] = utility_model_2(df)

In [12]:
# assign label for normal model
df['normal_label'] = normal_model_labels(df)

# assign label for marginal utility models
# df['utility_label_1'] = utility_model_1_labels(df)
# df['utility_label_2'] = utility_model_2_labels(df)

### Checking and comparing different additive models

In [13]:
df['normal_score_matching_nutriscore'] = df.apply(lambda x: x["normal_score"] == int(x["nutrition_score_fr"]), axis = 1)
df['normal_score_matching_nutriscore'].value_counts()

True    152
Name: normal_score_matching_nutriscore, dtype: int64

In [14]:
# compare = pd.DataFrame(df['normal_label'].value_counts())
# compare['utility_label_1'] = df['utility_label_1'].value_counts()
# compare['utility_label_2'] = df['utility_label_2'].value_counts()
# compare['Labels'] = compare.index

# fig, ax = plt.subplots()
# compare.plot(x = 'Labels', y = ['normal_label', 'utility_label_1', 'utility_label_2'], kind="bar", ax=ax, title="Classification of products with three different additive models")
# ax.legend(["Normal Model", "Utility Model 1", "Utility Model 2"]);