# TFM - Cristian Leguisamon - Universidad Castilla-La Mancha 
## Master en ciencia de datos e ingeniería de datos en la nube
### TFM - Parte 02

Carga de módulos, librerias y configuraciones

In [36]:
!pip install pulp
!pip install openai
import json
import pandas as pd
import openai
from collections import defaultdict
from pulp import LpProblem, LpMaximize, LpVariable, lpSum, LpStatus
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.exceptions import ConvergenceWarning
import numpy as np
import copy
import warnings

from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

OPEN_AI_KEY='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'
OPEN_AI_ORG='org-w13PnAMq4ebjDxHSqcRxrB9c'
GPT_ENGINE='text-davinci-003'
openai.organization = OPEN_AI_ORG
openai.api_key = OPEN_AI_KEY
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=UserWarning, message="X does not have valid feature names")
complete_data_folder = "Full_Dataset"



In [49]:
df_user_data = pd.read_csv(f"{complete_data_folder}/df_user_data.csv")
df_nutrition= pd.read_csv(f"{complete_data_folder}/df_nutrition.csv")
df_nutrition.fillna(0, inplace=True)
df_nutrition = df_nutrition.drop_duplicates(subset=['name'])
with open('nutrition_structure.json', 'r') as json_file:
    nutrition_structure = json.load(json_file)
with open('nutrition_equations.json', 'r') as json_file:
    nutrition_equations = json.load(json_file)

Definimos la Funcion nutrition_settings. Sirve para calcular la cantidad de nutrientes necesarios por un individuo, para cada comida (desayuno, almuerzo, merienda y cena), en funcion del tipo de comida, su edad, actividad, peso y calorias consumidas previamente.

In [22]:
def nutrition_settings(filtered_rows,nutrition_structure):
    nutrition_structure_copy = copy.deepcopy(nutrition_structure)
    meals = ['breakfast','lunch','snack','dinner']
    weight = filtered_rows['WeightKg'].iloc[0]
    calories = filtered_rows['Calories'].iloc[0]
    avg_activity = filtered_rows['avg_activity'].iloc[0]
    if int(filtered_rows['age'].iloc[0]) > 59:
        age_factor = 0.8
    else:
        age_factor = 1
        
    for meal in meals:
        for nutrient, equation in nutrition_equations.get(meal).items():
            calculated_value = eval(equation, {'calories': calories, 'weight': weight, 'age_factor': age_factor, 'avg_activity': avg_activity})
            nutrition_structure_copy[meal][nutrient]['amount'] = round(calculated_value,2)
    return nutrition_structure_copy

Creamos un dataset que permite conocer, para cada individuo, la cantidad de nutrientes que debe ingerir en cada comida, considerando sus caracteristicas personales

In [23]:
unique_ids = df_user_data['Id'].unique()
nutrition_results = {}
for id_value in unique_ids:
    filtered_rows = df_user_data.loc[df_user_data['Id'] == id_value]
    result = nutrition_settings(filtered_rows, nutrition_structure)
    nutrition_results[id_value] = result

In [24]:
rows = []
for user_id, meals in nutrition_results.items():
    for meal, nutrients in meals.items():
        row = {'Id': user_id, 'Meal': meal}
        for nutrient, values in nutrients.items():
            row[nutrient +'_meal'] = values['amount']
        rows.append(row)

df_estimations = pd.DataFrame(rows)

Para tener un backup de los datos, los guardamos en el siguiente fichero

In [None]:
df_estimations.to_csv(f"{complete_data_folder}/df_estimations.csv", index=False)

A continuación, creamos un df alternativo con las variables necesarias sobre el usuario

In [50]:
user_info_selected = df_user_data.loc[:, ['Id', 'age','avg_activity','WeightKg','BMI','Height','Calories']]
df_combined = df_estimations.merge(user_info_selected, on='Id')
columns_to_convert = ['Calories_meal', 'Proteins_meal', 'Carbohydrates_meal', 'Fats_meal', 'Fiber_meal',
                      'Sugar_meal', 'Calcium_meal', 'Vitamin C_meal', 'age', 'avg_activity', 'WeightKg',
                      'BMI', 'Height', 'Calories']
df_combined[columns_to_convert] = df_combined[columns_to_convert].apply(pd.to_numeric, errors='coerce')
df_combined.fillna(0, inplace=True)

Antes de continuar, consultamos los datos para un usuario especifico y calculamos los valores nutricionales necesarios (que actuarán como valores límites)

In [26]:
id_search = 1503960366
filtered_rows = df_user_data.loc[df_user_data['Id'] == id_search]
nutrition_structure = nutrition_settings(filtered_rows,nutrition_structure)

In [27]:
id_list = []
moment_list = []
calories_list = []
proteins_list = []
carbs_list = []
fats_list = []
fiber_list = []
sugar_list = []

for moment, values in nutrition_structure.items():
    id_list.append(id_search)  # Assuming the same Id for all rows
    moment_list.append(moment)
    calories_list.append(values['Calories']['amount'])
    proteins_list.append(values['Proteins']['amount'])
    carbs_list.append(values['Carbohydrates']['amount'])
    fats_list.append(values['Fats']['amount'])
    fiber_list.append(values['Fiber']['amount'])
    sugar_list.append(values['Sugar']['amount'])

# Create a DataFrame
df_user_sample = pd.DataFrame({
    'Id': id_list,
    'Moment': moment_list,
    'Calories': calories_list,
    'Proteins': proteins_list,
    'Carbohydrates': carbs_list,
    'Fats': fats_list,
    'Fiber': fiber_list,
    'Sugar': sugar_list
})

print(df_user_sample)

           Id     Moment  Calories  Proteins  Carbohydrates  Fats Fiber  Sugar
0  1503960366  breakfast    496.25      5.75          11.51  2.56    15     20
1  1503960366      lunch    694.75      6.90           9.20  3.07           15
2  1503960366      snack    297.75      5.75          11.51  2.56    10     10
3  1503960366     dinner    496.25      5.75          11.51  2.56    15     10


Para un tipo de comida (breakfast, dinner, etc), obtenemos el valor nutricional que estimado

In [28]:
def get_constraints(nutrition_structure,meal):
    return nutrition_structure.get(meal)

In [29]:
def get_list_from_openai(food_list, meal):
    PROMPT_NEW_TEXT = f"Use this list to create another one, only with meals that can be used, in general, for {meal}. Do not add any aditional meal. Your answer must be a list comma separated and nothing else. Do not add any text before or after your answer. The base list is {food_list}"
    response_body = openai.Completion.create(
    engine=GPT_ENGINE, prompt=PROMPT_NEW_TEXT, temperature=0.5, max_tokens=1500
    )
    food_list = response_body["choices"][0]["text"]
    food_list = food_list.strip().rstrip('.').split(', ')
    return food_list

Del listado de alimentos seleccionamos aquellos que, en términos generales, suelen ser utilizados en cada comida (breakfast, lunch, etc.) Para esto, utilizamos la librería de OpenAI

In [30]:
def first_recommendations(df_nutrition, meal):
    food_list = get_list_from_openai(df_nutrition['name'].to_list(),meal)
    df_nutrition = df_nutrition[df_nutrition['name'].isin(food_list)]
    return df_nutrition

Para hacer una estimación de la mejor combinación posible, utilizamos programación lineal, estableciendo límites (calorías y un parámetro adicional). Este parámetro queda a criterio del usuario. Si para el usuario es muy importante controlar los niveles de azúcar, el límite está establecido con esa variable. Hay otros límites establecidos, como por ejemplo una cantidad máxima de alimentos repetidos y una cantidad máxima de alimentos por categoría (vegetales, carne, etc.). La sumatoria de los alimentos elegidos, no puede superar el límite de las calorías indicadas.

In [31]:
def get_food_recommendations(meals_dataset,constraints, control_var):
    recomendations = []
    max_units_per_food = 10
    max_units_per_category = 10
    name_values = meals_dataset['name'].unique()
    selected_counts = defaultdict(int)
    meals_dataset = meals_dataset.sample(frac=1)
    problem = LpProblem("BreakfastIdeas",LpMaximize);
    meals = meals_dataset['name'].to_list()
    calories = dict(zip(meals,meals_dataset['Calories']))
    meals_vars = LpVariable.dicts("Meals", meals, lowBound=0, upBound=max_units_per_food, cat='Integer')
    
    for f in meals:
        category = meals_dataset.loc[meals_dataset['name'] == f, 'Category'].values[0]
        selected_counts[category] += meals_vars[f]
        restriction_name = f"LímiteMax_{category}_{f.replace(' ', '_')}"
        problem += selected_counts[category] <= max_units_per_category, restriction_name
        
    total_calories = lpSum(meals_vars[f] * meals_dataset.loc[meals_dataset['name'] == f, 'Calories'].values[0] for f in meals)
    lower_calories_limit = 0.5 * constraints['Calories']
    upper_calories_limit = 1.5 * constraints['Calories']
    problem += lower_calories_limit <= total_calories <= upper_calories_limit, "Limit_Calories"
       
    if control_var:        
        total_control_var = lpSum(meals_vars[f] * meals_dataset.loc[meals_dataset['name'] == f, control_var].values[0] for f in meals)
        lower_control_var_limit = 0.5 * constraints[control_var]  # Límite inferior (60% de 450)
        upper_control_var_limit = 1.5 * constraints[control_var]  # Límite superior (40% de 450)
        problem += lower_control_var_limit <= total_control_var <= upper_control_var_limit, f"Limit_{control_var}"
        
    
    problem += lpSum([calories[f] * meals_vars[f] for f in meals]) >= constraints['Calories'] - constraints['Calories'] * 0.25, "MínimoCalorías"
    problem += lpSum([calories[f] * meals_vars[f] for f in meals]) <= constraints['Calories'] + constraints['Calories'] * 0.25, "MáximoCalorías"
    problem.solve()
    
    for v in problem.variables():
        if not '__dummy' in v.name:
            if v.varValue>0:
                recomendations.append(v.name)
    if recomendations:
        recomendations = [elemento.replace('Meals_', '') for elemento in recomendations]
        recomendations = [i.replace('_', ' ') for i in recomendations]
    return recomendations

### Recomendaciones para una sola persona preexistente

In [32]:
meal = 'breakfast' #breakfast,dinner,snack,lunch
nutrient_control = 'Calcium'
constraints = get_constraints(nutrition_structure,meal)
new_dict = {}
for nutrient, values in constraints.items():
    new_dict[nutrient] = values['amount']
constraints = new_dict
print(f"Nutrition limits for {meal}:")
for nutrient, limit in constraints.items():
    print(f"{nutrient}: {limit}")
print("------------------")
df_nutrition_copy = df_nutrition
meals_dataset_filtered = first_recommendations(df_nutrition_copy, meal)
recomendations = get_food_recommendations(meals_dataset_filtered,constraints,nutrient_control)
print("------------------")
print(f"\nMeals recommended for {meal} time: {', '.join(recomendations)}\n")

Nutrition limits for breakfast:
Calories: 496.25
Proteins: 5.75
Carbohydrates: 11.51
Fats: 2.56
Fiber: 15
Sugar: 20
Calcium: 400
Vitamin C: 70.0
------------------
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/cronopio/Escritorio/TFM - FitBit/TFM_env/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/3caa0bd27e864e9f8f8643e3f3d95807-pulp.mps max timeMode elapsed branch printingOptions all solution /tmp/3caa0bd27e864e9f8f8643e3f3d95807-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 11 COLUMNS
At line 28 RHS
At line 35 BOUNDS
At line 39 ENDATA
Problem MODEL has 6 rows, 3 columns and 11 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 1 rows, 2 columns (2 integer (0 of which binary)) and 2 elements
Cbc0012I Integer solution of 0 found by DiveCoefficient after 0 iterations 

### Recomendaciones para una persona nueva

Hacemos un modelo para predecir, cual sería la mejor combinacion para una nueva persona, utilizando como base los datos de otros usuarios

### Variables iniciales

In [33]:
nutrient_control = 'Fiber'
age = 60
weight = 50
calories = 600
meal = 'dinner'

final_result = {}

### Modelo KNeighborsRegressor

In [34]:
def get_constraints_from_dataset_knr(meal, parameters_input):   
    features = ['age', 'WeightKg', 'Calories']
    nutrients = ['Proteins_meal', 'Carbohydrates_meal', 'Fats_meal', 'Fiber_meal', 'Sugar_meal', 'Calcium_meal', 'Calories_meal', 'Vitamin C_meal']
    
    df_specific_meal = df_combined[df_combined['Meal'] == meal]

    X = df_specific_meal[features]
    y = df_specific_meal[nutrients]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    models = {}
    for nutrient in nutrients:
        model = KNeighborsRegressor(n_neighbors=5)
        model.fit(X_train_scaled, y_train[nutrient])
        models[nutrient] = model

    predicted_nutrients = {}
    for nutrient in nutrients:
        predicted_value = models[nutrient].predict(scaler.transform(parameters_input))
        predicted_nutrients[nutrient] = max(0, predicted_value[0])

    y_pred = pd.DataFrame(index=X_test.index, columns=nutrients)
    for nutrient in nutrients:
        y_pred[nutrient] = models[nutrient].predict(X_test_scaled)
    
    mse = mean_squared_error(y_test, y_pred)
    
    return predicted_nutrients, mse

In [37]:
constraints_kn, mse_kn = get_constraints_from_dataset_knr(meal,[[age, weight, calories]])
constraints_kn = {key.replace('_meal', ''): value for key, value in constraints_kn.items()}
constraints_kn = {nutrient: round(value, 2) for nutrient, value in constraints_kn.items()}
print(f"Nutrition limits for {meal}:")
for nutrient, limit in constraints_kn.items():
    print(f"{nutrient}: {limit}")
print("------------------")
meals_dataset_filtered = first_recommendations(df_nutrition_copy, meal)
recomendations_kn = get_food_recommendations(meals_dataset_filtered,constraints_kn,nutrient_control)
print("------------------")
print(f"Meals recommended: {', '.join(recomendations_kn)}\n")

final_result['KNeighborsRegressor'] = {}
final_result['KNeighborsRegressor']['MSE'] = round(mse_kn,2)
final_result['KNeighborsRegressor']['Meals'] = ', '.join(recomendations_kn)

Nutrition limits for dinner:
Proteins: 6.15
Carbohydrates: 12.3
Fats: 2.73
Fiber: 15.0
Sugar: 10.0
Calcium: 350.0
Calories: 495.55
Vitamin C: 50.0
------------------
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/cronopio/Escritorio/TFM - FitBit/TFM_env/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/61655c735e494c8c84de88d720e96f50-pulp.mps max timeMode elapsed branch printingOptions all solution /tmp/61655c735e494c8c84de88d720e96f50-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 50 COLUMNS
At line 504 RHS
At line 550 BOUNDS
At line 593 ENDATA
Problem MODEL has 45 rows, 42 columns and 370 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0003I 0 fixed, 1 tightened bounds, 18 strengthened rows, 0 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 18 strengthened rows, 0 substitutions
Cgl0003I 

### Modelo Redes Neuronales

In [38]:
def get_constraints_from_dataset_neural(meal, parameters_input):
    features = ['age', 'WeightKg', 'Calories']
    nutrients = ['Proteins_meal', 'Carbohydrates_meal', 'Fats_meal', 'Fiber_meal', 'Sugar_meal', 'Calcium_meal', 'Calories_meal', 'Vitamin C_meal']
    df_specific_meal = df_combined[df_combined['Meal'] == meal]
    X = df_specific_meal[features].values
    y = df_specific_meal[nutrients].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=len(features)))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(len(nutrients), activation='relu'))
    adam_optimizer = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=adam_optimizer)
    model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

    new_parameters_input_scaled = scaler.transform(parameters_input)
    predicted_nutrients = model.predict(new_parameters_input_scaled)[0]

    y_pred_test = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred_test)
    
    predicted_nutrients = [max(0, value) for value in predicted_nutrients]

    predicted_nutrients_dict = {nutrient.replace('_meal', ''): round(value, 2) for nutrient, value in zip(nutrients, predicted_nutrients)}
    return predicted_nutrients_dict, mse

In [39]:
constraints_neural, mse_neural = get_constraints_from_dataset_neural(meal,[[age, weight, calories]])
print(f"Nutrition limits for {meal}:")
for nutrient, limit in constraints_neural.items():
    print(f"{nutrient}: {limit}")
print("------------------")
meals_dataset_filtered = first_recommendations(df_nutrition_copy, meal)
recomendations_neural = get_food_recommendations(meals_dataset_filtered,constraints_neural,nutrient_control)
print("------------------")
print(f"Meals recommended: {', '.join(recomendations_neural)}\n")

final_result['Neuronal'] = {}
final_result['Neuronal']['MSE'] = round(mse_neural)
final_result['Neuronal']['Meals'] = ', '.join(recomendations_neural)


2023-08-17 21:54:59.735013: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Nutrition limits for dinner:
Proteins: 7.289999961853027
Carbohydrates: 5.03000020980835
Fats: 5.71999979019165
Fiber: 7.159999847412109
Sugar: 1.1299999952316284
Calcium: 2.6500000953674316
Calories: 14.65999984741211
Vitamin C: 3.9000000953674316
------------------
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/cronopio/Escritorio/TFM - FitBit/TFM_env/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/c62f0aafc83c4648b453d68bb8bb9f80-pulp.mps max timeMode elapsed branch printingOptions all solution /tmp/c62f0aafc83c4648b453d68bb8bb9f80-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 50 COLUMNS
At line 504 RHS
At line 550 BOUNDS
At line 593 ENDATA
Problem MODEL has 45 rows, 42 columns and 370 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0000I Cut generators found to be infeasible! (or unbound

### Modelo Random Forest

In [40]:
def get_constraints_from_dataset_random_forest(meal, parameters_input):
    features = ['age', 'WeightKg', 'Calories']
    nutrients = ['Proteins_meal', 'Carbohydrates_meal', 'Fats_meal', 'Fiber_meal', 'Sugar_meal', 'Calcium_meal', 'Calories_meal', 'Vitamin C_meal']
    df_specific_meal = df_combined[df_combined['Meal'] == meal]
    X = df_specific_meal[features].values
    y = df_specific_meal[nutrients].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)
    y_pred_test = rf_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred_test)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    new_parameters_input_scaled = scaler.transform(parameters_input)
    
    predicted_nutrients = rf_model.predict(new_parameters_input_scaled)[0]
    predicted_nutrients = [max(0, value) for value in predicted_nutrients]
    predicted_nutrients_dict = {nutrient.replace('_meal', ''): round(value, 2) for nutrient, value in zip(nutrients, predicted_nutrients)}
    return predicted_nutrients_dict, mse

In [41]:
constraints_random_forest, mse_random_forest = get_constraints_from_dataset_random_forest(meal,[[age, weight, calories]])
print(f"Nutrition limits for {meal}:")
for nutrient, limit in constraints_random_forest.items():
    print(f"{nutrient}: {limit}")
print("------------------")
meals_dataset_filtered = first_recommendations(df_nutrition_copy, meal)
recomendations_random_forest = get_food_recommendations(meals_dataset_filtered,constraints_random_forest,nutrient_control)
print(f"Meals recommended: {', '.join(recomendations_random_forest)}\n")

final_result['RandonForest'] = {}
final_result['RandonForest']['MSE'] = round(mse_random_forest)
final_result['RandonForest']['Meals'] = ', '.join(recomendations_random_forest)

Nutrition limits for dinner:
Proteins: 5.63
Carbohydrates: 11.26
Fats: 2.5
Fiber: 15.0
Sugar: 10.0
Calcium: 350.0
Calories: 361.48
Vitamin C: 44.32
------------------
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/cronopio/Escritorio/TFM - FitBit/TFM_env/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/4622b0ea2dc4432793ab5477f13a8caf-pulp.mps max timeMode elapsed branch printingOptions all solution /tmp/4622b0ea2dc4432793ab5477f13a8caf-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 50 COLUMNS
At line 504 RHS
At line 550 BOUNDS
At line 593 ENDATA
Problem MODEL has 45 rows, 42 columns and 370 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0003I 0 fixed, 0 tightened bounds, 16 strengthened rows, 0 substitutions
Cgl0003I 0 fixed, 0 tightened bounds, 16 strengthened rows, 0 substitutions
Cgl0003I

### Modelo XGBRegressor

In [47]:
def get_constraints_from_dataset_xgbregressor(meal, parameters_input):
    features = ['age', 'WeightKg', 'Calories']
    print("ENTRAMOS")
    nutrients = ['Proteins_meal', 'Carbohydrates_meal', 'Fats_meal', 'Fiber_meal', 'Sugar_meal', 'Calcium_meal', 'Calories_meal', 'Vitamin C_meal']
    df_specific_meal = df_combined[df_combined['Meal'] == meal]
    X = df_specific_meal[features].values
    y = df_specific_meal[nutrients].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    xgb_model = XGBRegressor(n_estimators=200, max_depth=10, random_state=42)
    xgb_model.fit(X_train, y_train)
    y_pred = xgb_model.predict(X_test)
    mse = round(mean_squared_error(y_test, y_pred),2)
    
    best_predictions = {}
    for nutrient in nutrients:
        nutrient_index = nutrients.index(nutrient)
        nutrient_predictions = y_pred[:, nutrient_index]
        best_predictions[nutrient] = round(max(nutrient_predictions),2)
    best_predictions = {key.replace('_meal', ''): value for key, value in best_predictions.items()}
    return best_predictions, mse

In [43]:
constraints_xgbregressor, mse_xgbregressor = get_constraints_from_dataset_xgbregressor(meal,[[age, weight, calories]])
print(f"Nutrition limits for {meal}:")
for nutrient, limit in constraints_xgbregressor.items():
    print(f"{nutrient}: {limit}")
print("------------------")
meals_dataset_filtered = first_recommendations(df_nutrition_copy, meal)
recomendations_xgbregressor = get_food_recommendations(meals_dataset_filtered,constraints_xgbregressor,nutrient_control)
print(f"Meals recommended: {', '.join(recomendations_xgbregressor)}\n")

final_result['XGBRegressor'] = {}
final_result['XGBRegressor']['MSE'] = round(mse_xgbregressor,2)
final_result['XGBRegressor']['Meals'] = ', '.join(recomendations_xgbregressor)

Nutrition limits for dinner:
Proteins: 7.329999923706055
Carbohydrates: 15.789999961853027
Fats: 3.299999952316284
Fiber: 15.0
Sugar: 10.0
Calcium: 350.0
Calories: 730.7999877929688
Vitamin C: 65.75
------------------
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/cronopio/Escritorio/TFM - FitBit/TFM_env/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/9ccb082e7de846d58f6d8af9331469be-pulp.mps max timeMode elapsed branch printingOptions all solution /tmp/9ccb082e7de846d58f6d8af9331469be-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 50 COLUMNS
At line 504 RHS
At line 550 BOUNDS
At line 593 ENDATA
Problem MODEL has 45 rows, 42 columns and 370 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0003I 0 fixed, 1 tightened bounds, 12 strengthened rows, 0 substitutions
Cgl0003I 0 fixed, 0 tightened bou

## Resumen de los modelos

In [44]:
print(f"Summary for a person with this parameters: Age: {age}, Weight {weight} and Calories: {calories}\n")
final_result= pd.DataFrame(final_result).transpose()
print(final_result)

Summary for a person with this parameters: Age: 60, Weight 50 and Calories: 600

                        MSE                    Meals
KNeighborsRegressor  299.18                 Pancakes
Neuronal              56956           Pork Loin Chop
RandonForest             29                  Octopus
XGBRegressor          87.52  Duck Breast, Milk, Rice


## Aplicando los modelos a las comidas diarias de una persona

In [45]:
def calculate_liquid(age, weight):
    if age <= 30:
        return 40 * weight
    elif age > 30 and age <= 55:
        return 35 * weight
    elif age > 55 and age <= 75:
        return 30 * weight
    else:
        return 25 * weight

In [51]:
meals = ['breakfast', 'dinner','lunch','snack']
nutrient_control = 'Fiber'
age = 37
weight = 75
calories = 1960
nutrient_control = 'Sugar'
result = {}
to_eat_today = []
liquid = round(calculate_liquid(age,weight),2)

for meal in meals:  
    constraints_xgbregressor, mse_xgbregressor = get_constraints_from_dataset_xgbregressor(meal,[[age, weight, calories]])   
    constraints_random_forest, mse_random_forest = get_constraints_from_dataset_random_forest(meal,[[age, weight, calories]])
    constraints_kn,mse_kn = get_constraints_from_dataset_knr(meal,[[age, weight, calories]])
    constraints_kn = {key.replace('_meal', ''): value for key, value in constraints_kn.items()}
    constraints_kn = {nutrient: round(value, 2) for nutrient, value in constraints_kn.items()}
    constraints_neural, mse_neural = get_constraints_from_dataset_neural(meal,[[age, weight, calories]])
    
    meals_dataset_filtered = first_recommendations(df_nutrition_copy, meal)
    
    recomendations_xgbregressor = get_food_recommendations(meals_dataset_filtered,constraints_xgbregressor,nutrient_control)
    recomendations_random_forest = get_food_recommendations(meals_dataset_filtered,constraints_random_forest,nutrient_control)
    recomendations_kn = get_food_recommendations(meals_dataset_filtered,constraints_kn,nutrient_control)
    recomendations_neural = get_food_recommendations(meals_dataset_filtered,constraints_neural,nutrient_control)
    
    to_eat_today.append(recomendations_xgbregressor)
    to_eat_today.append(recomendations_random_forest)
    to_eat_today.append(recomendations_kn)
    to_eat_today.append(recomendations_neural)
    
    result[meal] = {
        'KNeighborsRegressor': {
            'constraints_kn': constraints_kn,
            'recommendations_kn': recomendations_kn,
            'mse': mse_kn
        },
        'Neural': {
            'constraints_neural': constraints_neural,
            'recommendations_neural': recomendations_neural,
            'mse': mse_neural
        },
        'XGBRegressor': {
            'constraints_xgbr': constraints_xgbregressor,
            'recommendations_xgbr': recomendations_xgbregressor,
            'mse': mse_xgbregressor
        },
        'RandomForest': {
            'constraints_random_forest': constraints_random_forest,
            'recommendations_random_forest': recomendations_random_forest,
            'mse': mse_random_forest
        }
    }

ENTRAMOS
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/cronopio/Escritorio/TFM - FitBit/TFM_env/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/2a6e75e007db48be91045a5713c6db5b-pulp.mps max timeMode elapsed branch printingOptions all solution /tmp/2a6e75e007db48be91045a5713c6db5b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 10 COLUMNS
At line 19 RHS
At line 25 BOUNDS
At line 28 ENDATA
Problem MODEL has 5 rows, 2 columns and 5 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/cronopio/Escritorio/TFM - FitBit/TFM_env/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/9646f9

In [53]:
def show_info(result, indent=0):
    for key, value in result.items():
        if isinstance(value, dict):
            print(' ' * indent + f"{key}:")
            show_info(value, indent + 4)
        else:
            print(' ' * indent + f"{key}: {value}")
show_info(result)

breakfast:
    KNeighborsRegressor:
        constraints_kn:
            Proteins: 5.14
            Carbohydrates: 10.28
            Fats: 2.29
            Fiber: 15.0
            Sugar: 20.0
            Calcium: 400.0
            Calories: 452.4
            Vitamin C: 46.96
        recommendations_kn: ['Smoothie']
        mse: 299.18168528571414
    Neural:
        constraints_neural:
            Proteins: 0.3100000023841858
            Carbohydrates: 1.2999999523162842
            Fats: 1.9900000095367432
            Fiber: 2.380000114440918
            Sugar: 3.190000057220459
            Calcium: 8.210000038146973
            Calories: 6.159999847412109
            Vitamin C: 1.8600000143051147
        recommendations_neural: ['Smoothie']
        mse: 61343.368454171694
    XGBRegressor:
        constraints_xgbr:
            Proteins: 7.329999923706055
            Carbohydrates: 15.789999961853027
            Fats: 3.299999952316284
            Fiber: 15.0
            Sugar: 20.0
  

In [54]:
print(f"Summary for a person with: Age: {age}, Weight {weight} and Calories: {calories}\n")
print(f"Items to get today:\n")
for item in set(item for sublist in to_eat_today for item in sublist):print(item)
print(f"\nLiquid (ml): {liquid}")

Summary for a person with: Age: 37, Weight 75 and Calories: 1960

Items to get today:

Nuts
Cod
Mussels
Grapefruit Juice
Smoothie
Clams
Cheese
Salmon
Milk
Gooseberries
Hummus

Liquid (ml): 2625


Esta lista de elementos, puede enviarse por ejemplo:
- A la API de algún supermercado
- A la API de alguna aplicacion de recetas (Cookpad)
- A la API de alguna aplicación de comidas.

Tambien se puede crear un dataframe, adjuntando el valor nutricional de cada elemento seleccionado.

In [55]:
buy = set(item for sublist in to_eat_today for item in sublist)
search_results = df_nutrition_copy[df_nutrition_copy['name'].isin(buy)]
print(search_results)

                 name  Fiber  Vitamin C    Fats  Proteins  Calories  Sugar  \
60       Gooseberries    4.3       27.7    5.22      3.52        44    0.0   
161            Salmon    0.0        0.0  234.00    367.00       412    0.0   
163               Cod    0.0        0.0  119.00    333.00       290    0.0   
167           Mussels    0.0       12.0   15.00    224.00       172    0.0   
172             Clams    0.0       36.0    1.00    113.00        48    0.0   
195              Milk    0.0        0.0    3.30      3.20        61    4.8   
197  Grapefruit Juice    0.2       38.0    0.20      1.60        96   21.2   
198          Smoothie    2.0       60.0    2.00      2.00       150   26.0   
199              Nuts    7.5        0.0   54.50     20.50       607    1.5   
202            Cheese    0.0        0.0   33.10     25.00       402    0.5   
205            Hummus    7.9        2.1    8.70      7.90       177    0.2   

     Calcium  Carbohydrates    Category  
60        25         