# Random agent with dietkit data

In [1]:
import numpy as np
import pandas as pd
from meal_planning_environment import MealPlanningEnv
from dietkit import (load_ingredient, load_menu, load_diet)

In [2]:
# Load the dietkit data with English labels: the ingredients, the menus built
# on those ingredients, and the diets built on those menus.
all_ingredients = load_ingredient(sample_language='eng')
meal_dict = load_menu(ingredients=all_ingredients, sample_language='eng')
example_mealplans = load_diet(
    menus=meal_dict,
    num_loads=100,
    sample_language='eng',
    sample_name='ML',
)

# The keys of meal_dict are the meal names; keep them as an array so they can
# double as the row index of the nutrition table below.
possible_meals = np.array([meal_name for meal_name in meal_dict])
display(possible_meals)

# One row per meal (taken from each value's `nutrition` attribute), in the
# same order as possible_meals.
nutrition_data = pd.DataFrame(
    [menu.nutrition for menu in meal_dict.values()],
    index=possible_meals,
)
display(nutrition_data)

array(['S watermelon punch (without milks)', 'Dried persimmon',
       'S kyogo grape (100g)', ..., 'Green onion kimchi',
       'Pickled paprika', 'empty'], dtype='<U72')

Unnamed: 0,Energy,Protein,Fat,Carbohydrate,Total Dietary,Calcium,Iron,Sodium,Vitamin A,Vitamin B1 (Thiamine),Vitamin B2 (Rivoflavin),Vitamin C,Linoleic Acid,Alpha-Linolenic Acid
S watermelon punch (without milks),55.00,0.7900,0.05000,13.8180,0.200,7.200,0.18600,0.600,71.083333,0.024000,0.03000,0.0000,14.2500,2.9300
Dried persimmon,76.50,0.5820,0.04800,20.5830,3.360,6.300,0.19200,0.900,13.725000,0.003000,0.06720,1.5330,0.0000,8.6010
S kyogo grape (100g),61.00,0.4300,0.04000,16.3800,0.400,4.000,0.17000,1.000,2.166667,0.026000,0.04700,0.1300,11.8900,6.0900
S dried persimmon,85.60,0.7720,0.03200,22.9800,3.400,5.600,0.24800,0.400,9.533333,0.003600,0.09720,1.0680,0.0000,5.0440
S fruit punch,101.05,3.6700,3.36450,14.8900,0.790,117.650,0.19850,36.000,91.166667,0.040350,0.19110,1.8925,86.6605,10.6925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ponytail radish kimchi,10.25,0.5050,0.05000,1.9725,0.775,10.500,0.16750,173.000,5.541667,0.003250,0.01900,0.4125,19.7050,9.4775
Kohlrabi watery kimchi,5.50,0.2900,0.01250,1.2900,0.600,10.500,0.06750,1.750,0.250000,0.203750,0.00875,0.0000,1.9650,5.1100
Green onion kimchi,12.25,0.6250,0.14500,2.1275,1.150,15.000,0.18250,163.750,26.854167,0.012750,0.02850,0.5550,79.8050,17.1425
Pickled paprika,7.94,0.2017,0.03005,1.9859,0.348,1.675,0.08145,67.204,4.850000,0.003675,0.02709,24.2820,12.4483,6.5448


In [3]:
class RandomAgent:
    """Agent that chooses every action by sampling from its action space.

    The action space only needs to provide a ``sample()`` method; whatever
    that method returns is used as the action.
    """

    def __init__(self, action_space):
        """Store the space that actions will be drawn from."""
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return a freshly sampled action.

        ``observation``, ``reward`` and ``done`` are accepted so the call
        signature matches an agent that learns from feedback, but none of
        them is used here.
        """
        sampled_action = self.action_space.sample()
        return sampled_action

In [4]:
num_meals = 5
env = MealPlanningEnv(possible_meals, nutrition_data, num_meals)
agent = RandomAgent(env.action_space)

# Run one episode of at most `num_meals` steps with the random agent.
observation = env.reset()
# No feedback exists before the first step, so start from "no reward, not done"
# and afterwards hand the agent whatever the environment actually returned.
reward, done = None, False
for _ in range(num_meals):
    action = agent.act(observation, reward=reward, done=done)
    observation, reward, done, info = env.step(action)
    # render() prints the plan itself; its return value is None, so wrapping
    # it in display() only added a stray "None" after every step.
    env.render()
    print(f'Done?: {done}')
    if done:
        # Do not keep stepping an episode the environment has finished.
        break
env.close()

Step: 1
Chosen Meal: Egg green bean sprout salad
Meal History: ['Egg green bean sprout salad' 'empty' 'empty' 'empty' 'empty']
Nutrition History: [[5.7020e+01 4.3600e+00 2.3600e+00 4.4900e+00 8.1000e-01 2.2100e+01
  6.4000e-01 5.5870e+01 6.4150e+01 5.0000e-02 1.7000e-01 2.5200e+00
  3.9216e+02 1.4310e+01]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]]
Goal Nutrition: [4.17670e+02 2.10100e+01 1.34600

None

Done?: False
Step: 2
Chosen Meal: S beef radish porridge
Meal History: ['Egg green bean sprout salad' 'S beef radish porridge' 'empty' 'empty'
 'empty']
Nutrition History: [[5.7020e+01 4.3600e+00 2.3600e+00 4.4900e+00 8.1000e-01 2.2100e+01
  6.4000e-01 5.5870e+01 6.4150e+01 5.0000e-02 1.7000e-01 2.5200e+00
  3.9216e+02 1.4310e+01]
 [1.4113e+02 4.4900e+00 4.0900e+00 2.0380e+01 6.0000e-02 5.5300e+00
  6.5000e-01 7.2320e+01 1.9300e+00 7.0000e-02 5.0000e-02 9.1000e-01
  5.4374e+02 9.6600e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]]
Goal Nutrition: [4.176

None

Done?: False
Step: 3
Chosen Meal: Udon (white wheat noodles) with stir-fried teriyaki chicken
Meal History: ['Egg green bean sprout salad' 'S beef radish porridge'
 'Udon (white wheat noodles) with stir-fried teriyaki chicken' 'empty'
 'empty']
Nutrition History: [[5.70200e+01 4.36000e+00 2.36000e+00 4.49000e+00 8.10000e-01 2.21000e+01
  6.40000e-01 5.58700e+01 6.41500e+01 5.00000e-02 1.70000e-01 2.52000e+00
  3.92160e+02 1.43100e+01]
 [1.41130e+02 4.49000e+00 4.09000e+00 2.03800e+01 6.00000e-02 5.53000e+00
  6.50000e-01 7.23200e+01 1.93000e+00 7.00000e-02 5.00000e-02 9.10000e-01
  5.43740e+02 9.66000e+00]
 [9.81300e+01 8.55000e+00 2.88000e+00 8.81000e+00 9.20000e-01 1.89700e+01
  5.60000e-01 2.40880e+02 4.34100e+01 9.00000e-02 8.00000e-02 2.95000e+00
  1.19606e+03 1.41370e+02]
 [0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
  0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
  0.00000e+00 0.00000e+00]
 [0.00000e+00 0.00000e+00 0.00000e+0

None

Done?: False
Step: 4
Chosen Meal: Steamed sweet rice and red bean
Meal History: ['Egg green bean sprout salad' 'S beef radish porridge'
 'Udon (white wheat noodles) with stir-fried teriyaki chicken'
 'Steamed sweet rice and red bean' 'empty']
Nutrition History: [[5.70200e+01 4.36000e+00 2.36000e+00 4.49000e+00 8.10000e-01 2.21000e+01
  6.40000e-01 5.58700e+01 6.41500e+01 5.00000e-02 1.70000e-01 2.52000e+00
  3.92160e+02 1.43100e+01]
 [1.41130e+02 4.49000e+00 4.09000e+00 2.03800e+01 6.00000e-02 5.53000e+00
  6.50000e-01 7.23200e+01 1.93000e+00 7.00000e-02 5.00000e-02 9.10000e-01
  5.43740e+02 9.66000e+00]
 [9.81300e+01 8.55000e+00 2.88000e+00 8.81000e+00 9.20000e-01 1.89700e+01
  5.60000e-01 2.40880e+02 4.34100e+01 9.00000e-02 8.00000e-02 2.95000e+00
  1.19606e+03 1.41370e+02]
 [1.99850e+02 4.40000e+00 2.70000e-01 4.29800e+01 9.00000e-01 6.40000e+00
  9.90000e-01 3.60000e+00 1.30000e-01 1.30000e-01 3.00000e-02 2.30000e-01
  1.55200e+01 7.21000e+00]
 [0.00000e+00 0.00000e+00 0.00000e+00 

None

Done?: False
Step: 5
Chosen Meal: Steamed corvina
Meal History: ['Egg green bean sprout salad' 'S beef radish porridge'
 'Udon (white wheat noodles) with stir-fried teriyaki chicken'
 'Steamed sweet rice and red bean' 'Steamed corvina']
Nutrition History: [[5.70200e+01 4.36000e+00 2.36000e+00 4.49000e+00 8.10000e-01 2.21000e+01
  6.40000e-01 5.58700e+01 6.41500e+01 5.00000e-02 1.70000e-01 2.52000e+00
  3.92160e+02 1.43100e+01]
 [1.41130e+02 4.49000e+00 4.09000e+00 2.03800e+01 6.00000e-02 5.53000e+00
  6.50000e-01 7.23200e+01 1.93000e+00 7.00000e-02 5.00000e-02 9.10000e-01
  5.43740e+02 9.66000e+00]
 [9.81300e+01 8.55000e+00 2.88000e+00 8.81000e+00 9.20000e-01 1.89700e+01
  5.60000e-01 2.40880e+02 4.34100e+01 9.00000e-02 8.00000e-02 2.95000e+00
  1.19606e+03 1.41370e+02]
 [1.99850e+02 4.40000e+00 2.70000e-01 4.29800e+01 9.00000e-01 6.40000e+00
  9.90000e-01 3.60000e+00 1.30000e-01 1.30000e-01 3.00000e-02 2.30000e-01
  1.55200e+01 7.21000e+00]
 [3.55200e+01 5.71000e+00 1.21000e+00 2.0000

None

Done?: True


In [5]:
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import PPO
num_meals = 21

# The algorithms require a vectorized environment to run
env = DummyVecEnv([lambda: MealPlanningEnv(possible_meals, nutrition_data, num_meals=num_meals)])
model = PPO("MultiInputPolicy", env, verbose=1)
model.learn(total_timesteps=20000)

# Roll the trained policy out on a plain (non-vectorized) environment.
# A vectorized environment resets a sub-environment as soon as it reports
# done, so rendering `env` after the final step would show a freshly reset
# plan rather than the one the model just built.
eval_env = MealPlanningEnv(possible_meals, nutrition_data, num_meals=num_meals)
obs = eval_env.reset()
for _ in range(num_meals):
    # predict() also accepts a single, un-batched observation and then
    # returns a single action.
    action, _states = model.predict(obs)
    # NOTE(review): int() assumes a Discrete action space (one meal index per
    # step) - confirm against MealPlanningEnv.action_space.
    obs, rewards, done, info = eval_env.step(int(action))
    if done:
        break
eval_env.render()
eval_env.close()

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 2486 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1345          |
|    iterations           | 2             |
|    time_elapsed         | 3             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 1.9790605e-09 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -8.08         |
|    explained_variance   | 4.11e-06      |
|    learning_rate        | 0.0003        |
|    loss                 | 4.63e+09      |
|    n_updates            | 10            |
|    policy_gradient_loss | -4.5e-05      |
|    value_loss           | 8.74e+09      |
-------------------------