# Toy example with custom environment

In [1]:
import numpy as np
import pandas as pd
from meal_planning_environment import MealPlanningEnv
from dietkit import (load_ingredient, load_menu, load_diet)

In [2]:
# Dummy data: three candidate meals and the nutrient values recorded for each.
possible_meals = np.array(['meal1', 'meal2', 'meal3'])
nutrition_data = pd.DataFrame(
    # One row per meal, listed in the same order as possible_meals.
    [
        [200, 20, 30, 10],
        [300, 30, 40, 20],
        [400, 40, 50, 30],
    ],
    index=possible_meals,
    columns=['calories', 'protein', 'carbs', 'fat'],
)
# Render both objects, in order, as rich cell output.
display(possible_meals, nutrition_data)

class RandomAgent:
    """Baseline agent that chooses actions by sampling its action space.

    The observation, reward and done flag handed to ``act`` are accepted
    but never consulted.
    """

    def __init__(self, action_space):
        """Keep a reference to ``action_space``, which must provide ``sample()``."""
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return one action obtained from ``self.action_space.sample()``."""
        space = self.action_space
        chosen_action = space.sample()
        return chosen_action


# Roll out one episode: build a single meal plan of `num_meals` meals.
num_meals = 5
env = MealPlanningEnv(possible_meals=possible_meals, nutrition_data=nutrition_data, num_meals=num_meals)
agent = RandomAgent(env.action_space)
try:
    observation = env.reset()
    # Feedback from the previous step; there is none before the first action.
    reward, done = None, False
    for _ in range(num_meals):
        # Forward the environment's latest feedback so that an agent which
        # actually uses reward/done can be swapped in without touching the loop.
        action = agent.act(observation, reward=reward, done=done)
        observation, reward, done, info = env.step(action)
        env.render()
        print(f'Done?: {done}')
        if done:
            # NOTE(review): assumes Gym-style semantics, where stepping a
            # finished episode without reset() is undefined -- confirm against
            # MealPlanningEnv.
            break
finally:
    # Always release the environment, even if step() or render() raises.
    env.close()

array(['meal1', 'meal2', 'meal3'], dtype='<U5')

Unnamed: 0,calories,protein,carbs,fat
meal1,200,20,30,10
meal2,300,30,40,20
meal3,400,40,50,30


Step: 1
Chosen Meal: meal2
Meal History: ['meal2' 'meal3' 'meal3' 'meal3' 'meal3']
Nutrition History: [[300.  30.  40.  20.]
 [  0.   0.   0.   0.]
 [  0.   0.   0.   0.]
 [  0.   0.   0.   0.]
 [  0.   0.   0.   0.]]
Goal Nutrition: [1500.  150.  200.  100.]
Current Nutrition: [300.  30.  40.  20.]
Reward: -1560.0
Done?: False
Step: 2
Chosen Meal: meal3
Meal History: ['meal2' 'meal3' 'meal3' 'meal3' 'meal3']
Nutrition History: [[300.  30.  40.  20.]
 [400.  40.  50.  30.]
 [  0.   0.   0.   0.]
 [  0.   0.   0.   0.]
 [  0.   0.   0.   0.]]
Goal Nutrition: [1500.  150.  200.  100.]
Current Nutrition: [700.  70.  90.  50.]
Reward: -1040.0
Done?: False
Step: 3
Chosen Meal: meal3
Meal History: ['meal2' 'meal3' 'meal3' 'meal3' 'meal3']
Nutrition History: [[300.  30.  40.  20.]
 [400.  40.  50.  30.]
 [400.  40.  50.  30.]
 [  0.   0.   0.   0.]
 [  0.   0.   0.   0.]]
Goal Nutrition: [1500.  150.  200.  100.]
Current Nutrition: [1100.  110.  140.   80.]
Reward: -520.0
Done?: False
Step: 4