# DQN agent with dietkit data

In [1]:
import numpy as np
import pandas as pd
from dqn_meal_planning_environment import MealPlanningEnv
from dietkit import (load_ingredient, load_menu, load_diet)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

2023-04-17 15:34:04.504108: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the English-language dietkit samples: ingredients first, then the menus
# built from them, then a set of example diets built from those menus.
all_ingredients = load_ingredient(sample_language='eng')
meal_dict = load_menu(ingredients=all_ingredients, sample_language='eng')
example_mealplans = load_diet(
    menus=meal_dict,
    num_loads=100,
    sample_language='eng',
    sample_name='ML',
)

# Menu names, in dictionary order; used below as the DataFrame index.
possible_meals = np.array([*meal_dict.keys()])
display(possible_meals)

# One row of nutrition values per menu, labelled by the menu name.
nutrition_data = pd.DataFrame(
    [menu.nutrition for menu in meal_dict.values()],
    index=possible_meals,
)
display(nutrition_data)

array(['S watermelon punch (without milks)', 'Dried persimmon',
       'S kyogo grape (100g)', ..., 'Green onion kimchi',
       'Pickled paprika', 'empty'], dtype='<U72')

Unnamed: 0,Energy,Protein,Fat,Carbohydrate,Total Dietary,Calcium,Iron,Sodium,Vitamin A,Vitamin B1 (Thiamine),Vitamin B2 (Rivoflavin),Vitamin C,Linoleic Acid,Alpha-Linolenic Acid
S watermelon punch (without milks),55.00,0.7900,0.05000,13.8180,0.200,7.200,0.18600,0.600,71.083333,0.024000,0.03000,0.0000,14.2500,2.9300
Dried persimmon,76.50,0.5820,0.04800,20.5830,3.360,6.300,0.19200,0.900,13.725000,0.003000,0.06720,1.5330,0.0000,8.6010
S kyogo grape (100g),61.00,0.4300,0.04000,16.3800,0.400,4.000,0.17000,1.000,2.166667,0.026000,0.04700,0.1300,11.8900,6.0900
S dried persimmon,85.60,0.7720,0.03200,22.9800,3.400,5.600,0.24800,0.400,9.533333,0.003600,0.09720,1.0680,0.0000,5.0440
S fruit punch,101.05,3.6700,3.36450,14.8900,0.790,117.650,0.19850,36.000,91.166667,0.040350,0.19110,1.8925,86.6605,10.6925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ponytail radish kimchi,10.25,0.5050,0.05000,1.9725,0.775,10.500,0.16750,173.000,5.541667,0.003250,0.01900,0.4125,19.7050,9.4775
Kohlrabi watery kimchi,5.50,0.2900,0.01250,1.2900,0.600,10.500,0.06750,1.750,0.250000,0.203750,0.00875,0.0000,1.9650,5.1100
Green onion kimchi,12.25,0.6250,0.14500,2.1275,1.150,15.000,0.18250,163.750,26.854167,0.012750,0.02850,0.5550,79.8050,17.1425
Pickled paprika,7.94,0.2017,0.03005,1.9859,0.348,1.675,0.08145,67.204,4.850000,0.003675,0.02709,24.2820,12.4483,6.5448


In [3]:
# Number of meals handed to the environment (presumably the length of one
# meal plan -- confirm against MealPlanningEnv).
num_meals = 21
env = MealPlanningEnv(possible_meals, nutrition_data, num_meals)

# The network sizes are derived from the environment: the shape of the
# 'nutrition_history' observation component and the count of discrete actions.
nutrition_history_space = env.observation_space['nutrition_history']
states = nutrition_history_space.shape
actions = env.action_space.n

def build_model(states, actions):
    """Create the (uncompiled) Q-network used by the DQN agent.

    Args:
        states: Observation shape as an iterable of ints. A leading axis of
            length 1 is prepended to form the network's input shape.
        actions: Number of units in the linear output layer (one per action).

    Returns:
        A ``Sequential`` model: two 24-unit ReLU ``Dense`` layers, a
        ``Flatten`` layer, and a linear ``Dense`` output layer.
    """
    windowed_input_shape = (1,) + tuple(states)
    layer_stack = [
        Dense(24, activation='relu', input_shape=windowed_input_shape),
        Dense(24, activation='relu'),
        # Flatten only after the hidden layers so the output layer sees a
        # single vector per sample.
        Flatten(),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layer_stack)

# Build the Q-network for this environment and print its layer summary.
model = build_model(states=states, actions=actions)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1, 21, 24)         360       
                                                                 
 dense_1 (Dense)             (None, 1, 21, 24)         600       
                                                                 
 flatten (Flatten)           (None, 504)               0         
                                                                 
 dense_2 (Dense)             (None, 3235)              1633675   
                                                                 
Total params: 1,634,635
Trainable params: 1,634,635
Non-trainable params: 0
_________________________________________________________________


In [4]:
def build_agent(
    model,
    actions,
    memory_limit=50000,
    window_length=1,
    nb_steps_warmup=10,
    target_model_update=1e-2,
):
    """Assemble an uncompiled DQN agent around ``model``.

    The hyperparameters that used to be hard-coded are exposed as keyword
    arguments; their defaults reproduce the original configuration, so
    ``build_agent(model, actions)`` behaves exactly as before.

    Args:
        model: Q-network passed to ``DQNAgent`` as ``model``.
        actions: Number of discrete actions, passed as ``nb_actions``.
        memory_limit: Passed to ``SequentialMemory`` as ``limit``.
        window_length: Passed to ``SequentialMemory`` as ``window_length``.
            NOTE: ``build_model`` in this notebook hard-codes a leading input
            axis of length 1, so other values need a matching network.
        nb_steps_warmup: Passed to ``DQNAgent`` as ``nb_steps_warmup``.
        target_model_update: Passed to ``DQNAgent`` as
            ``target_model_update`` (per keras-rl, values below 1 are
            presumably treated as a soft-update rate -- confirm against the
            installed version).

    Returns:
        The constructed ``DQNAgent``; the caller still has to ``compile`` it.
    """
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        nb_actions=actions,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=target_model_update,
    )
    return dqn

# Wire the agent to the Q-network, compile it with Adam (MAE tracked as a
# metric), then train against the environment for 1000 steps.
dqn = build_agent(model=model, actions=actions)
optimizer = Adam(learning_rate=1e-3)
dqn.compile(optimizer, metrics=['mae'])
dqn.fit(env, nb_steps=1000, visualize=False, verbose=1)

2023-04-17 15:34:28.133708: W tensorflow/c/c_api.cc:300] Operation '{name:'dense_2_1/bias/Assign' id:228 op device:{requested: '', assigned: ''} def:{{{node dense_2_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_2_1/bias, dense_2_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Training for 1000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 13:23 - reward: 0.0000e+00

  updates=self.state_updates,
2023-04-17 15:34:28.476472: W tensorflow/c/c_api.cc:300] Operation '{name:'dense_2/BiasAdd' id:117 op device:{requested: '', assigned: ''} def:{{{node dense_2/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2/MatMul, dense_2/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-04-17 15:34:28.494078: W tensorflow/c/c_api.cc:300] Operation '{name:'count_1/Assign' id:377 op device:{requested: '', assigned: ''} def:{{{node count_1/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_1, count_1/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modi

 1000/10000 [==>...........................] - ETA: 4:18 - reward: 11318.3154done, took 28.828 seconds


<keras.callbacks.History at 0x7fa2b95622b0>

In [11]:
# Run one visualised test episode and report the mean episode reward.
results = dqn.test(env, nb_episodes=1, visualize=True)
episode_rewards = results.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 1 episodes ...
Step: 1
Chosen Meal: Perilla seed seaweed soup
Meal History: ['Perilla seed seaweed soup' 'empty' 'empty' 'empty' 'empty' 'empty'
 'empty' 'empty' 'empty' 'empty' 'empty' 'empty' 'empty' 'empty' 'empty'
 'empty' 'empty' 'empty' 'empty' 'empty' 'empty']
Nutrition History: [[4.1050e+01 2.5200e+00 2.9400e+00 2.4200e+00 1.7400e+00 8.8940e+01
  7.0000e-01 3.0676e+02 1.5650e+01 1.0000e-02 3.0000e-02 8.0000e-02
  7.9023e+02 7.6634e+02]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
  0.0000e+00 0.0000e+00]
 [0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 

dict_keys(['episode_reward', 'nb_steps'])