In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import random, copy
import numpy as np
from dialogue_config import rule_requests, agent_actions
import re
from tqdm import tqdm
import json

# Load constants

In [10]:
# Folder that holds constants.json (run `%pwd` in a cell to check the
# kernel's current working directory if this path needs adjusting).
FOLDER_PATH = '/home/taindp/Jupyter/custom_dqn'
CONSTANTS_FILE_PATH = f'{FOLDER_PATH}/constants.json'
constants_file = CONSTANTS_FILE_PATH

# JSON text is UTF-8; pass the encoding explicitly so loading does not
# depend on the platform/locale default used by open().
with open(constants_file, encoding='utf-8') as f:
    constants = json.load(f)

In [12]:
# Echo the loaded configuration so the values read by later cells are visible.
constants

{'db_file_paths': {'database': 'data/db_hcmut_full.json',
  'dict': 'data/db_entity_official.json',
  'user_goals': 'data/user_goal.json'},
 'run': {'usersim': True,
  'warmup_mem': 56,
  'num_ep_run': 20000,
  'train_freq': 100,
  'max_round_num': 20,
  'success_rate_threshold': 0.3},
 'agent': {'save_weights_file_path': '',
  'load_weights_file_path': '',
  'vanilla': True,
  'learning_rate': 0.001,
  'batch_size': 128,
  'dqn_hidden_size': 70,
  'epsilon_init': 0.0,
  'gamma': 0.9,
  'max_mem_size': 10000},
 'emc': {'slot_error_mode': 0,
  'slot_error_prob': 0.05,
  'intent_error_prob': 0.0}}

In [18]:
# Hyperparameters taken from the 'agent' section of the loaded constants:
#   hidden_size - width of the hidden Dense layer in build_model
#   lr          - learning rate handed to the Adam optimizer
#   eps         - probability that get_action picks a random action
hidden_size, lr, eps = (
    constants['agent'][key]
    for key in ('dqn_hidden_size', 'learning_rate', 'epsilon_init')
)
# Length of the state vector fed to the network (hard-coded here).
state_size = 168
# The network has one output per entry in agent_actions.
num_actions = len(agent_actions)

In [17]:
def reset():
    """Reset the rule-based policy state to the start of an episode.

    Sets the module-level ``rule_current_slot_index`` to 0 and
    ``rule_phase`` to ``'not done'``; ``rule_action`` reads and advances
    both of them.
    """
    # Without the global declaration these assignments would only create
    # throw-away locals and the shared state would never be reset.
    global rule_current_slot_index, rule_phase
    rule_current_slot_index = 0
    rule_phase = 'not done'

# Build Model

In [16]:
def build_model():
    """Build and compile the Q-network.

    Architecture: one hidden ``Dense`` layer of ``hidden_size`` ReLU units
    taking ``state_size`` inputs, followed by a linear ``Dense`` output
    layer with ``num_actions`` units. Compiled with MSE loss and the Adam
    optimizer at learning rate ``lr``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # input_dim/activation are Dense constructor arguments; they must be
    # inside the Dense(...) call, not passed to model.add().
    model.add(Dense(hidden_size, input_dim=state_size, activation='relu'))
    model.add(Dense(num_actions, activation='linear'))
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(loss='mse', optimizer=Adam(learning_rate=lr))
    return model

# Policy: there are two kinds of action selection, rule_action and dqn_action

In [39]:
def get_action(state, use_rule=False):
    """Choose the agent's next action (epsilon-greedy).

    With probability ``eps`` a uniformly random entry of ``agent_actions``
    is chosen. Otherwise the choice is delegated to ``rule_action()`` when
    ``use_rule`` is true, or to ``dqn_action(state)`` when it is false.

    Args:
        state: State passed through to ``dqn_action``.
        use_rule: Select the rule-based policy instead of the DQN.

    Returns:
        A ``(index, action)`` tuple.
    """
    explore = eps > random.random()
    if not explore:
        # Exploit: defer to whichever policy the caller asked for.
        return rule_action() if use_rule else dqn_action(state)

    # Explore: pick any action uniformly at random.
    index = random.randint(0, num_actions - 1)
    return index, agent_actions[index]

In [40]:
def rule_action():
    """Return the next action of the scripted (rule-based) policy.

    The policy requests every slot in ``rule_requests`` in order, one per
    call. Once all slots have been requested it emits ``match_found`` once
    (switching ``rule_phase`` to ``'done'``) and ``done`` on every call
    after that.

    Reads and updates the module-level ``rule_current_slot_index`` and
    ``rule_phase``; both must have been initialised (see ``reset``).

    Returns:
        A ``(index, action)`` tuple, where ``index`` is the result of
        ``map_action_to_index(action)``.

    Raises:
        ValueError: If ``rule_phase`` holds an unexpected value.
    """
    # Both names are assigned below, so they must be declared global;
    # otherwise the first read raises UnboundLocalError.
    global rule_current_slot_index, rule_phase

    if rule_current_slot_index < len(rule_requests):
        slot = rule_requests[rule_current_slot_index]
        rule_current_slot_index += 1
        rule_response = {'intent': 'request', 'inform_slots': {}, 'request_slots': {slot: 'UNK'}}
    elif rule_phase == 'not done':
        rule_response = {'intent': 'match_found', 'inform_slots': {}, 'request_slots': {}}
        rule_phase = 'done'
    elif rule_phase == 'done':
        rule_response = {'intent': 'done', 'inform_slots': {}, 'request_slots': {}}
    else:
        # Fail with a clear message instead of an unbound rule_response.
        raise ValueError('Unexpected rule_phase: {!r}'.format(rule_phase))

    index = map_action_to_index(rule_response)
    return index, rule_response

In [41]:
def map_action_to_index(response):
    """Return the position of ``response`` within ``agent_actions``.

    Args:
        response: Action to look up; compared to each entry with ``==``.

    Returns:
        The index of the first entry equal to ``response``.

    Raises:
        ValueError: If no entry of ``agent_actions`` equals ``response``.
    """
    for i, action in enumerate(agent_actions):
        if response == action:
            return i
    # Falling off the loop used to return None silently, which then
    # propagated to callers as an invalid action index.
    raise ValueError('Response: {} not found in agent_actions'.format(response))

In [42]:
def dqn_action(state):
    """Return the greedy action according to the Q-network.

    Args:
        state: State passed to ``dqn_predict_one``.

    Returns:
        A ``(index, action)`` tuple: the argmax over the predicted values
        and the corresponding entry of ``agent_actions``.
    """
    # int() turns numpy's integer scalar into a plain Python int, matching
    # the index type returned by the random branch of get_action.
    index = int(np.argmax(dqn_predict_one(state)))
    # Look the action up by index; map_action_to_index goes the other way
    # (action -> index) and must not be used here.
    action = agent_actions[index]
    return index, action

In [43]:
def dqn_predict_one(state, target=False):
    """Run ``dqn_predict`` on a single state.

    The state is reshaped to ``(1, state_size)`` before prediction and the
    result is flattened before being returned.

    Args:
        state: Object with a ``reshape`` method (e.g. a NumPy array)
            holding ``state_size`` values.
        target: Forwarded unchanged to ``dqn_predict``.

    Returns:
        The flattened prediction for ``state``.
    """
    single_batch = state.reshape(1, state_size)
    predictions = dqn_predict(single_batch, target=target)
    return predictions.flatten()

In [46]:
def dqn_predict(states, target=False):
    """Predict with the behaviour network or the target network.

    The networks live in the module-level names ``beh_model`` (behaviour)
    and ``tar_model`` (target). If the requested one has not been defined
    yet it is created with ``build_model()`` on first use and stored under
    that name, so other cells can train it or copy weights into it.

    Args:
        states: Batch of states passed to the model's ``predict``.
        target: Use the target network when true, otherwise the
            behaviour network.

    Returns:
        Whatever the selected model's ``predict(states)`` returns.
    """
    # `build_model` is a function, not a model: calling `.predict` on it
    # raises AttributeError. Keep one model instance per role instead.
    model_name = 'tar_model' if target else 'beh_model'
    model = globals().get(model_name)
    if model is None:
        model = build_model()
        globals()[model_name] = model
    return model.predict(states)