In [None]:
!pip install tensorflow-gpu

In [None]:
!rm -r DialogueBot
!git clone https://github.com/ressay/DialogueBot.git

In [None]:
!pip install rdflib

In [None]:
!pip install -r DialogueBot/requirements.txt

In [None]:
!mkdir my_weights


In [1]:
import sys
sys.path.insert(0, ".")
from DialogueManager.FileBrowserDM.agent import AgentFB
from DialogueManager.FileBrowserDM.user_simulator import UserSimulatorFB
import Ontologies.onto_fbrowser as fbrowser
import argparse, json

Using TensorFlow backend.


In [2]:
CONSTANTS_FILE_PATH = 'DialogueManager/FileBrowserDM/constants.json'
constants_file = CONSTANTS_FILE_PATH

# Configuration written to the constants file; it has three sections: "run", "agent" and "emc".
consts = dict(
    run=dict(
        usersim=True,
        warmup_mem=1000,
        num_ep_run=40000,
        train_freq=100,
        max_round_num=40,
        success_rate_threshold=0.3,
    ),
    agent=dict(
        save_weights_file_path="my_weights/m_encoder.h5",
        # NOTE(review): 0 presumably means "do not load existing weights" — confirm against AgentFB.
        load_weights_file_path=0,
        vanilla=True,
        learning_rate=1e-3,
        batch_size=128,
        dqn_hidden_size=80,
        epsilon_init=0.7,
        gamma=0.7,
        max_mem_size=50000,
        agent_actions=[
            "Create_file", "Delete_file", "Change_directory",
            "inform", "ask", "request",
        ],
    ),
    emc=dict(
        slot_error_mode=0,
        slot_error_prob=0.05,
        intent_error_prob=0.0,
    ),
)

# Overwrite the constants file with the configuration above.
with open(constants_file, 'w') as f:
    json.dump(consts, f)

In [3]:
CONSTANTS_FILE_PATH = 'DialogueManager/FileBrowserDM/constants.json'
constants_file = CONSTANTS_FILE_PATH

# Read the configuration from the JSON constants file.
with open(constants_file) as f:
    constants = json.load(f)

# Unpack the "run" section into module-level settings.
run_dict = constants['run']
(USE_USERSIM,
 WARMUP_MEM,
 NUM_EP_TRAIN,
 TRAIN_FREQ,
 MAX_ROUND_NUM,
 SUCCESS_RATE_THRESHOLD) = [
    run_dict[name]
    for name in ('usersim', 'warmup_mem', 'num_ep_run',
                 'train_freq', 'max_round_num', 'success_rate_threshold')
]

# Flags handed positionally to AgentFB below; all four are switched on.
compress = train_batch = use_encoder = one_hot = True

# Simulated user, built from the configuration and the file-browser ontology graph.
user = UserSimulatorFB(constants, fbrowser.graph)

# NOTE(review): the meaning of the leading 1024 is not visible in this notebook — confirm
# against AgentFB's constructor.
dqn_agent = AgentFB(1024, constants, train_batch, use_encoder, compress, one_hot)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [4]:
def run_round(user):
    """
    Play one agent/user exchange and record it as an experience.

    The agent's state is read before it acts and again after it has been
    updated with the user's reply; both states, the chosen action index, the
    reward and the done flag are passed to dqn_agent.add_experience().

    :param user: object whose step(agent_action) returns
        (user_action, reward, done, success)
    :return: the (reward, done, success) values returned by user.step()
    """
    state_before = dqn_agent.get_state()

    # Agent acts, then the user responds to that action.
    action_index, action = dqn_agent.step()
    user_action, reward, done, success = user.step(action)

    # Feed the user's reply back into the agent before reading the next state.
    dqn_agent.update_state_user_action(user_action)
    state_after = dqn_agent.get_state()

    dqn_agent.add_experience(state_before, action_index, reward, state_after, done)
    return reward, done, success


def train_run():
    """
    Run the training loop for NUM_EP_TRAIN episodes.

    Every episode is started with episode_reset() and played with run_round()
    until run_round() reports it is done. After each block of TRAIN_FREQ
    episodes the statistics collected over that block are printed, the weights
    are saved when the block's success rate beats the best one seen so far,
    the block statistics are cleared, and dqn_agent.copy() followed by
    dqn_agent.train() is called.
    """
    print('Training Started...')

    best_rate = 0.0

    # Statistics accumulated over the current block of TRAIN_FREQ episodes.
    reward_sum = 0
    success_reward_sum = 0
    success_count = 0
    won_sizes, lost_sizes = [], []

    episode = 0
    while episode < NUM_EP_TRAIN:
        user = episode_reset()
        episode += 1

        # Play rounds until the episode is reported as done (always at least one round).
        episode_reward = 0
        while True:
            reward, done, success = run_round(user)
            reward_sum += reward
            episode_reward += reward
            if done:
                break

        success_count += success
        success_reward_sum += episode_reward * success
        goal_size = user.goal['goal_tree'].r_size()
        (won_sizes if success == 1 else lost_sizes).append(goal_size)

        # Nothing more to do until a full block of episodes has been played.
        if episode % TRAIN_FREQ != 0:
            continue

        success_rate = success_count / TRAIN_FREQ
        avg_reward = reward_sum / TRAIN_FREQ
        avg_success_reward = success_reward_sum / max(success_count, 1)

        # An empty list is reported as a single placeholder size of 1 so max()/min() have input.
        shown_won = won_sizes or [1]
        shown_lost = lost_sizes or [1]
        print('training after getting success_rate:', success_rate,
              " and avg_reward: ", avg_reward,
              " avg success reward ", avg_success_reward,
              " max tree size: ", max(shown_won),
              " avg size: ", float(sum(shown_won)) / max(1, len(shown_won)),
              " avg failure size: ", float(sum(shown_lost)) / max(1, len(shown_lost)),
              " min failure size: ", min(shown_lost))

        # Keep the weights of the best block so far.
        if success_rate > best_rate:
            best_rate = success_rate
            dqn_agent.save_weights()

        # Start a fresh block of statistics.
        reward_sum = success_reward_sum = success_count = 0
        won_sizes, lost_sizes = [], []

        dqn_agent.copy()
        dqn_agent.train()

    print('...Training Ended')


def episode_reset():
    """
    Start a new episode.

    Resets the module-level `user`, passes the action returned by that reset
    to dqn_agent.reset(), and returns the same `user` object.
    """
    opening_action = user.reset()
    dqn_agent.reset(opening_action)
    return user

In [None]:
# Replace two attributes on the agent with models built from dqn_agent.beh_model.
# NOTE(review): the second builder is spelled `_built_...` while the first is `_build_...` —
# confirm that both method names exist on AgentFB.
# NOTE(review): both builders are underscore-prefixed methods called from outside the class.
dqn_agent.get_state_output = dqn_agent._build_state_model(dqn_agent.beh_model)
dqn_agent.get_state_and_action = dqn_agent._built_state_action_model(dqn_agent.beh_model)

In [None]:
# Force the agent's `eps` attribute to 0.
# NOTE(review): presumably this is the exploration rate (the config sets "epsilon_init": 0.7),
# so 0 would make the agent act without exploration — confirm against AgentFB. This cell sits
# before the train_run() call, so running the notebook top to bottom trains with eps set to 0.
dqn_agent.eps = 0

In [5]:
# Start training. In the recorded run below this cell was stopped by hand (KeyboardInterrupt).
train_run()

Training Started...
ERROR HAPPENED AND IGNORING IT:  ('error from remove as ', AssertionError('dir3/ does not exist',))
ERROR HAPPENED AND IGNORING IT:  ('error from remove as ', AssertionError('dir4/ does not exist',))
training after getting success_rate: 0.19  and avg_reward:  -6.3  avg success reward  1.894736842105263  max tree size:  3  avg size:  1.4736842105263157  avg failure size:  4.666666666666667  min failure size:  1
Epoch 1/1
finished fitting on  1196  samples and avg triplet number:  0.0
ERROR HAPPENED AND IGNORING IT:  ('error from remove as ', AssertionError('dir3/ does not exist',))
ERROR HAPPENED AND IGNORING IT:  ('error from remove as ', AssertionError('dir2/ does not exist',))
training after getting success_rate: 0.07  and avg_reward:  -8.075  avg success reward  0.8571428571428571  max tree size:  2  avg size:  1.1428571428571428  avg failure size:  4.21505376344086  min failure size:  1
Epoch 1/1
finished fitting on  2349  samples and avg triplet number:  0.0
ER

KeyboardInterrupt: 

In [None]:
!pip install -U -q PyDrive

In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
# The Colab sign-in runs first; the application-default credentials are then attached to a
# GoogleAuth object, which is used to build the `drive` client used by the cells below.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
# Create a Drive file titled 'm_beh.h5', fill it from the local weights file and upload it.
# Needs the `drive` client created in the PyDrive authentication cell.
# NOTE(review): the agent config sets save_weights_file_path to "my_weights/m_encoder.h5" —
# confirm that 'my_weights/m_beh.h5' is really the file the agent writes.
uploaded = drive.CreateFile({'title': 'm_beh.h5'})
uploaded.SetContentFile('my_weights/m_beh.h5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

In [None]:
# Fetch a previously uploaded weights file from Google Drive into my_weights/.
# Needs the `drive` client created in the PyDrive authentication cell.
# The variable is deliberately not named `id`, so the builtin id() stays usable in later cells.
weights_file_id = '18eWGaUMTpqrnjSojPEUZwJp0Vq_UrgAS'
downloaded = drive.CreateFile({'id': weights_file_id})
downloaded.GetContentFile('my_weights/m_beh.h5')

In [None]:
def simulate():
    """
    Play one dialogue between the module-level `user` and `dqn_agent`, printing it.

    Prints the user's goal tree and opening action, then alternates agent and
    user actions, printing each one, until user.step() reports the dialogue is
    done. Nothing is returned and no experience is stored.
    """
    opening_action = user.reset()
    print('user goal:')
    user.goal['goal_tree'].print_tree()
    print('user: ', opening_action)
    dqn_agent.reset(opening_action)

    # At least one exchange always takes place before the done flag is checked.
    while True:
        _, agent_reply = dqn_agent.step()
        print('agent: ',agent_reply)
        user_reply, _, finished, _ = user.step(agent_reply)
        print('user: ', user_reply)
        dqn_agent.update_state_user_action(user_reply)
        if finished:
            break

In [None]:
# Print one simulated dialogue using the current `user` and `dqn_agent`.
simulate()