# Testing Agents

In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm_notebook as tqdm
from tqdm import trange, tnrange

from deep_qnet_agent import DQNAgentKeras
from environment import LanderEnvironment
from full_lander_sim import *

In [4]:
def network_config():
    """Build the configuration dictionary passed to the agent's network.

    Returns:
        dict: a new dictionary on every call, holding the single entry
        ``'alpha'`` = 0.01 (presumably the learning rate -- confirm against
        ``DQNAgentKeras``).
    """
    return {'alpha': 0.01}


def agent_config():
    """Build the configuration dictionary passed to the DQN agent.

    Returns:
        dict: a new dictionary on every call with the keys ``'gamma'``,
        ``'eps0'``, ``'epsf'``, ``'n_eps'``, ``'minib'`` and ``'max_mem'``
        (in that insertion order). The meaning of each key is defined by
        ``DQNAgentKeras``; the names suggest discount factor, initial/final
        exploration rate, exploration decay length, minibatch size and
        replay-memory capacity -- confirm against the agent implementation.
    """
    return dict(
        gamma=0.9,
        eps0=0.8,
        epsf=0.0,
        n_eps=800,
        minib=50,
        max_mem=500000,
    )

In [5]:
# Create the lander environment and a fresh DQN agent built from the default
# agent/network configuration dictionaries defined above.
env=LanderEnvironment()
agent=DQNAgentKeras(agent_config(),network_config(),env)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               4608      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 1028      
Total params: 136,964
Trainable params: 136,964
Non-trainable params: 0
_________________________________________________________________
None


In [5]:
def save_agent(agent, fname):
    """Persist an agent's configuration and network weights to disk.

    Two artifacts are written:
      * ``fname`` -- a pickle holding ``{'agentcon': ..., 'netcon': ...}``,
        where each value is read from the live agent via ``getattr`` using
        the key names of ``agent_config()`` / ``network_config()``.
      * ``'Checkpoints/' + fname + '_checkpoint'`` -- the weights of
        ``agent.model`` saved through ``save_weights``.

    Args:
        agent: agent exposing one attribute per configuration key and a
            ``model`` attribute with a ``save_weights`` method.
        fname (str): path of the configuration pickle; also used as the stem
            of the checkpoint name.
    """
    # NOTE(review): `env` is the notebook-level global, not a parameter, so
    # this resets whichever environment is currently bound to that name.
    env.reset()

    # The config factories are used only for their key names; the stored
    # values are the ones currently held by the agent.
    agentcon = {key: getattr(agent, key) for key in agent_config()}
    netcon = {key: getattr(agent, key) for key in network_config()}
    agent_data = {'agentcon': agentcon, 'netcon': netcon}

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(fname, 'wb') as config_file:
        pickle.dump(agent_data, config_file)
    agent.model.save_weights('Checkpoints/' + fname + '_checkpoint')


def load_agent(fname, env):
    """Rebuild an agent from the files written by ``save_agent``.

    Args:
        fname (str): path of the configuration pickle; the weights are read
            from ``'Checkpoints/' + fname + '_checkpoint'``.
        env: environment instance handed to the ``DQNAgentKeras`` constructor.

    Returns:
        A new ``DQNAgentKeras`` built from the stored configuration, with the
        stored weights loaded into its ``model``.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load configuration files you created yourself.
    with open(fname, 'rb') as config_file:
        agent_data = pickle.load(config_file)
    agent = DQNAgentKeras(agent_data['agentcon'], agent_data['netcon'], env)
    agent.model.load_weights('Checkpoints/' + fname + '_checkpoint')
    return agent


def do_run(agent, env, N_ep):
    """Run `agent` in `env` for `N_ep` episodes, updating its network each step.

    Each episode resets the environment, then repeatedly selects an action,
    steps the environment and calls ``agent.update_net`` until the
    environment reports ``done``. A notebook progress bar shows the return
    of the most recently finished episode.

    Args:
        agent: object providing ``action_select(env, observation)`` and
            ``update_net(observation, reward, done)``.
        env: object providing ``reset()`` and ``step(action)`` returning
            ``(observation, reward, done, info)``.
        N_ep (int): number of episodes to run.

    Returns:
        tuple: ``(R_ep, agent, env)`` where ``R_ep`` is the list of summed
        rewards, one entry per episode.
    """
    R_ep = []
    progress = tnrange(N_ep, desc='bar_desc', leave=True)
    for _episode in progress:
        observation = env.reset()
        done = False
        episode_return = 0
        while not done:
            action = agent.action_select(env, observation)
            observation, reward, done, _info = env.step(action)
            agent.update_net(observation, reward, done)
            episode_return += reward
        R_ep.append(episode_return)
        # Surface the latest episode return in the progress-bar label.
        progress.set_description('Last reward: {}'.format(episode_return))
        progress.refresh()
    return R_ep, agent, env


def agent_demo(agent, env, N_ep):
    """Play `N_ep` rendered episodes with `agent` without updating its network.

    Args:
        agent: object providing ``action_select(env, observation)``.
        env: object providing ``reset()``, ``step(action)`` and ``render()``.
        N_ep (int): number of episodes to play.

    Returns:
        list: summed reward of each episode, in play order.
    """
    episode_returns = []
    for _episode in tqdm(range(N_ep)):
        obs = env.reset()
        total = 0
        # An episode always takes at least one step; stop once the
        # environment flags the episode as finished.
        while True:
            chosen = agent.action_select(env, obs)
            obs, step_reward, finished, _ = env.step(chosen)
            env.render()
            total += step_reward
            if finished:
                break
        episode_returns.append(total)
    return episode_returns


def data_smooth(data, n_avg):
    """Average `data` over consecutive, non-overlapping windows of `n_avg` items.

    Only complete windows are averaged; trailing items that do not fill a
    whole window are ignored. The returned list always begins with a
    literal ``0`` followed by one mean per complete window.

    Args:
        data: sliceable sequence of numbers.
        n_avg (int): window length.

    Returns:
        list: ``[0, mean(window_1), mean(window_2), ...]``.
    """
    # Exclusive end index of every complete window.
    window_ends = np.arange(n_avg, len(data) + 1, n_avg)
    return [0] + [np.mean(data[end - n_avg:end]) for end in window_ends]

## Run agent

In [7]:
# Smoke test: run a single reset -> action selection -> environment step ->
# network update cycle with the agent and environment created above.
observation = env.reset()
action = agent.action_select(env,observation)
observation, reward, done, _ = env.step(action)
agent.update_net(observation,reward,done)

In [8]:
# Save/load round trip, step 1: write the agent's config pickle to 'test_agent'
# and its weights to 'Checkpoints/test_agent_checkpoint'.
save_agent(agent,'test_agent')

In [9]:
# Save/load round trip, step 2: rebuild a separate agent from the files on disk.
agent_load = load_agent('test_agent',env)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               4608      
_________________________________________________________________
dense_4 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_5 (Dense)              (None, 4)                 1028      
Total params: 136,964
Trainable params: 136,964
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
# Record the hyper-parameters used for this batch of training runs.
print(agent_config())
print(network_config())

# Pre-recorded experience used to seed every agent's replay memory.
fname = 'lander_full_heuristic_13_11'
# SECURITY: allow_pickle=True can execute arbitrary code stored in the file;
# only load demonstration data you generated yourself.
with open(fname, 'rb') as demo_file:
    demo_exp = np.load(demo_file, allow_pickle=True).tolist()

N_attempt = 10
N_ep = 1000  # episodes per agent
# NOTE(review): numbering starts at 1, so this trains N_attempt - 1 agents
# ('agent1' .. 'agent9'); confirm whether 'agent0' is meant to be included.
for agent_no in range(1, N_attempt):
    print('Agent ' + str(agent_no))

    env = LanderEnvironment()
    agent = DQNAgentKeras(agent_config(), network_config(), env)
    # Give each agent its own copy of the demonstrations so that any in-place
    # growth of one agent's memory cannot leak into the next attempt
    # (assumes the agent may modify `memory` in place -- confirm).
    agent.memory = demo_exp.copy()

    R_ep, agent, env = do_run(agent, env, N_ep)
    mean_end = np.mean(R_ep[-100:])
    print('R end: ' + repr(mean_end))

    save_agent(agent, 'agent' + str(agent_no))
    # Context manager guarantees the reward history is flushed to disk.
    with open('reward' + str(agent_no), 'wb') as reward_file:
        pickle.dump(R_ep, reward_file)

{'gamma': 0.9, 'eps0': 0.8, 'epsf': 0.0, 'n_eps': 800, 'minib': 50, 'max_mem': 500000}
{'alpha': 0.01}
Agent 1
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 512)               4608      
_________________________________________________________________
dense_25 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_26 (Dense)             (None, 4)                 1028      
Total params: 136,964
Trainable params: 136,964
Non-trainable params: 0
_________________________________________________________________
None


HBox(children=(IntProgress(value=0, description='bar_desc', max=1000, style=ProgressStyle(description_width='i…


R end: -901.3245380383638
Agent 2
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_27 (Dense)             (None, 512)               4608      
_________________________________________________________________
dense_28 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_29 (Dense)             (None, 4)                 1028      
Total params: 136,964
Trainable params: 136,964
Non-trainable params: 0
_________________________________________________________________
None


HBox(children=(IntProgress(value=0, description='bar_desc', max=1000, style=ProgressStyle(description_width='i…


R end: -1035.3068571679119
Agent 3
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_30 (Dense)             (None, 512)               4608      
_________________________________________________________________
dense_31 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_32 (Dense)             (None, 4)                 1028      
Total params: 136,964
Trainable params: 136,964
Non-trainable params: 0
_________________________________________________________________
None


HBox(children=(IntProgress(value=0, description='bar_desc', max=1000, style=ProgressStyle(description_width='i…

KeyboardInterrupt: 

In [14]:
# Reload a previously saved agent into a fresh environment.
# NOTE(review): the training loop in this notebook numbers agents from 1, so
# the 'agent0' files must come from an earlier run -- confirm they exist.
env = LanderEnvironment()
agent = load_agent('agent0', env)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 512)               4608      
_________________________________________________________________
dense_16 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_17 (Dense)             (None, 4)                 1028      
Total params: 136,964
Trainable params: 136,964
Non-trainable params: 0
_________________________________________________________________
None
