In [None]:
#Starting as a notebook, but I should probably move most core functionality here to a python script.

import tensorflow as tf
from keras import backend as K

# Build a TF1-style session with the GPU `allow_growth` option enabled and
# hand that session to the Keras backend.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)
K.set_session(sess)


In [2]:
#Setting up the Unity environment

from gym_unity.envs.unity_env import UnityEnv, ActionFlattener

# Visual observations are requested, and the branched action space is
# flattened so the environment takes a single action index per step.
env = UnityEnv(
    "../../unity_envs/kais_banana2",
    worker_id=12,
    use_visual=True,
    flatten_branched=True,
)
print("Resetting env")
# The first observation is kept so a later cell can build the initial state from it.
initial_observation = env.reset()

In [18]:
#Importing parent level files
import inspect
import os
import sys

# Put the directory one level above the executing code's directory at the
# front of sys.path, so the `dfp` and `networks` imports below can resolve there.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from dfp import DFPAgent
from networks import Networks

#Setting up the agent.
misc = 100 # [Health]
prev_misc = misc
action_size = env.action_space.n
print("Env has ", action_size, " actions.")

# Measurement vector layout, in the order used when m_t is built: [health, food, poison]
measurement_size = 3
# Prediction offsets; the goal vector holds one weight per measurement per offset.
timesteps = [1,2,4,8,16,32]
goal_size = measurement_size * len(timesteps)

# Image input dimensions passed to the agent and the network.
img_rows, img_cols, img_channels = 84, 84, 3
state_size = (img_rows, img_cols, img_channels)

agent = DFPAgent(state_size, measurement_size, action_size, timesteps)
agent.model = Networks.dfp_network(
    state_size,
    measurement_size,
    goal_size,
    action_size,
    len(timesteps),
    agent.learning_rate,
)

Env has  27  actions.


In [19]:
import numpy as np

# Add a leading batch axis to the first observation.
# NOTE(review): presumably 1 x 84 x 84 x 3 given state_size -- confirm against the env.
s_t = np.expand_dims(initial_observation, axis=0)

# Pickup counters, maintained by the rollout loop.
food = poison = 0

# Initial measurement vector: [health / 100, food, poison].
m_t = np.array([misc/100.0, food, poison])

# Per-timestep goal weights [health, food, poison] = [0, +1, -1], repeated
# once for every prediction offset.
goal = np.tile([0, 1.0, -1.0], len(timesteps))
inference_goal = goal

In [20]:
#loading stored model
loaded_model = "../may24_fixed_visual_input_kais2_both_goal/model/dfp.h5"
agent.load_model(loaded_model)

# Use the final epsilon value right away.
agent.epsilon = agent.final_epsilon #To not start in explore-mode

# Rollout settings: episode-finished flag and number of environment steps to run.
done, num_timesteps = False, 300

In [21]:
#Running model on environment, remembering the "strongest" predictions.
#TODO: Consider moving all this to separate script, storing states, predictions, actions, and load/analyze
#in a different notebook.
#Storing the frame, selected action and all predictions
recorded_frames = []
recorded_actions = []
recorded_predictions = [] #ALL predictions for the recorded frame.
recorded_prediction_weighted_sums = [] #Predictions multiplied by the weights, giving 1 number for each action.
for t in range(num_timesteps):

    # One row of predictions per action; each row is weighted element-wise by the
    # goal vector below.
    # NOTE(review): presumably rows are len(timesteps) * measurement_size long -- confirm.
    predicted_effects  = agent.get_predicted_effects(s_t, m_t, goal)
    # Goal-weighted sum of each action's predictions -> one score per action.
    obj = np.sum(np.multiply(predicted_effects, inference_goal), axis=1) # num_action
    #KOE: Double-check the NN output a bit.
    # Greedy choice: take the action with the highest weighted score.
    action_idx = np.argmax(obj)
    print("Most optimistic predictions is ", obj[action_idx])
    print("The related measurements: ", predicted_effects[action_idx])
    print("Action is ", convert_action_id_to_name(action_idx))
    observation, reward, done, info = env.step(action_idx)

    # Record what was returned, chosen and predicted at this step for later analysis.
    recorded_frames.append(observation)
    recorded_actions.append(action_idx)
    recorded_predictions.append(predicted_effects)
    recorded_prediction_weighted_sums.append(obj)
    #print("Full set of predictions: ", recorded_predictions)
    #print("weighted predictions per action: ", obj)

    meas = info['brain_info'].vector_observations
    if (done):
        print("Game done at timestep ", t)
        print ("Episode Finish ")
        # New episode: health starts over at its initial value and the next
        # state comes from the freshly reset environment.
        misc = 100
        x_t1 = env.reset()
    else:
        x_t1 = observation
        misc = meas[0][0]

    # Add the batch axis to the next frame.
    # NOTE(review): presumably 1 x 84 x 84 x 3 given state_size -- confirm.
    s_t1 = np.expand_dims(x_t1, axis=0)

    if (reward==-1): # Pick up Poison
        poison += 1
        print("Picked up. Current poison is ", poison)
    if (reward==1): # Pick up food
        food += 1
        print("Picked up. Current food is ", food)
    # Update the cache
    prev_misc = misc

    #KOETODO: Think about normalization.
    # Measurement after transition. Built from `misc` rather than the raw step
    # reading so that, after an episode reset, the first frame of the new episode
    # is paired with the reset health instead of the previous episode's terminal
    # health. In the non-terminal case `misc` equals meas[0][0], so nothing changes.
    # NOTE(review): food/poison keep accumulating across episodes -- confirm intended.
    m_t = np.array([misc/100.0, food, poison])
    s_t = s_t1
#env.close()

Most optimistic predictions is  0.06538747623562813
The related measurements:  [0.         0.         0.         0.         0.04804292 0.
 0.         0.         0.10533348 0.         0.2107344  0.
 0.         0.34222224 0.33193916 0.         0.75202197 0.8503614 ]
Action is  backward, , ,
Most optimistic predictions is  0.4840906895697117
The related measurements:  [0.         0.         0.         0.         0.04716563 0.
 0.         0.12482984 0.09421777 0.         0.24970703 0.
 0.         0.43941167 0.36342672 0.         0.88787097 0.80724996]
Action is  forward,left, ,
Most optimistic predictions is  0.2186526544392109
The related measurements:  [0.         0.         0.         0.         0.0400974  0.
 0.         0.08907519 0.10163525 0.         0.20591941 0.
 0.         0.34177592 0.33539927 0.         0.7376002  0.75878096]
Action is  forward,left, ,
Most optimistic predictions is  0.21164832636713982
The related measurements:  [0.         0.         0.         0.         0.03

Most optimistic predictions is  0.5314603261649609
The related measurements:  [0.         0.         0.         0.         0.04701095 0.
 0.         0.16510354 0.13294742 0.         0.24654776 0.
 0.         0.4833635  0.4226499  0.         0.93927646 0.7942446 ]
Action is  forward,left, ,
Most optimistic predictions is  0.436542060226202
The related measurements:  [0.         0.         0.         0.         0.04783371 0.
 0.         0.12391742 0.11694493 0.         0.24066216 0.
 0.         0.46743923 0.44846672 0.         0.9228472  0.800746  ]
Action is  forward,left, ,
Most optimistic predictions is  0.4734281711280346
The related measurements:  [0.         0.         0.         0.         0.04302866 0.
 0.         0.11636762 0.10945717 0.         0.23481339 0.
 0.         0.4637572  0.42233247 0.         0.908423   0.76117206]
Action is  forward,left, ,
Most optimistic predictions is  0.42969199642539024
The related measurements:  [0.         0.         0.         0.         0.05

Most optimistic predictions is  0.36480559781193733
The related measurements:  [0.         0.         0.         0.         0.06015908 0.
 0.         0.08423515 0.0976622  0.         0.19225976 0.
 0.         0.3695267  0.34618396 0.         0.8872476  0.78477657]
Action is  forward,left, ,
Most optimistic predictions is  0.37582362815737724
The related measurements:  [0.         0.         0.         0.         0.06020805 0.
 0.         0.11593738 0.08829822 0.         0.23760426 0.
 0.         0.43548268 0.43180647 0.         0.9066809  0.85998493]
Action is  forward,left, ,
Most optimistic predictions is  0.38622692599892616
The related measurements:  [0.         0.         0.         0.         0.05813089 0.
 0.         0.13691458 0.10853892 0.         0.2561181  0.
 0.         0.50237554 0.4710129  0.         0.9640836  0.951844  ]
Action is  forward,left, ,
Most optimistic predictions is  0.3459654860198498
The related measurements:  [0.         0.         0.         0.         0

Most optimistic predictions is  0.3910714462399483
The related measurements:  [0.         0.         0.         0.         0.05028727 0.
 0.         0.10265968 0.05932098 0.         0.20657724 0.
 0.         0.39864647 0.30943495 0.         0.8109594  0.8093027 ]
Action is  forward,left, ,
Most optimistic predictions is  0.3846619315445423
The related measurements:  [0.         0.         0.         0.         0.04990038 0.
 0.         0.10198046 0.05920839 0.         0.20543686 0.
 0.         0.39620388 0.3088603  0.         0.80686253 0.8076535 ]
Action is  forward,left, ,
Most optimistic predictions is  0.37825387716293335
The related measurements:  [0.         0.         0.         0.         0.04950769 0.
 0.         0.1013019  0.05908899 0.         0.20429935 0.
 0.         0.39375725 0.30829036 0.         0.8027573  0.8059903 ]
Action is  forward,left, ,
Most optimistic predictions is  0.282483771443367
The related measurements:  [0.         0.         0.         0.         0.03

Most optimistic predictions is  -0.3633997291326523
The related measurements:  [0.         0.         0.         0.         0.07569713 0.
 0.         0.09170201 0.13084424 0.         0.13488413 0.
 0.         0.2589585  0.40542966 0.         0.42089343 0.809261  ]
Action is  forward,left, ,
Most optimistic predictions is  -0.41056896303780377
The related measurements:  [0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 4.1875686e-04
 0.0000000e+00 0.0000000e+00 6.1397672e-02 1.1656902e-01 0.0000000e+00
 1.0834967e-01 2.1417032e-01 0.0000000e+00 2.0279436e-01 3.5822302e-01
 0.0000000e+00 5.8831602e-01 6.8288308e-01]
Action is  backward, ,rot_left,
Most optimistic predictions is  -0.37771202251315117
The related measurements:  [0.         0.         0.         0.         0.03685417 0.
 0.         0.08815953 0.12280667 0.         0.14765039 0.
 0.         0.2650834  0.4319427  0.         0.43361026 0.7943204 ]
Action is  forward,left, ,
Most optimistic predictions is  -0.38200833275

Most optimistic predictions is  0.0071381255984306335
The related measurements:  [0.         0.         0.         0.         0.07394268 0.
 0.         0.14591368 0.09952505 0.         0.18373764 0.
 0.         0.3177189  0.46602654 0.         0.6773353  0.8259585 ]
Action is  forward,left, ,
Most optimistic predictions is  -0.16742787137627602
The related measurements:  [0.         0.         0.         0.         0.04844863 0.
 0.         0.08982211 0.1189911  0.         0.1693335  0.
 0.         0.2922396  0.41916078 0.         0.5659397  0.79505956]
Action is  forward,left, ,
Most optimistic predictions is  -0.21804731618613005
The related measurements:  [0.         0.         0.         0.         0.00440497 0.
 0.         0.09826287 0.10006135 0.         0.14480138 0.
 0.         0.2310161  0.37955508 0.         0.47175747 0.6886737 ]
Action is  forward,left, ,
Most optimistic predictions is  -0.18945341184735298
The related measurements:  [0.         0.         0.         0.    

Most optimistic predictions is  -0.30026960000395775
The related measurements:  [0.         0.         0.         0.         0.04951974 0.
 0.         0.09263662 0.09587223 0.         0.11942211 0.
 0.         0.19301687 0.3840832  0.         0.3972519  0.6721614 ]
Action is  forward,left, ,
Most optimistic predictions is  -0.3020607493817806
The related measurements:  [0.         0.         0.         0.         0.05240471 0.
 0.         0.0911423  0.07611869 0.         0.14661816 0.
 0.         0.19503093 0.3631513  0.         0.346033   0.69401985]
Action is  forward,left, ,
Most optimistic predictions is  -0.38934220001101494
The related measurements:  [0.         0.         0.         0.         0.03686784 0.
 0.         0.08052249 0.10545416 0.         0.10115252 0.
 0.         0.1405634  0.33810067 0.         0.308407   0.6133006 ]
Action is  forward,left, ,
Most optimistic predictions is  -0.2675540028139949
The related measurements:  [0.         0.         0.         0.       

Most optimistic predictions is  -0.3005738735664636
The related measurements:  [0.         0.         0.         0.         0.00332061 0.
 0.         0.08132581 0.07767363 0.         0.11819162 0.
 0.         0.16928779 0.34196305 0.         0.40360093 0.65666395]
Action is  forward,left, ,
Most optimistic predictions is  -0.43357281759381294
The related measurements:  [0.         0.         0.         0.         0.02350735 0.
 0.         0.08880195 0.12154901 0.         0.08662473 0.
 0.         0.11264937 0.34373224 0.         0.33572298 0.61559796]
Action is  forward,left, ,
Most optimistic predictions is  -0.4587254486978054
The related measurements:  [0.         0.         0.         0.         0.01917352 0.
 0.         0.07689604 0.10105109 0.         0.11015397 0.
 0.         0.09765031 0.3419089  0.         0.26961693 0.5892562 ]
Action is  forward,left, ,
Most optimistic predictions is  -0.5767456442117691
The related measurements:  [0.         0.         0.         0.        

Most optimistic predictions is  -0.34637515619397163
The related measurements:  [0.         0.         0.         0.         0.03271667 0.
 0.         0.05960276 0.10611358 0.         0.05476366 0.
 0.         0.07648683 0.26959473 0.         0.1570227  0.35125947]
Action is  forward,left, ,
Most optimistic predictions is  -0.3736500460654497
The related measurements:  [0.         0.         0.         0.         0.         0.
 0.         0.05115233 0.09537078 0.         0.03098617 0.
 0.         0.04831131 0.21567301 0.         0.07646559 0.26952165]
Action is  forward,left, ,
Most optimistic predictions is  -0.4600381087511778
The related measurements:  [0.         0.         0.         0.         0.         0.
 0.         0.         0.11409143 0.         0.02309151 0.06944993
 0.         0.04970235 0.23441167 0.         0.18520527 0.3000842 ]
Action is  backward, ,rot_right,
Game done at timestep  299
Episode Finish 


In [7]:
# Human-readable names per action branch; index i within a branch names choice i.
action_names = [
    [" ", "forward", "backward"],
    [" ", "right", "left"],
    [" ", "rot_left", "rot_right"],
    ["dont charge", "charge"],
]
#OK! These seem right.

# Flattener over the same branch sizes as the name table (3, 3, 3, 2).
# NOTE(review): the recorded run reports 27 env actions (3*3*3) and prints
# three-field action names, which does not match a 4-branch flattener -- confirm
# which branch layout the environment actually uses.
flattened_names = ActionFlattener([len(branch) for branch in action_names])

In [8]:
#%%
def convert_action_id_to_name(id):
    """Turn a flattened action id into a readable, comma-terminated string.

    The id is expanded into per-branch choice indices with
    `flattened_names.lookup_action`, and each index is looked up in the
    matching row of `action_names`. Every branch name is followed by a
    comma, including the last one.

    Args:
        id: Flattened action index understood by `flattened_names`.

    Returns:
        The concatenated branch names, each followed by ",".
    """
    branch_choices = flattened_names.lookup_action(id)
    return "".join(
        action_names[branch][choice] + ","
        for branch, choice in enumerate(branch_choices)
    )
#%%


In [None]:
import matplotlib.pyplot as plt

# Print the decoded name and raw id of the action chosen at each of the
# first 20 recorded steps (frame display is currently disabled).
for frame in range(20):#len(recorded_frames)):
    #plt.figure()
    #plt.imshow(np.array(recorded_frames[frame]))
    chosen_action = recorded_actions[frame]
    print("Suggesting action: ", convert_action_id_to_name(chosen_action))
    print("id: ", chosen_action)