In [1]:

# Import common dependencies
import pandas as pd  # noqa
import numpy as np
import matplotlib  # noqa
import matplotlib.pyplot as plt
import datetime  # noqa
import PIL  # noqa
import glob  # noqa
import pickle  # noqa
from pathlib import Path  # noqa
from scipy import misc  # noqa
import sys
import tensorflow as tf
import pdb
TRADE_COST_FRAC = .003
EPSILON = 1e-10
ADV_MULT = 1e-3

  from ._conv import register_converters as _register_converters


In [2]:
uni_tokens = set()
uni_commands = set()
uni_actions = set()
fname = 'tasks_with_length_tags.txt'
with open(fname) as f:
    content = f.readlines()
content2 = [c.split(' ') for c in content]
# you may also want to remove whitespace characters like `\n` at the end of each line
commands = []
actions = []
content = [l.replace('\n', '') for l in content]
commands = [x.split(':::')[1].split(' ')[1:-1] for x in content]
actions = [x.split(':::')[2].split(' ')[1:-2] for x in content]
structures = [x.split(':::')[3].split(' ')[2:] for x in content]

structures = [[int(l) for l in program] for program in structures]
#actions = [[wd.replace('\n', '') for wd in res] for res in actions]

In [64]:
max_actions_per_subprogram = max([max([s for s in struct]) for struct in structures]) + 1
max_actions_per_subprogram

9

In [43]:
max_num_subprograms = max([len(s) for s in structures]) + 1
max_cmd_len = max([len(s) for s in commands]) + 1
max_act_len = max([len(a) for a in actions]) + 1
max_num_subprograms, max_cmd_len, max_act_len

(7, 10, 49)

In [4]:
def build_fmap_invmap(unique, num_unique):
    fmap = dict(zip(unique, range(num_unique)))
    invmap = dict(zip(range(num_unique), unique))
    return fmap, invmap

In [5]:
for li in content2:
    for wd in li:
        uni_tokens.add(wd)

In [6]:
for li in commands:
    for wd in li:
        uni_commands.add(wd)

In [7]:
for li in actions:
    for wd in li:
        uni_actions.add(wd)

In [8]:
uni_commands.add('end_command')
uni_actions.add('end_subprogram')
uni_actions.add('end_action')

In [9]:
num_cmd = len(uni_commands)
num_act = len(uni_actions)
size_emb = 32

In [10]:
command_map, command_invmap = build_fmap_invmap(uni_commands, num_cmd)
action_map, action_invmap = build_fmap_invmap(uni_actions, num_act)

In [11]:


def dense_scaled(prev_layer, layer_size, name=None, reuse=False, scale=1.0):
    output = tf.layers.dense(prev_layer, layer_size, reuse=reuse) * scale
    return output


def dense_relu(dense_input, layer_size, scale=1.0):
    dense = dense_scaled(dense_input, layer_size, scale=scale)
    output = tf.nn.leaky_relu(dense)

    return output

def get_grad_norm(opt_fcn, loss):
    gvs = opt_fcn.compute_gradients(loss)
    grad_norm = tf.sqrt(tf.reduce_sum(
        [tf.reduce_sum(tf.square(grad)) for grad, var in gvs if grad is not None]))
    return grad_norm


def apply_clipped_optimizer(opt_fcn, loss, clip_norm=.1, clip_single=.03, clip_global_norm=False):
    gvs = opt_fcn.compute_gradients(loss)

    if clip_global_norm:
        gs, vs = zip(*[(g, v) for g, v in gvs if g is not None])
        capped_gs, grad_norm_total = tf.clip_by_global_norm([g for g in gs], clip_norm)
        capped_gvs = list(zip(capped_gs, vs))
    else:
        grad_norm_total = tf.sqrt(
            tf.reduce_sum([tf.reduce_sum(tf.square(grad)) for grad, var in gvs if grad is not None]))
        capped_gvs = [(tf.clip_by_value(grad, -1 * clip_single, clip_single), var)
                      for grad, var in gvs if grad is not None]
        capped_gvs = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in capped_gvs if grad is not None]

    optimizer = opt_fcn.apply_gradients(capped_gvs)

    return optimizer, grad_norm_total


def mlp(x, hidden_sizes, output_size=None, name='', reuse=False):
    prev_layer = x

    for idx, l in enumerate(hidden_sizes):
        dense = dense_scaled(prev_layer, l, name='mlp' + name + '_' + str(idx))
        prev_layer = tf.nn.leaky_relu(dense)

    output = prev_layer

    if output_size is not None:
        output = dense_scaled(prev_layer, output_size, name='mlp' + name + 'final')

    return output

def mlp_with_adversaries(x, hidden_sizes, output_size=None, name='', reuse=False):
    prev_layer = x
    adv_phs = []
    for idx, l in enumerate(hidden_sizes):
        
        adversary = tf.placeholder_with_default(tf.zeros_like(prev_layer), prev_layer.shape)
        prev_layer = prev_layer + adversary
        adv_phs.append(adversary)
        
        dense = dense_scaled(prev_layer, l, name='mlp' + name + '_' + str(idx))
        prev_layer = tf.nn.leaky_relu(dense)

    output = prev_layer

    if output_size is not None:
        output = dense_scaled(prev_layer, output_size, name='mlp' + name + 'final')

    return output, adv_phs



In [120]:
tf.reset_default_graph()
cmd_mat = tf.Variable(1e-5*tf.random_normal([num_cmd, size_emb]))
act_mat = tf.Variable(1e-5*tf.random_normal([num_act, size_emb]))
global_bs = None
global_time_len = None
action_lengths = None
max_num_actions= None
# global_bs = 8
global_time_len = 8
max_num_actions = 9
cmd_ind = tf.placeholder(tf.int32, shape=(global_bs, global_time_len,))
act_ind = tf.placeholder(tf.int32, shape=(global_bs, global_time_len, max_num_actions))
cmd_lengths = tf.placeholder(tf.int32, shape=(global_bs))
act_lengths = tf.placeholder(tf.int32, shape=(global_bs, max_num_actions))

cmd_emb = tf.nn.embedding_lookup(cmd_mat, cmd_ind)
act_emb = tf.nn.embedding_lookup(act_mat, act_ind)


num_layers_encoder = 3
hidden_filters = 16
first_cell_encoder = [tf.nn.rnn_cell.LSTMCell(
    hidden_filters, forget_bias=1., name = 'layer1_'+d) for d in ['f', 'b']]
hidden_cells_encoder = [[tf.nn.rnn_cell.LSTMCell(
    hidden_filters,forget_bias=1., name = 'layer' + str(lidx) + '_' + d)  for d in ['f', 'b']]
                        for lidx in range(num_layers_encoder - 1)]
cells_encoder = [first_cell_encoder] + hidden_cells_encoder
c1, c2 = zip(*cells_encoder)
cells_encoder = [c1, c2]
def encode(x, num_layers, cells, initial_states, lengths, name='',):
    prev_layer = x
    shortcut = x
    hiddenlayers = []
    returncells = []
    cell_fw, cell_bw = cells
    bs = tf.shape(x)[0]
    for idx in range(num_layers):
        prev_layer, c = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cell_fw[idx],
                cell_bw = cell_bw[idx],
                inputs = prev_layer,
                sequence_length=lengths,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=tf.float32,
                scope='encoder'+str(idx)
            )
        prev_layer = tf.concat(prev_layer, 2)
        prev_layer = tf.nn.leaky_relu(prev_layer)
        returncells.append(c)
        hiddenlayers.append(prev_layer)
        if idx == num_layers - 1:
            #pdb.set_trace()
            output = tf.gather_nd(
                        prev_layer,
                        tf.stack([tf.range(bs), lengths], 1),
                        name=None
                    )
            return prev_layer, returncells, hiddenlayers, output
        prev_layer = tf.concat((prev_layer, shortcut), 2)
encoding_last_layer, encoding_final_cells, encoding_hidden_layers, encoding_last_timestep = encode(
    cmd_emb, num_layers_encoder, cells_encoder,None, lengths = cmd_lengths, name = 'encoder')
# encoding_last_timestep = encoding_last_layer[:,cmd_lengths, :]
num_layers_subprogram = 3
hidden_filters_encoder = encoding_last_timestep.shape[-1].value
hidden_filters_subprogram = 16
first_cell_subprogram = tf.nn.rnn_cell.LSTMCell(
    hidden_filters_subprogram, forget_bias=1., name = 'subpogramlayer1_')
hidden_cells_subprogram = [tf.nn.rnn_cell.LSTMCell(
    hidden_filters_subprogram,forget_bias=1., name = 'subpogramlayer' + str(lidx))
                        for lidx in range(num_layers_subprogram - 1)]
cells_subprogram = [first_cell_subprogram] + hidden_cells_subprogram


def subprogram(x, num_layers, cells, initial_states, lengths, name='',):
    prev_layer = x
    shortcut = x
    hiddenlayers = []
    returncells = []
    bs = tf.shape(x)[0]
    for idx in range(num_layers):
        prev_layer, c = tf.nn.dynamic_rnn(
                cell = cells[idx],
                inputs = prev_layer,
                sequence_length=lengths,
                initial_state = None,
                dtype=tf.float32,
                scope = name + 'subprogram' + str(idx)
            )
        prev_layer = tf.concat(prev_layer, 2)
        prev_layer = tf.nn.leaky_relu(prev_layer)
        returncells.append(c)
        hiddenlayers.append(prev_layer)
        if idx == num_layers - 1:
            output = tf.gather_nd(
                        prev_layer,
                        tf.stack([tf.range(bs), lengths], 1),
                        name=None
                    )
            return prev_layer, returncells, hiddenlayers, output
        prev_layer = tf.concat((prev_layer, shortcut), 2)
encodings = [encoding_last_timestep]
last_encoding = encoding_last_timestep
initial_cmb_encoding = last_encoding
loss = 0
action_probabilities_presoftmax = []
for sub_idx in range(max_num_subprograms):      
    
    subprogram_last_layer, _, subprogram_hidden_layers, subprogram_output = subprogram(
        tf.tile(tf.expand_dims(tf.concat((last_encoding, initial_cmb_encoding), 1), 1), [1, max_num_actions, 1]), 
        num_layers_subprogram, cells_subprogram,None, lengths = act_lengths[:, sub_idx], name = 'subprogram')
    action_prob_flat = mlp(
        tf.reshape(subprogram_last_layer, [-1, hidden_filters_subprogram]),
        [32,], output_size = num_act, name = 'action_choice_mlp', reuse = (sub_idx > 0))
    action_prob_expanded = tf.reshape(action_prob_flat, [-1, max_num_actions, hidden_filters_subprogram])
    action_probabilities_layer = tf.nn.softmax(action_prob_expanded, axis=-1)
    action_probabilities_presoftmax.append(action_prob_expanded)
    delta = mlp(
        subprogram_output, [64], output_size = hidden_filters_encoder, name = 'global_transform',
        reuse = (sub_idx > 0)
    )
    last_encoding = last_encoding + delta
    encodings.append(last_encoding)
act_presoftmax = tf.stack(action_probabilities_presoftmax, 1)
#batch, subprogram, timestep, action_selection
logprobabilities = tf.nn.log_softmax(act_presoftmax, -1)
'''
self.dec_relu_shaped = tf.reshape(
                self.dec_relu, [self.batch_size *  (self.max_conv_len-1), 
                                self.max_sent_len, self.num_wds])[:,:-1,:]   
self.ppl_loss_masked = tf.contrib.seq2seq.sequence_loss(
    logits = self.dec_relu_shaped,
    targets = self.target_decode,
    weights = self.mask_flat_decode,
    average_across_timesteps=False,
    average_across_batch=False,
    softmax_loss_function=None,
    name=None
)
'''
opt_fcn = tf.train.AdamOptimizer(learning_rate=1e-3)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for _ in range(1000):
    pass

In [121]:
logprobabilities

<tf.Tensor 'Reshape_29:0' shape=(?, 7, 9, 16) dtype=float32>

In [48]:
commands_ind = [[command_map[c] for c in cmd] + [0] * (max_cmd_len - len(cmd)) for cmd in commands]
actions_ind = [[action_map[a] for a in act] + [0] * (max_act_len - len(act)) for act in actions]
cmd_np = np.array(commands_ind)

In [117]:
actions_structured = []
for row in range(len(structures)):
    action_row = []
    act = actions_ind[row]
    struct = structures[row]
    start = 0
    for step in struct:
        end = start + step
        a = act[start:end]
        padding = max_actions_per_subprogram - step - 1
        action_row.append(a + [action_map['end_action']] + [0] * padding)
        start = end
    actions_structured.append(
        action_row + [[action_map['end_subprogram']] + [0] * (max_actions_per_subprogram - 1)] +
        [[0] * max_actions_per_subprogram] * (max_num_subprograms - len(struct))
    )
act_np = np.array(actions_structured)
struct_padded = [s + [0] * (max_num_subprograms - len(s)) for s in structures]
struct_np = np.array(struct_padded)

In [119]:
cmd_np.shape, act_np.shape, struct_np.shape

((20910, 10), (20910, 8, 9), (20910, 7))

In [114]:
structures

[[1],
 [1],
 [1],
 [1],
 [1],
 [1],
 [2],
 [2],
 [2],
 [2],
 [2],
 [2],
 [2],
 [2],
 [4],
 [4],
 [8],
 [8],
 [8],
 [8],
 [8],
 [8],
 [8],
 [8],
 [2],
 [2],
 [3],
 [3],
 [3],
 [3],
 [3],
 [3],
 [3],
 [3],
 [8, 8],
 [2, 2],
 [2, 2],
 [4, 4],
 [2, 2],
 [3, 3],
 [8, 8],
 [2, 2],
 [3, 3],
 [8, 8],
 [1, 1],
 [1, 1],
 [2, 2],
 [3, 3],
 [8, 8],
 [2, 2],
 [3, 3],
 [2, 2],
 [2, 2],
 [1, 1],
 [3, 3],
 [3, 3],
 [2, 2],
 [4, 4],
 [1, 1],
 [1, 1],
 [8, 8],
 [8, 8],
 [3, 3],
 [1, 1],
 [8, 8],
 [2, 2],
 [8, 8],
 [3, 3],
 [8, 8, 8],
 [2, 2, 2],
 [2, 2, 2],
 [4, 4, 4],
 [2, 2, 2],
 [3, 3, 3],
 [8, 8, 8],
 [2, 2, 2],
 [3, 3, 3],
 [8, 8, 8],
 [1, 1, 1],
 [1, 1, 1],
 [2, 2, 2],
 [3, 3, 3],
 [8, 8, 8],
 [2, 2, 2],
 [3, 3, 3],
 [2, 2, 2],
 [2, 2, 2],
 [1, 1, 1],
 [3, 3, 3],
 [3, 3, 3],
 [2, 2, 2],
 [4, 4, 4],
 [1, 1, 1],
 [1, 1, 1],
 [8, 8, 8],
 [8, 8, 8],
 [3, 3, 3],
 [1, 1, 1],
 [8, 8, 8],
 [2, 2, 2],
 [8, 8, 8],
 [3, 3, 3],
 [2, 2, 2, 2],
 [2, 2, 3, 3, 3],
 [2, 2, 1, 1],
 [2, 2, 1, 1, 1],
 [2, 2, 2, 2],
 

In [107]:
struct

[2, 2, 2, 2, 2, 2]

In [108]:
[0] * (max_actions_per_subprogram - 1)

[0, 0, 0, 0, 0, 0, 0, 0]

In [109]:
action_map

{'I_JUMP': 5,
 'I_LOOK': 0,
 'I_RUN': 3,
 'I_TURN_LEFT': 4,
 'I_TURN_RIGHT': 1,
 'I_WALK': 7,
 'end_action': 2,
 'end_subprogram': 6}

In [110]:
actions_structured[50]

[[4, 4, 5, 2, 0, 0, 0, 0, 0],
 [4, 4, 5, 2, 0, 0, 0, 0, 0],
 [6, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [111]:
print(*actions_ind[2])

1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


In [112]:
cmd_np[:2, :], act_np[:2, :]

(array([[ 3,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0, 10,  0,  0,  0,  0,  0,  0,  0,  0]]),
 array([[[0, 2, 0, 0, 0, 0, 0, 0, 0],
         [6, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0]],
 
        [[4, 2, 0, 0, 0, 0, 0, 0, 0],
         [6, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0]]]))

In [32]:
commands_ind[:2], actions_ind[:2]

([[3, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 10, 0, 0, 0, 0, 0, 0, 0, 0]],
 [[0, 0, 0, 0, 0, 0, 0, 0, 0], [4, 0, 0, 0, 0, 0, 0, 0, 0]])

In [20]:
commands[:2]

[['look'], ['turn', 'left']]

In [16]:
actions[:2]

[['I_LOOK'], ['I_TURN_LEFT']]

In [17]:
structures[:2]

[[1], [1]]

In [None]:
command_map

In [None]:
action_map

In [None]:
act_prob

In [None]:
subprogram_output

In [None]:
subprogram_output

In [None]:
subprogram_last_layer[:,cmd_lengths,:]

In [None]:
encoding_last_layer

In [None]:
tf.gather(
    encoding_last_layer,
    [1,2],
    axis=1
)

In [None]:
tf.gather_nd(
    encoding_last_layer,
    np.array([[0,1,2,3,4], [1,4,3,2,5]]).T,
    name=None
)

In [None]:
cmd_lengths

In [None]:
def generate_command(sub_cmd, num_repeat):
    return sub_cmd * num_repeat

In [None]:
def process_command(cmd):
    

In [None]:
uni_commands

In [None]:
uni_actions

In [None]:
uni_tokens

In [None]:
df.shape