In [1]:
import numpy as np
import tensorflow as tf
from functools import reduce
import matplotlib.pyplot as plt
from tensorflow.python import debug as tf_debug
from numpy.random import RandomState
import random
import time
import threading 
from tensorflow.python.client import timeline
import os
import json
import sys

#model flags
# Command-line flags for this run; their values are read back through FLAGS below.
tf.flags.DEFINE_boolean("debug", False, "weather run in a dubg mode")
tf.flags.DEFINE_boolean("norm", True, "weather to norm grads")
# The default seed is drawn from Python's `random` at definition time, so it
# differs from run to run unless --seed is given explicitly.
tf.flags.DEFINE_integer("seed", round(random.random()*100000), "the global simulation seed for np and tf")
tf.flags.DEFINE_string("name", "predef_sim_name" , "name of the simulation")

# Floating-point dtype shared by the model's constants, placeholders and variables.
datatype = tf.float64
FLAGS = tf.flags.FLAGS


#set random seed
# Seeds TensorFlow's graph-level random ops from the --seed flag.
# NOTE(review): the flag help text mentions "np and tf", but only TensorFlow is
# seeded here -- confirm whether NumPy was meant to use this flag as well.
tf.set_random_seed(FLAGS.seed)

def variable_summaries(var):
  """Register TensorBoard summaries (mean, stddev, max, min, histogram) for a tensor.

  Every summary is created inside a name scope built from ``var.name`` with
  ':' replaced by '_'.
  """
  scope_name = var.name.replace(":","_")
  with tf.name_scope(scope_name):
    avg = tf.reduce_mean(var)
    tf.summary.scalar('mean', avg)
    # Population standard deviation: sqrt(mean((var - mean)^2)).
    squared_deviation = tf.square(var - avg)
    spread = tf.sqrt(tf.reduce_mean(squared_deviation))
    tf.summary.scalar('stddev', spread)
    tf.summary.scalar('max', tf.reduce_max(var))
    tf.summary.scalar('min', tf.reduce_min(var))
    tf.summary.histogram('histogram', var)

def write_no_tf_summary(writer, tag, val, step):
    """Write a plain scalar to ``writer`` under ``tag`` at ``step`` without a graph op.

    A Summary protobuf holding a single simple value is built by hand and
    passed to ``writer.add_summary``.
    """
    record = tf.Summary()
    record.value.add(tag=tag, simple_value=val)
    writer.add_summary(record, step)
    
def split_train_test (x, y , test_ratio):
    """Split paired 2-D sample matrices into leading train rows and trailing test rows.

    Args:
        x: 2-D array of input features, one sample per row.
        y: 2-D array of targets; must have exactly the same shape as ``x``.
        test_ratio: fraction of the rows assigned to the test split; the
            resulting row count is truncated with ``int()``.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``. The train parts are the
        first ``train_len`` rows, the test parts are the remaining rows.

    Raises:
        ValueError: if ``x`` and ``y`` differ in shape, or if a resulting
            split does not have the expected shape (e.g. ``test_ratio``
            outside the 0..1 range).
    """
    if y.shape != x.shape:
        raise ValueError('Model expects x and y shapes to be the same')

    num_rows = x.shape[0]
    test_len = int(num_rows * test_ratio)
    train_len = num_rows - test_len

    # Take the test rows from train_len onward rather than with a negative
    # start index: x[-0:] is the whole array, so a zero-length test split used
    # to come back with every row and fail the shape check below.
    x_train, x_test = x[:train_len], x[train_len:]
    y_train, y_test = y[:train_len], y[train_len:]

    print(train_len)
    print(test_len)

    train_shape = (train_len, x.shape[1])
    test_shape = (test_len, x.shape[1])

    shapes_ok = (
        x_train.shape == train_shape
        and y_train.shape == train_shape
        and x_test.shape == test_shape
        and y_test.shape == test_shape
    )
    if not shapes_ok:
        raise ValueError('One of the conversion test/train shapes gone wrong')

    return x_train, x_test, y_train, y_test

#helper func
def get_time_hhmmss(dif):
    """Format a duration given in seconds as a zero-padded ``HH:MM:SS`` string."""
    minutes, seconds = divmod(dif, 60)
    hours, minutes = divmod(minutes, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)


#sample gen functions
def np_add(vec):
    """Fold ``vec`` with ``+`` from left to right and return the total."""
    def _plus(running, item):
        return running + item

    return reduce(_plus, vec)

def np_mult(vec):
    """Fold ``vec`` with ``*`` from left to right and return the product."""
    def _times(running, item):
        return running * item

    return reduce(_times, vec)

def np_stall(vec):
    """Return ``vec`` unchanged (identity / no-op sample function)."""
    return vec

def samples_generator(fn, shape, rng, seed):
    '''
    Draw random input rows and compute their target rows with fn:
    @fn - function applied to every input row (via np.apply_along_axis) to get the output
    @shape - shape of the features matrix (num_samples, num_features)
    @rng - (a, b) bounds; inputs are a + (b - a) * u with u drawn from [0, 1)
    @seed - seed of the private RandomState, so the same seed reproduces the same samples
    Outputs a tuple (x, y): x has the requested shape, y holds fn's result per
    row in its leading column(s), padded on the right with zeros to x's width
    '''
    low, high = rng[0], rng[1]
    num_rows, num_cols = shape[0], shape[1]

    unit_draws = RandomState(seed).random_sample(shape)
    x = (high - low) * unit_draws + low

    targets = np.apply_along_axis(fn, 1, x).reshape((num_rows, -1))
    # Pad the targets with zero columns so that y is as wide as x.
    padding = np.zeros((num_rows, num_cols - targets.shape[1]))
    y = np.concatenate([targets, padding], axis=1)

    return x, y

#configuration constants
# Training-schedule sizes; num_epochs is derived from the two values above it.
total_num_epochs = 10000000
iters_per_epoch = 1
num_epochs = total_num_epochs // iters_per_epoch
# Width of the recurrent state vector (init_state columns, W output columns).
state_size = 100
# Number of selectable operations (width of the softmax produced from W2/b2).
num_of_operations = 3
# Number of unrolled timesteps in run_forward_pass.
max_output_ops = 5
# Columns per sample and number of generated samples.
num_features = 3
num_samples = 1500
# (low, high) bounds handed to samples_generator.
samples_value_rng = (-100, 100)
# Fraction of the samples handed to split_train_test as the test share.
test_ratio = 0.33333333333
batch_size  = 100
# Scale passed to tf.truncated_normal when W and W2 are created.
param_init = 0.1
# Arguments handed to tf.train.AdamOptimizer.
learning_rate = 0.005
epsilon=1e-6
# Clip norm for tf.clip_by_global_norm; 10e1 evaluates to 100.0.
grad_norm = 10e1
# Seed passed to samples_generator. This is a fixed value, separate from FLAGS.seed.
seed = 70948
# Function that produces the target from each input row.
train_fn = np_add
# Local aliases of the command-line flags.
name = FLAGS.name
norm = FLAGS.norm

#dump globals
# Create the per-run summary directory. makedirs also creates a missing
# './summaries' parent, which plain os.mkdir would fail on.
try:
    os.makedirs('./summaries/' + FLAGS.name)
except FileExistsError:
    print("Dir already exists")

# Write a snapshot of the module globals (i.e. the configuration of this run)
# next to the summaries. Printing straight into the file through a context
# manager closes the handle and never leaves sys.stdout redirected if the
# write fails.
# stdout_org is still bound because code outside this section may reference it.
stdout_org = sys.stdout
with open('./summaries/' + FLAGS.name  + '/globals.txt', 'w') as globals_file:
    print(globals(), file=globals_file)

#model operations
def tf_multiply(inpt):
    """Multiply the entries of each row of ``inpt``; the result is reshaped to [batch_size, -1]."""
    row_products = tf.reduce_prod(inpt, axis = 1, name = "tf_mult")
    return tf.reshape(row_products, [batch_size, -1], name = "tf_mult_reshape")

def tf_add(inpt):
    """Sum the entries of each row of ``inpt``; the result is reshaped to [batch_size, -1]."""
    row_sums = tf.reduce_sum(inpt, axis = 1, name = "tf_add")
    return tf.reshape(row_sums, [batch_size, -1], name = "tf_add_reshape")

def tf_stall(a):
    """Return ``a`` unchanged (the no-op model operation)."""
    return a


#model constants
# All-zero [batch_size, num_features] matrix; run_forward_pass slices columns
# out of it to pad the add/mult results.
dummy_matrix = tf.zeros([batch_size, num_features], dtype=datatype, name="dummy_constant")

#model placeholders
# Input and target batches: batch_size rows, column count left unspecified (None).
batchX_placeholder = tf.placeholder(datatype, [batch_size, None], name="batchX")
batchY_placeholder = tf.placeholder(datatype, [batch_size, None], name="batchY")

# Recurrent state fed in at the start of the unrolled forward pass.
init_state = tf.placeholder(datatype, [batch_size, state_size], name="init_state")

#model parameters
# Recurrent layer: maps the concatenated [input, state] row
# (num_features + state_size wide) to the next state.
# NOTE(review): in tf.truncated_normal's signature the two positional arguments
# after the shape are mean and stddev, so this draws around -param_init with
# stddev param_init -- confirm a symmetric range around zero was not intended.
W = tf.Variable(tf.truncated_normal([state_size+num_features, state_size], -1*param_init, param_init, dtype=datatype), dtype=datatype, name="W")
b = tf.Variable(np.zeros((state_size)), dtype=datatype, name="b")
variable_summaries(W)
variable_summaries(b)

# Selection layer: maps the state to num_of_operations logits.
# The NOTE(review) on W about mean/stddev applies here as well.
W2 = tf.Variable(tf.truncated_normal([state_size, num_of_operations], -1*param_init, param_init, dtype=datatype),dtype=datatype, name="W2")
b2 = tf.Variable(np.zeros((num_of_operations)), dtype=datatype, name="b2")
variable_summaries(W2)
variable_summaries(b2)

    #forward pass
def run_forward_pass(mode="train"):
    """Build the unrolled graph that repeatedly mixes add / mult / stall results.

    For each of ``max_output_ops`` timesteps the current value matrix is
    concatenated with the recurrent state and pushed through a tanh layer
    (``W``, ``b``) to obtain the next state. A softmax over the logits from
    ``W2``/``b2`` then weights three candidate results computed from the
    current values: the row-wise sum (softmax column 0), the row-wise product
    (column 1) and the unchanged values (column 2). The weighted candidates
    are added up and become the input of the next timestep.

    Args:
        mode: ``"test"`` replaces the softmax by the one-hot encoding of its
            argmax (hard selection); any other value keeps the soft mixture.

    Returns:
        Tuple ``(output, current_state, softmax, outputs, softmaxes)``: the
        output, state and selection weights of the last timestep, followed by
        the lists of outputs and selection weights of every timestep.
    """
    # Compare by value: `mode is "test"` only worked when the interpreter
    # happened to intern both strings.
    use_hard_selection = mode == "test"

    current_state = init_state
    output = batchX_placeholder
    outputs = []
    softmaxes = []

    for timestep in range(max_output_ops):
        print("timestep " + str(timestep))
        current_input = output

        # Recurrent state update from the concatenated [input, state] row.
        input_and_state_concatenated = tf.concat([current_input, current_state], 1, name="concat_input_state")  # Increasing number of columns
        pre_activation = tf.add(tf.matmul(input_and_state_concatenated, W, name="input-state_mult_W"), b, name="add_bias")  # Broadcasted addition
        next_state = tf.tanh(pre_activation, name="tanh_next_state")
        current_state = next_state

        # Selection weights over the operations.
        logits = tf.add(tf.matmul(next_state, W2, name="state_mul_W2"), b2, name="add_bias2")  # Broadcasted addition
        softmax = tf.nn.softmax(logits, name="get_softmax")

        # Candidate results of every operation.
        add   = tf_add(current_input)
        mult  = tf_multiply(current_input)
        stall = tf_stall(current_input)

        # In test mode the soft weights are replaced by a hard one-hot choice.
        if use_hard_selection:
            argmax = tf.argmax(softmax, 1)
            softmax = tf.one_hot(argmax, num_of_operations, dtype=datatype)

        # One weight column per operation.
        add_softmax   = tf.slice(softmax, [0,0], [batch_size,1], name="slice_add_softmax_val")
        mult_softmax  = tf.slice(softmax, [0,1], [batch_size,1], name="slice_mult_softmax_val")
        stall_softmax = tf.slice(softmax, [0,2], [batch_size,1], name="stall_mult_softmax_val")

        add_width  = tf.shape(add, name="add_op_shape")[1]
        mult_width = tf.shape(mult, name="mult_op_shape")[1]

        add_final   = tf.multiply(add, add_softmax, name="mult_add_softmax")
        mult_final  = tf.multiply(mult, mult_softmax, name="mult_mult_softmax")
        stall_final = tf.multiply(stall, stall_softmax, name="mult_stall_softmax")

        # Pad the add and mult results with zero columns up to num_features so
        # they can be added to the stall result.
        add_padding  = tf.slice(dummy_matrix, [0,0], [batch_size, num_features - add_width], name="slice_dum_add")
        add_final    = tf.concat([add_final, add_padding], 1, name="concat_add_op_dummy_zeros")
        mult_padding = tf.slice(dummy_matrix, [0,0], [batch_size, num_features - mult_width], name="slice_dum_mult")
        mult_final   = tf.concat([mult_final, mult_padding], 1, name="concat_mult_op_dummy_zeros")

        output = tf.add(add_final, mult_final, name="add_final_op_mult_add")
        output = tf.add(output, stall_final, name="add_final_op_stall")
        outputs.append(output)
        softmaxes.append(softmax)

    return output, current_state, softmax, outputs, softmaxes

#cost function
def calc_loss(output):
    """Half squared error between ``output`` and ``batchY_placeholder``.

    Returns a tuple ``(total_loss, math_error)``: ``math_error`` is the
    element-wise ``0.5 * (output - target)^2`` and ``total_loss`` is its sum
    over all elements.
    """
    half = tf.constant(0.5, dtype=datatype)
    residual = tf.subtract(output , batchY_placeholder, name="sub_otput_batchY")
    squared_residual = tf.square(residual, name="squar_error")
    math_error = tf.multiply(half, squared_residual, name="mult_with_0.5")

    return tf.reduce_sum(math_error, name="red_total_loss"), math_error

# Build the unrolled graph twice over the same W, b, W2, b2 variables:
# once in "train" mode and once in "test" mode, each with its own loss.
output_train, current_state_train, softmax_train, outputs_train, softmaxes_train = run_forward_pass(mode = "train")
total_loss_train, math_error_train = calc_loss(output_train)

output_test, current_state_test, softmax_test, outputs_test, softmaxes_test = run_forward_pass(mode = "test")
total_loss_test, math_error_test = calc_loss(output_test)

# Differentiate the training LOSS with respect to the parameters. Taking the
# gradient of output_train would push the summed output up or down instead of
# reducing the error against batchY_placeholder.
grads_raw = tf.gradients(total_loss_train, [W,b,W2,b2], name="comp_gradients")

#clip gradients by global norm and add summaries
if norm:
    print("norming the grads")
    # norms is the single global norm of all gradients before clipping.
    grads, norms = tf.clip_by_global_norm(grads_raw, grad_norm)
    variable_summaries(norms)
else:
    grads = grads_raw

for grad in grads:
    variable_summaries(grad)


# epsilon must be passed by keyword: the second positional parameter of
# AdamOptimizer is beta1, so a positional epsilon silently set beta1 = 1e-6.
train_step = tf.train.AdamOptimizer(learning_rate, epsilon=epsilon, name="AdamOpt").apply_gradients(zip(grads, [W,b,W2,b2]), name="min_loss")
print("grads are")
print(grads)

#pre training setting
# Print NumPy arrays with 3 decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)
#train_fn = np_mult
#train_fn = np_stall
# Generate the synthetic data set and split it into train and test rows.
x,y = samples_generator(train_fn, (num_samples, num_features) , samples_value_rng, seed)
x_train, x_test, y_train, y_test = split_train_test (x, y , test_ratio)
# Integer division: a trailing partial batch is not counted.
num_batches = x_train.shape[0]//batch_size
num_test_batches = x_test.shape[0]//batch_size
#model training

#create a saver to save the trained model
# Only the trainable variables are handed to the saver.
saver=tf.train.Saver(var_list=tf.trainable_variables())

#Enable jit
# Session configuration with the global JIT level set to ON_1.
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

# Restore the trained parameters and evaluate both graph variants (soft
# softmax mix and hard one-hot selection) on the full test batches.
with tf.Session(config=config) as sess:
    # Loss accumulators; the two leading zeros do not change the printed sums.
    loss_list_train_soft = [0,0]
    loss_list_train_hard = [0,0]
    loss_list_test_soft = [0,0]
    loss_list_test_hard = [0,0]
    # Merge all the summaries and write them out
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./summaries/' + FLAGS.name ,sess.graph)
    try:
        ##enable debugger if necessary
        if FLAGS.debug:
            print("Running in a debug mode")
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

        # Initialise everything first, then overwrite the saved variables with
        # the latest checkpoint found under `path`.
        sess.run(tf.global_variables_initializer())
        path = './summaries/100_state_1000_samples_100_batch_5_ops_np_add_no_clip_pred_work_seed/model/.'
        saver.restore(sess, tf.train.latest_checkpoint(path))

        # The state returned for one batch is fed as init_state of the next
        # batch, separately for the soft and the hard graph.
        _current_state_train = np.zeros((batch_size, state_size))
        _current_state_test = np.zeros((batch_size, state_size))
        for batch_idx in range(num_test_batches):
            start_idx = batch_size * batch_idx
            end_idx = start_idx + batch_size

            batchX = x_test[start_idx:end_idx]
            batchY = y_test[start_idx:end_idx]

            # Soft (softmax-weighted) graph on the test batch.
            _total_loss_train, _current_state_train = sess.run(
                [total_loss_train, current_state_train],
                feed_dict={
                    init_state: _current_state_train,
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                })
            loss_list_test_soft.append(_total_loss_train)

            # Hard (one-hot) graph on the same batch.
            _total_loss_test, _current_state_test = sess.run(
                [total_loss_test, current_state_test],
                feed_dict={
                    init_state: _current_state_test,
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                })
            loss_list_test_hard.append(_total_loss_test)

        print("Sotfmax test loss\t", sum(loss_list_test_soft))
        print("Hardmax test loss\t", sum(loss_list_test_hard))

        # Fetch into a separate name: assigning the result back to W2 replaced
        # the tf.Variable with a list of NumPy arrays for all later code.
        w2_values = sess.run([W2])
        print(w2_values)
    finally:
        # Flush and release the event file even when evaluation fails.
        train_writer.close()

Dir already exists
timestep 0
timestep 1
timestep 2
timestep 3
timestep 4
timestep 0
timestep 1
timestep 2
timestep 3
timestep 4
norming the grads
grads are
[<tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_0:0' shape=(103, 100) dtype=float64>, <tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_1:0' shape=(100,) dtype=float64>, <tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_2:0' shape=(100, 3) dtype=float64>, <tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_3:0' shape=(3,) dtype=float64>]
1001
499
INFO:tensorflow:Restoring parameters from ./summaries/100_state_1000_samples_100_batch_5_ops_np_add_no_clip_pred_work_seed/model/./-50
Sotfmax test loss	 7.56226656754e+16
Hardmax test loss	 1.37213534547e+12
[array([[-0.08 ,  0.024,  0.085],
       [-0.04 , -0.099, -0.146],
       [-0.004, -0.183, -0.171],
       [-0.274, -0.135, -0.066],
       [-0.283, -0.191,  0.016],
       [-0.12 , -0.132, -0.066],
       [-0.218, -0.244, -0.074],
       [-0.066, -0.135, -0.26 ],
       