In [38]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python import debug as tf_debug
from tensorflow.python.framework import ops as tf_ops
import pprint
import random
import time
import os
import sys
import datetime
from data_gen import *
from params import get_cfg
from ops import Operations

def variable_summaries(var):
    """Register TensorBoard summaries (mean, stddev, max, min, histogram) for a tensor.

    All summaries are grouped under a name scope derived from ``var.name``
    with every ":" replaced by "_".
    """
    scope_name = var.name.replace(":", "_")
    with tf.name_scope(scope_name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)

        # Population standard deviation: sqrt(mean((var - mean)^2)).
        squared_deviation = tf.square(var - mean)
        stddev = tf.sqrt(tf.reduce_mean(squared_deviation))
        tf.summary.scalar('stddev', stddev)

        largest = tf.reduce_max(var)
        tf.summary.scalar('max', largest)

        smallest = tf.reduce_min(var)
        tf.summary.scalar('min', smallest)

        tf.summary.histogram('histogram', var)

def write_no_tf_summary(writer, tag, val, step):
    """Write one plain scalar to a summary writer without adding graph ops.

    A ``tf.Summary`` message holding ``val`` under ``tag`` is built by hand
    and passed to ``writer.add_summary`` for the given ``step``.
    """
    scalar_summary = tf.Summary()
    scalar_summary.value.add(simple_value=val, tag=tag)
    writer.add_summary(scalar_summary, step)
    

#helper func
def get_time_hhmmss(dif):
    """Format a duration given in seconds as an ``HH:MM:SS`` string.

    Hours are not wrapped at 24, and fractional parts are truncated by the
    ``%d`` conversions.
    """
    total_minutes, seconds = divmod(dif, 60)
    hours, minutes = divmod(total_minutes, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)


# Load the run configuration (indexed by string keys below) and the set of
# candidate operations the model selects from; `ops.ops` and `ops.num_of_ops`
# are used further down.
cfg = get_cfg()    
ops = Operations(cfg)

# Start from an empty default graph so re-running this cell does not stack
# a second copy of the model on top of the previous one.
tf.reset_default_graph()

#model constants
# NOTE(review): dummy_matrix is not referenced anywhere else in the visible code.
dummy_matrix = tf.zeros([cfg['batch_size'], cfg['num_features']], dtype=cfg['datatype'], name="dummy_constant")

#model placeholders
# Inputs and targets: the batch dimension is fixed, the second dimension is left open.
batchX_placeholder = tf.placeholder(cfg['datatype'], [cfg['batch_size'], None], name="batchX")
batchY_placeholder = tf.placeholder(cfg['datatype'], [cfg['batch_size'], None], name="batchY")

# Recurrent state fed in at the first timestep: (batch_size, state_size).
init_state = tf.placeholder(cfg['datatype'], [cfg['batch_size'], cfg['state_size']], name="init_state")


#set random seed
# Set before the randomly initialised variables below are created.
tf.set_random_seed(cfg['seed'])

#model parameters
# W / b: recurrent layer applied to the concatenation [input, state], so W has
# state_size + num_features rows and state_size columns.
# NOTE(review): tf.truncated_normal's positional parameters after `shape` are
# (mean, stddev), so this draws with mean = -param_init and stddev = param_init.
# If a symmetric [-param_init, param_init] range was intended, confirm and adjust.
W = tf.Variable(tf.truncated_normal([cfg['state_size']+cfg['num_features'], cfg['state_size']], -1*cfg['param_init'], cfg['param_init'], dtype=cfg['datatype']), dtype=cfg['datatype'], name="W")
b = tf.Variable(np.zeros((cfg['state_size'])), dtype=cfg['datatype'], name="b")
variable_summaries(W)
variable_summaries(b)

# W2 / b2: projection from the recurrent state to one logit per candidate operation.
# NOTE(review): same (mean, stddev) remark as for W applies here.
W2 = tf.Variable(tf.truncated_normal([cfg['state_size'], ops.num_of_ops], -1*cfg['param_init'], cfg['param_init'], dtype=cfg['datatype']),dtype=cfg['datatype'], name="W2")
b2 = tf.Variable(np.zeros((ops.num_of_ops)), dtype=cfg['datatype'], name="b2")
variable_summaries(W2)
variable_summaries(b2)

    #forward pass
def run_forward_pass(mode="train"):
    """Unroll the operation-selecting RNN for ``cfg['max_output_ops']`` timesteps.

    At every timestep the current input (initially ``batchX_placeholder``) is
    concatenated with the recurrent state, a tanh layer produces the next
    state, and a softmax over ``ops.num_of_ops`` logits weights the outputs of
    every candidate operation in ``ops.ops``. The weighted sum of those
    outputs becomes the input of the next timestep.

    Args:
        mode: ``"test"`` replaces the softmax with a one-hot encoding of its
            argmax (hard selection). Any other value, including the default
            ``"train"``, keeps the soft selection.

    Returns:
        A tuple ``(output, current_state, softmax, outputs, softmaxes)``: the
        output, recurrent state and selection mask of the last timestep,
        followed by the per-timestep lists of outputs and selection masks.
        ``softmax`` is ``None`` when ``cfg['max_output_ops']`` is 0.
    """
    current_state = init_state
    output = batchX_placeholder

    outputs = []
    softmaxes = []
    # Defined up front so a zero-step unroll returns None instead of raising NameError.
    softmax = None

    for timestep in range(cfg['max_output_ops']):
        print("timestep " + str(timestep))
        current_input = output

        # Recurrent update: next_state = tanh([input, state] . W + b)
        input_and_state_concatenated = tf.concat([current_input, current_state], 1, name="concat_input_state")  # Increasing number of columns
        next_state = tf.tanh(tf.add(tf.matmul(input_and_state_concatenated, W, name="input-state_mult_W"), b, name="add_bias"), name="tanh_next_state")  # Broadcasted addition
        current_state = next_state

        # Selection mask over the candidate operations.
        logits = tf.add(tf.matmul(next_state, W2, name="state_mul_W2"), b2, name="add_bias2")  # Broadcasted addition
        softmax = tf.nn.softmax(logits, name="get_softmax")

        # In test mode the soft mask is replaced by a hard one-hot mask.
        # Compare by value: `is` tests object identity and only happens to work
        # for interned string literals.
        if mode == "test":
            argmax = tf.argmax(softmax, 1)
            softmax = tf.one_hot(argmax, ops.num_of_ops, dtype=cfg['datatype'])

        #######################
        #perform op selection #
        #######################

        # Apply every operation to the current input, remembering its name.
        op_res = [(op.__name__, op(current_input)) for op in ops.ops]

        # One (batch_size, 1) column of the mask per operation.
        ops_softmax = [
            tf.slice(softmax, [0, i], [cfg['batch_size'], 1],
                     name="slice_" + op.__name__ + "_softmax_val")
            for i, op in enumerate(ops.ops)
        ]

        # Weight each operation's result by its mask column ...
        ops_final = [
            tf.multiply(op_outp, op_weight, name="mult_" + op_name + "_softmax")
            for (op_name, op_outp), op_weight in zip(op_res, ops_softmax)
        ]

        # ... and sum the weighted results into this timestep's output.
        output = tf.add_n(ops_final)

        # Save the sequence of outputs and selection masks.
        outputs.append(output)
        softmaxes.append(softmax)

    return output, current_state, softmax, outputs, softmaxes

#cost function
def calc_loss(output):
    """Build the half squared error between ``output`` and ``batchY_placeholder``.

    Returns:
        A tuple ``(total_loss, math_error)``: the sum over all elements of the
        error tensor, and the element-wise error ``0.5 * (output - batchY)**2``.
    """
    half = tf.constant(0.5, dtype=cfg['datatype'])
    residual = tf.subtract(output, batchY_placeholder, name="sub_otput_batchY")
    squared_residual = tf.square(residual, name="squar_error")
    math_error = tf.multiply(half, squared_residual, name="mult_with_0.5")

    total_loss = tf.reduce_sum(math_error, name="red_total_loss")
    return total_loss, math_error

# Build the forward pass twice on the same variables: once with soft operation
# selection (train) and once with hard one-hot selection (test).
output_train, current_state_train, softmax_train, outputs_train, softmaxes_train = run_forward_pass(mode = "train")
total_loss_train, math_error_train = calc_loss(output_train)

output_test, current_state_test, softmax_test, outputs_test, softmaxes_test = run_forward_pass(mode = "test")
total_loss_test, math_error_test = calc_loss(output_test)

# Gradients of the soft-selection loss with respect to all model parameters.
grads_raw = tf.gradients(total_loss_train, [W,b,W2,b2], name="comp_gradients")

#clip gradients by global norm (when enabled) and add summaries
if cfg['norm']:
    print("norming the grads")
    grads, norms = tf.clip_by_global_norm(grads_raw, cfg['grad_norm'])
    variable_summaries(norms)
else:
    grads = grads_raw

for grad in grads:
    variable_summaries(grad)

# epsilon has to be passed by keyword: AdamOptimizer's second positional
# parameter is beta1, so a positional cfg['epsilon'] would silently replace
# beta1 and leave epsilon at its default.
train_step = tf.train.AdamOptimizer(cfg['learning_rate'], epsilon=cfg['epsilon'], name="AdamOpt").apply_gradients(zip(grads, [W,b,W2,b2]), name="min_loss")
print("grads are")
print(grads)

#pre training setting
# Print numpy arrays with 3 decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)
#train_fn = np_mult
#train_fn = np_stall
# Generate the data set and split it into train and test parts.
# NOTE(review): samples_generator / split_train_test are not defined in this
# notebook; presumably they come from the `data_gen` star import - confirm.
x,y = samples_generator(cfg['train_fn'], (cfg['num_samples'], cfg['num_features']) , cfg['samples_value_rng'], cfg['seed'])
x_train, x_test, y_train, y_test = split_train_test (x, y , cfg['test_ratio'])
# Floor division: samples beyond the last full batch are never used.
num_batches = x_train.shape[0]//cfg['batch_size']
num_test_batches = x_test.shape[0]//cfg['batch_size']
print("num batches train:", num_batches)
print("num batches test:", num_test_batches)
#model training

#create a saver to save the trained model
# Only trainable variables are handled by this saver.
saver=tf.train.Saver(var_list=tf.trainable_variables())

#Enable jit
config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
#define convergence check list
# NOTE(review): last_train_losses is not used in the visible part of the notebook.
last_train_losses = []

# Restore a trained model and fetch its operation-selection masks (soft and
# hard) for the training batches. No training step is executed here.
with tf.Session(config=config) as sess:
    # Merge all the summaries and write them out
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./summaries/' + cfg['dst'] ,sess.graph)
    try:
        ##enable debugger if necessary
        if cfg['debug']:
            print("Running in a debug mode")
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

        # Initialise every variable, then overwrite the trainable ones with
        # the values stored in the latest checkpoint.
        sess.run(tf.global_variables_initializer())
        path = './summaries/np_add-5ops/test7/model/'
        checkpoint_path = tf.train.latest_checkpoint(path)
        if checkpoint_path is None:
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; fail with a message that names the directory.
            raise ValueError("No checkpoint found in " + path)
        saver.restore(sess, checkpoint_path)

        globalstartTime = time.time()

        # Zero initial recurrent state for both graph variants.
        _current_state_train = np.zeros((cfg['batch_size'], cfg['state_size']))
        _current_state_test = np.zeros((cfg['batch_size'], cfg['state_size']))

        # Forward passes only. The results are overwritten on every iteration,
        # so after the loop they hold the values of the last training batch.
        for batch_idx in range(num_batches):
            start_idx = cfg['batch_size'] * batch_idx
            end_idx = start_idx + cfg['batch_size']

            batchX = x_train[start_idx:end_idx]
            batchY = y_train[start_idx:end_idx]

            # Soft selection masks (train-mode graph).
            _softmaxes_train, _softmax_train = sess.run(
                [softmaxes_train, softmax_train],
                feed_dict={
                    init_state: _current_state_train,
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY
                })

            # Hard one-hot selection masks (test-mode graph) on the same batch.
            _softmaxes_test, _softmax_test = sess.run(
                [softmaxes_test, softmax_test],
                feed_dict={
                    init_state: _current_state_test,
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY
                })
    finally:
        # Flush and release the event file even if restoring or running fails.
        train_writer.close()


timestep 0
timestep 1
timestep 2
timestep 3
timestep 4
timestep 0
timestep 1
timestep 2
timestep 3
timestep 4
norming the grads
grads are
[<tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_0:0' shape=(53, 50) dtype=float64>, <tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_1:0' shape=(50,) dtype=float64>, <tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_2:0' shape=(50, 3) dtype=float64>, <tf.Tensor 'clip_by_global_norm/clip_by_global_norm/_3:0' shape=(3,) dtype=float64>]
num batches train: 10
num batches test: 4
INFO:tensorflow:Restoring parameters from ./summaries/np_add-5ops/test7/model/-1950


In [40]:
# Print the train-mode (soft) selection mask of every timestep, one block per timestep.
for step_softmax in _softmaxes_train:
    print("#####################################################################")
    print(step_softmax)

#####################################################################
[[ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 0.991  0.     0.009]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 0.985  0.     0.015]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 0.964  0.     0.036]
 [ 0.997  0.     0.003]
 [ 1.     0.     0.   ]
 [ 0.943  0.     0.057]
 [ 1.     0.     0.   ]
 [ 0.996  0.     0.004]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 0.996  0.     0.004]
 [ 0.999  0.     0.001]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 0.999  0.     0.001]
 [ 1.     0.     0.   ]
 [ 1.     0.     0.   ]
 [ 1.     0.     0

In [41]:
# Print the test-mode (one-hot) selection mask of every timestep, one block per timestep.
for step_hardmax in _softmaxes_test:
    print("#####################################################################")
    print(step_hardmax)

#####################################################################
[[ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]


In [27]:
# Shape of the last-timestep train-mode softmax fetched for the last batch: (batch_size, ops.num_of_ops).
_softmax_train.shape

(100, 3)

In [28]:
# Static shape of the state-to-logits projection: (state_size, ops.num_of_ops).
W2.shape

TensorShape([Dimension(50), Dimension(3)])

In [29]:
# Static shape of the recurrent weight matrix: (state_size + num_features, state_size).
W.shape

TensorShape([Dimension(53), Dimension(50)])

In [30]:
# Static shape of the initial-state placeholder: (batch_size, state_size).
init_state.shape

TensorShape([Dimension(100), Dimension(50)])

In [31]:
# Static shape of the recurrent bias: (state_size,).
b.shape

TensorShape([Dimension(50)])

In [32]:
# Static shape of the logits bias: (ops.num_of_ops,).
b2.shape

TensorShape([Dimension(3)])