# Data provider unit testing

The aim of this notebook is to create a mock dataset with random numbers and random targets and test the data provider to produce a queue of tensors.

In [2]:
from pydst.rate_limiters import RateLimited 
from pydst.dataproviders import DataProvider
import tensorflow as tf
import numpy as np

### Generating mock dataset

In [None]:
# Generate data
tmp = np.arange(0, 512)
data = np.column_stack((tmp,tmp,tmp,tmp))

# Generate targets
targets=[]
for num in [int(i) for i in tmp]:
    tmp_tgt = [int(i) for i in list(np.binary_repr(num))]
    if len(tmp_tgt) != 9:
        remain = 9 - len(tmp_tgt)
        zeros = [0]*remain
        zeros.extend(tmp_tgt)
        tmp_tgt = zeros
    targets.append(tmp_tgt)
targets = np.asarray(targets)
tids = np.asarray(tmp)

# Print sizes
print('Data shape: ' + str(data.shape))
print('Targets shape: ' + str(targets.shape))
print('TIDs shape: ' + str(tids.shape))

In [None]:
# Save data
for idx, name in enumerate(tids):
    filename = '../mockdataset/data1/train/' + str(name) + '.npy'
    row = data[idx, :]
    np.save(filename, row)
    
# Save metadata
filename = '../mockdataset/data1/train_metadata.npy'
metadata = {'targets': targets, 'tids': tids}
np.save(filename, metadata)

### Testing the data provider

In [3]:
graph = tf.Graph()
with tf.device('/cpu:0'):
    trainData = DataProvider(graph=graph, which_set='train', batch_size=10, down_sample=1, target_size=6)
    data_batch, targets_batch = trainData.get_data()

# >> DEFINE MODEL HERE    
with graph.as_default():    
    sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8))
    sess.run(tf.global_variables_initializer())
    trainData.enable(sess)

[<Thread(Thread-4, started daemon 139863345043200)>]

Exception in thread Thread-4:
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/usr/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "pydst/dataproviders.py", line 167, in load_q
    sess.run(self.enqop, feed_dict={self.q_din: cdata, self.q_tin: ctargets})
  File "/home/mark/.virtualenvs/dst/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 778, in run
    run_metadata_ptr)
  File "/home/mark/.virtualenvs/dst/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 933, in _run
    + e.args[0])
TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("Placeholder:0", shape=(10, 4), dtype=float32) is not an element of this graph.



In [5]:
@RateLimited(1)
def eval_tensor(sess):
    [b,t] = sess.run([data_batch, targets_batch])
    print(b)
    
for num in range(5):
    eval_tensor(sess)
trainData.disable(sess)

[[ 435.  435.  435.  435.]
 [ 271.  271.  271.  271.]
 [ 396.  396.  396.  396.]
 [  96.   96.   96.   96.]
 [ 280.  280.  280.  280.]
 [ 399.  399.  399.  399.]
 [  90.   90.   90.   90.]
 [ 135.  135.  135.  135.]
 [ 329.  329.  329.  329.]
 [ 132.  132.  132.  132.]]
[[ 463.  463.  463.  463.]
 [ 103.  103.  103.  103.]
 [  41.   41.   41.   41.]
 [  24.   24.   24.   24.]
 [  54.   54.   54.   54.]
 [ 338.  338.  338.  338.]
 [ 377.  377.  377.  377.]
 [ 233.  233.  233.  233.]
 [ 291.  291.  291.  291.]
 [ 223.  223.  223.  223.]]
[[   2.    2.    2.    2.]
 [  13.   13.   13.   13.]
 [ 393.  393.  393.  393.]
 [ 363.  363.  363.  363.]
 [ 507.  507.  507.  507.]
 [ 353.  353.  353.  353.]
 [ 472.  472.  472.  472.]
 [ 480.  480.  480.  480.]
 [ 265.  265.  265.  265.]
 [ 249.  249.  249.  249.]]
[[  87.   87.   87.   87.]
 [  89.   89.   89.   89.]
 [ 406.  406.  406.  406.]
 [  28.   28.   28.   28.]
 [ 342.  342.  342.  342.]
 [  70.   70.   70.   70.]
 [ 411.  411.  411.  411.