# Data provider unit testing

The aim of this notebook is to create a mock dataset with synthetic data and targets (deterministic integers and their binary encodings, rather than truly random values) and to test that the data provider produces a queue of tensors.

In [1]:
import os

import numpy as np
import tensorflow as tf

from pydst.dataproviders import DataProvider
from pydst.rate_limiters import RateLimited

### Generating mock dataset

In [None]:
# Build the mock inputs: 512 samples, each row holding its own index four times.
sample_ids = np.arange(0, 512)
data = np.column_stack([sample_ids] * 4)

# Build the targets: row i is the 9-digit binary expansion of i, most
# significant bit first (511 is the largest id, so 9 digits always suffice).
targets = np.asarray([
    [int(bit) for bit in np.binary_repr(int(sample_id), width=9)]
    for sample_id in sample_ids
])
# The sample ids double as the track ids stored in the metadata.
tids = np.asarray(sample_ids)

# Report the shapes of the three arrays.
for label, array in (('Data', data), ('Targets', targets), ('TIDs', tids)):
    print(label + ' shape: ' + str(array.shape))

In [None]:
# Save data
# Each sample is written to its own .npy file named after its id. np.save does
# not create missing directories, so make sure the target folder exists first
# (isdir + makedirs rather than exist_ok=True, so this also runs on Python 2).
train_dir = '../mockdataset/data1/train/'
if not os.path.isdir(train_dir):
    os.makedirs(train_dir)

for idx, name in enumerate(tids):
    filename = train_dir + str(name) + '.npy'
    row = data[idx, :]
    np.save(filename, row)

# Save metadata
# The dict is stored by np.save as a pickled object array, so on recent NumPy
# versions reading it back requires np.load(..., allow_pickle=True).
filename = '../mockdataset/data1/train_metadata.npy'
metadata = {'targets': targets, 'tids': tids}
np.save(filename, metadata)

### Testing the data provider

In [3]:
# Build the input pipeline in a dedicated graph.
graph = tf.Graph()

# tf.device() acts on whichever graph is the default one when the scope is
# entered. The tensors returned by the provider are evaluated below by a
# session bound to `graph`, so `graph` must be the default graph here for the
# CPU pin to reach the ops created for it.
# NOTE(review): assumes DataProvider creates its ops in the `graph` it is
# given -- confirm against pydst.dataproviders.
with graph.as_default(), tf.device('/cpu:0'):
    trainData = DataProvider(graph=graph, which_set='train', batch_size=10,
                             num_samples=131072, target_size=6, shape='flat')
    data_batch, targets_batch = trainData.get_data()

# >> DEFINE MODEL HERE
with graph.as_default():
    # The session is created while `graph` is the default graph, so it is
    # bound to `graph`.
    sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8))
    sess.run(tf.global_variables_initializer())
    # Presumably starts the provider's feeding machinery for this session --
    # TODO confirm what enable() does.
    trainData.enable(sess)
    
# NOTE(review): RateLimited(1) presumably throttles how often the function may
# be called -- confirm the unit in pydst.rate_limiters.
@RateLimited(1)
def eval_tensor(sess):
    """Fetch one data/targets pair and print the data batch and its shape.

    Args:
        sess: session used to evaluate the notebook-level tensors
            ``data_batch`` and ``targets_batch``.

    The targets are fetched together with the data but are not displayed.
    """
    fetched = sess.run([data_batch, targets_batch])
    batch, _ = fetched
    print(batch)
    print(batch.shape)
    
# Pull a single batch. The provider is always disabled and the session always
# closed afterwards, even when evaluation raises, so that nothing stays alive
# in the kernel between cell runs.
try:
    for _ in range(1):
        eval_tensor(sess)
finally:
    try:
        trainData.disable(sess)
    finally:
        # Release the resources owned by the session.
        sess.close()

[[ -4107.  -3620.  -2696. ...,   5319.   6507.   3750.]
 [  3986.   3401.   3281. ...,  -1098.   -690.   -222.]
 [  -256.   -548.   -695. ...,  -1807.  -1828.  -1844.]
 ..., 
 [ -1230.  -4719.  -8908. ...,  13152.   8053.   5774.]
 [  9888.   6360.   1782. ...,   -938.  -5199.  -4040.]
 [  8650.   9211.   9963. ...,  14559.  14024.  11821.]]
(10, 131072)


In [5]:
# Build the input pipeline for the filter-bank features in a dedicated graph.
graph = tf.Graph()

# tf.device() acts on whichever graph is the default one when the scope is
# entered. The tensors returned by the provider are evaluated below by a
# session bound to `graph`, so `graph` must be the default graph here for the
# CPU pin to reach the ops created for it.
# NOTE(review): assumes DataProvider creates its ops in the `graph` it is
# given -- confirm against pydst.dataproviders.
with graph.as_default(), tf.device('/cpu:0'):
    trainData = DataProvider(graph=graph, which_set='train', batch_size=10, target_size=6,
                             num_samples=1000, max_samples=2911, data_depth=40,
                             root='../magnatagatune/dataset/fbank/', shape='flat')
    data_batch, targets_batch = trainData.get_data()

# >> DEFINE MODEL HERE
with graph.as_default():
    # The session is created while `graph` is the default graph, so it is
    # bound to `graph`.
    sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8))
    sess.run(tf.global_variables_initializer())
    # Presumably starts the provider's feeding machinery for this session --
    # TODO confirm what enable() does.
    trainData.enable(sess)
    
# NOTE(review): RateLimited(1) presumably throttles how often the function may
# be called -- confirm the unit in pydst.rate_limiters.
@RateLimited(1)
def eval_tensor(sess):
    """Evaluate the current data/targets tensors once and show the data batch.

    Args:
        sess: session used to run the notebook-level tensors ``data_batch``
            and ``targets_batch``.

    Prints the fetched data batch, then its shape; the fetched targets are
    discarded.
    """
    batch_values, _ = sess.run([data_batch, targets_batch])
    print(batch_values)
    print(batch_values.shape)
    
# Pull a single batch. The provider is always disabled and the session always
# closed afterwards, even when evaluation raises, so that nothing stays alive
# in the kernel between cell runs.
try:
    for _ in range(1):
        eval_tensor(sess)
finally:
    try:
        trainData.disable(sess)
    finally:
        # Release the resources owned by the session.
        sess.close()

[[ 15.13716507  15.875       16.25159645 ...,   9.96492481   9.46856022
    8.67287636]
 [ 12.05631447  11.72623444  13.64362526 ...,  11.54877281  10.54208469
    8.37839317]
 [ 13.20380116  16.49135017  16.92581558 ...,  14.54773521  14.48663712
   12.5423336 ]
 ..., 
 [ 12.72415257  15.12538528  13.90313244 ...,  16.61892319  15.52478886
   13.71417141]
 [  9.75028896  10.79287624  10.20362663 ...,  15.46816158  15.126091
   13.31380367]
 [ 11.30250168  12.0936451   11.68301201 ...,  12.02502823  11.75377941
   10.69101715]]
(10, 40000)
