In [3]:
import tensorflow as tf
from scipy.io import loadmat
import scipy.sparse as sparse
import numpy as np
from datetime import datetime


# Encoding in .tfRecords from .Mat file

In [None]:
train_addrs = ["MatlabData/trial_"+str(i+1)+".mat" for i in range(7361) ]
train_addrs[0:5]

size_batch_in_memory = 1000
num_batches = len(train_addrs)//size_batch +((len(train_addrs)%size_batch)>0)

#Addresses to save the TFRecords file
train_filename = ["train_"+str(i+1)+".tfrecords" for i in range(num_batches)]
train_filename



In [None]:
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def from_sparse_matrix_to_feature(features, mapping_number,name="default"):
    sparse_array = features[mapping_number]
    index_x, index_y, values = sparse.find(sparse_array)
    shape = sparse_array.shape
    
    feature = {    name+'_x': _int64_feature(index_x), 
                   name+'_y': _int64_feature(index_y),
                   name+'_values': _float_feature(values),
                   name+'_shape':  _int64_feature(shape)
              }
    return feature 

def from_int_matrix_to_feature(features, mapping_number, name="default"):
    return {name: _int64_feature(np.array(features[mapping_number].flatten()))}

def from_float_matrix_to_feature(features, mapping_number, name="default"):
    return {name: _float_feature(np.array(features[mapping_number].flatten()))}

def from_string_matrix_to_feature(features, mapping_number, name="default"):
    return {name: _bytes_feature(np.array(features[mapping_number].flatten(), dtype='str'))}

In [None]:
np.save('mapping_Mat_Python.npy', map_name_to_type_and_position) 


In [None]:
map_name_to_type_and_position = np.load('mapping_Mat_Python.npy').item()

In [None]:
#Iterate on the number of files
for j in range(num_batches):
    # open the TFRecords file
    writer = tf.python_io.TFRecordWriter(train_filename[j]) 
    
    for i in range(size_batch_in_memory):
        
        #Case where no file left
        if j*size_batch_in_memory+i>=len(train_addrs):
            writer.close()
            break
            
        # print how many examples are saved every 100 images
        if not i % 100:
            print 'Train data: {}/{}'.format(j*size_batch+i, len(train_addrs))
            sys.stdout.flush()
        
        # Load the the .mat file 
        mat = loadmat(train_addrs[j*size_batch+i])
        features = list(mat['structarr'][0,0])
        
        # Convert the features 
        feature = {}
        for k in map_name_to_type_and_position:
            map_number, map_type = map_name_to_type_and_position[k]
            if map_type == int:
                feature.update(from_int_matrix_to_feature(features, map_number, name=k))
            if map_type == float:
                feature.update(from_float_matrix_to_feature(features, map_number, name=k))
            if map_type == 'str':
                feature.update(from_string_matrix_to_feature(features, map_number, name=k))
            if map_type == 'sparse':
                feature.update(from_sparse_matrix_to_feature(features, map_number, name=k))
                

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())

    writer.close()
sys.stdout.flush()

# Decoding for training

In [239]:
tf.reset_default_graph()

In [240]:
max_len_sequence = 2568

n_steps = 200
n_steps_to_predict = 10
n_features_spikeRaster = 96
n_dims_output = 3

n_inputs = n_features_spikeRaster + n_dims_output



In [241]:
def _parse_function(example_proto):
    
    features = {
                "handPos" : tf.VarLenFeature(tf.float32),
                'spikeRaster': tf.SparseFeature(index_key=['spikeRaster_x', 'spikeRaster_y'],
                                                  value_key='spikeRaster_values',
                                                  dtype=tf.float32, size=[n_features_spikeRaster, max_len_sequence]),
                "spikeRaster_shape": tf.FixedLenFeature([2],tf.int64)               
               }
    
    parsed_features = tf.parse_single_example(example_proto, features)
    
    # Preprocess spikeRaster => [Time Series n_steps x n_features_spikeRaster]
    parsed_features["spikeRaster"] = tf.sparse_slice(parsed_features["spikeRaster"],
                                                     [0,0],parsed_features["spikeRaster_shape"])
    parsed_features["spikeRaster"] = tf.sparse_tensor_to_dense(parsed_features["spikeRaster"])
    spikeRaster = tf.reshape(tf.transpose(parsed_features["spikeRaster"]), [-1,n_features_spikeRaster])
    
    # Preprocess lengths of sequences = []
    seq_length = tf.cast(parsed_features["spikeRaster_shape"][1], tf.int32)
        
    # Preprocess handPos = [n_steps x 3] => HELPER
    handPos = tf.sparse_tensor_to_dense(parsed_features["handPos"])
    handPos = tf.transpose(tf.reshape(handPos, [n_dims_output,-1]))
    
    # Useful features
    features = tf.concat([spikeRaster,handPos], axis=1)    
    return features

def get_slices(x):
    num_slices = tf.shape(x, out_type=tf.int64)[0] - n_steps - n_steps_to_predict + 1
    return tf.data.Dataset.range(num_slices).map(lambda i: (x[i:i + n_steps] , 
                                                            x[i+n_steps:i+n_steps+n_steps_to_predict,-3:]))

In [249]:
num_epochs = 2
size_batch = 32
filenames = ["Data/train_"+str(i+1)+".tfrecords" for i in range(1)]

dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(_parse_function)
dataset = dataset.flat_map(get_slices)

dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.batch(size_batch)
dataset = dataset.repeat(num_epochs)

iterator = dataset.make_initializable_iterator()

inputs, real_position = iterator.get_next()

inputs.set_shape([None,n_steps,n_inputs])
real_position.set_shape([None,n_steps_to_predict,3])




In [243]:
with tf.Session() as sess:
    sess.run(iterator.initializer)
    e1, e2 = sess.run([inputs, real_position])

In [244]:
inputs, real_position

(<tf.Tensor 'IteratorGetNext:0' shape=(?, 200, 99) dtype=float32>,
 <tf.Tensor 'IteratorGetNext:1' shape=(?, 10, 3) dtype=float32>)

In [246]:
is_training = tf.placeholder(tf.bool, [])

keep_prob = 0.5
num_units = 128

# Build RNN cell
encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units)

# Run Dynamic RNN
# inputs: [batch_size, n_steps, n_inputs]
rnn_outputs, _ = tf.nn.dynamic_rnn(
    encoder_cell, inputs,
    time_major=False,
    dtype = tf.float32)

# Recover meaningful outputs // Predict 3D positions
rnn_outputs = rnn_outputs[:,- n_steps_to_predict:,:]
predicted_position = tf.layers.dense(rnn_outputs, 3)

loss = tf.losses.mean_squared_error(predictions=predicted_position, labels=real_position)
training_op = tf.train.AdamOptimizer().minimize(loss)

In [217]:
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

mse_summary = tf.summary.scalar('MSE', loss)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [220]:
saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        sess.run(iterator.initializer)
        current_loss = sess.run(loss)
        n_iteration = 0
        print("Loss after {} epochs: {}".format(epoch,current_loss))
        
        while True:
            try:
                _, current_loss = sess.run([training_op,loss])
                n_iteration += 1
                
                if not(n_iteration%100):
                    print("Loss after {} iteration: {}".format(n_iteration,current_loss)) 
            except tf.errors.OutOfRangeError:
                saver.save(sess, "/ModelWeights/test_model.ckpt")

Loss after 0 epochs: 69063.28125
Loss after 100 iteration: 34369.8398438
Loss after 200 iteration: 17333.1113281
Loss after 300 iteration: 1036.01477051
Loss after 400 iteration: 560.381408691
Loss after 500 iteration: 48975.703125
Loss after 600 iteration: 340.898895264
Loss after 700 iteration: 16252.4794922
Loss after 800 iteration: 587.118469238
Loss after 900 iteration: 15884.0898438
Loss after 1000 iteration: 384.318023682
Loss after 1100 iteration: 79.0458145142
Loss after 1200 iteration: 184.307434082
Loss after 1300 iteration: 38.5350723267
Loss after 1400 iteration: 92.8044204712
Loss after 1500 iteration: 17.2639293671
Loss after 1600 iteration: 198.546676636
Loss after 1700 iteration: 155.789108276
Loss after 1800 iteration: 83.1469726562
Loss after 1900 iteration: 64.4686889648
Loss after 2000 iteration: 147.254638672
Loss after 2100 iteration: 94.3248214722
Loss after 2200 iteration: 53.9316978455
Loss after 2300 iteration: 36.1063995361
Loss after 2400 iteration: 62.4742

InvalidArgumentError: indices[666] = [22,2500] is out of bounds: need 0 <= index < [96,2500]
	 [[Node: SparseToDense = SparseToDense[T=DT_FLOAT, Tindices=DT_INT64, validate_indices=true](SparseSlice, SparseSlice:2, SparseSlice:1, SparseToDense/default_value)]]
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?,?,99], [?,?]], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](Iterator)]]

# Tests

In [None]:
mat = loadmat(train_addrs[0])
features = list(mat['structarr'][0,0])

In [None]:
spike_raster_base = features[28].todense()

In [None]:
spike_raster_base