In [1]:
from IPython.core.display import display, HTML
# Cosmetic only: inject CSS so the notebook's `.container` element spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [5]:
import math
import json
import numpy as np
import tensorflow as tf
import random
from sklearn.neighbors import KNeighborsClassifier
import timeit
import pickle

In [6]:
def load_data(path):
    """Unpickle and return whatever object is stored in the file at ``path``.

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file,
    so this must only be pointed at trusted dataset files.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

# Load data

In [30]:
# change to your own path
path = "/home/neuralnet/NTU_60/"
#train data
# Cross-subject split (disabled): uncomment the next two lines and comment out
# the cross-view pair below to switch evaluation protocols.
# train_data = load_data(path+'cross_subject_data/trans_train_data.pkl')
# test_data = load_data(path+'cross_subject_data/trans_test_data.pkl')
# Cross-view split (active). Later cells index each sample as a mapping with
# 'input' and 'label' keys.
train_data = load_data(path+'cross_view_data/trans_train_data.pkl')
test_data = load_data(path+'cross_view_data/trans_test_data.pkl')
print("Size of training data: ", len(train_data))
print("Size of test data: ", len(test_data))

Size of training data:  37646
Size of test data:  18932


# normalize data

In [31]:
def normalize_video(video, n_features=75):
    """Min-max scale a skeleton clip to [-1, 1], separately for x, y and z.

    Columns are interleaved as x0, y0, z0, x1, y1, z1, ...; one minimum and
    one maximum is taken per axis over ALL frames and ALL joints of the clip,
    so the relative geometry between joints is preserved.

    Args:
        video: array-like of shape (frames, columns).
        n_features: number of leading columns that hold coordinates
            (default 75). Columns at or beyond ``n_features`` are left as 0
            in the result, matching the previous behaviour.

    Returns:
        Array of the same shape as ``video``. Floating inputs keep their
        dtype; any other dtype is promoted to float64 so the scaled values
        are not truncated to integers.

    Notes:
        An axis whose values are all identical has zero range. It is mapped
        to -1 (the lower bound) instead of producing NaN/inf from a division
        by zero. A clip with no frames is returned as an empty array rather
        than raising.
    """
    video = np.asarray(video)
    if not np.issubdtype(video.dtype, np.floating):
        video = video.astype(np.float64)
    norm = np.zeros_like(video)
    if video.size == 0:
        return norm

    for axis in range(3):
        # Every third column starting at `axis` belongs to the same coordinate axis.
        cols = slice(axis, n_features, 3)
        coords = video[:, cols]
        if coords.size == 0:
            continue
        lo = coords.min()
        span = coords.max() - lo
        if span == 0:
            # Degenerate axis: the numerator is 0 everywhere, so any non-zero
            # divisor yields a finite, constant -1.
            span = 1
        norm[:, cols] = 2 * (coords - lo) / span - 1
    return norm
# Rescale every clip of both splits; each sample's 'input' entry is replaced
# by its normalized array.
for dataset in (train_data, test_data):
    for sample in dataset:
        sample['input'] = normalize_video(np.array(sample['input']))

# downsample

In [32]:
import math
# Reduce every training clip longer than 50 frames to exactly 50 frames by
# keeping frames 0, s, 2s, ..., 49s with stride s = floor(n_frames / 50).
# Clips of 50 frames or fewer are passed through unchanged.
# NOTE(review): for 51-99 frames the stride is 1, so those clips are simply
# truncated to their first 50 frames; longer clips can also lose trailing
# frames. Uniform index selection would cover the whole clip, but it would
# change the training data, so it is not altered here.
dsamp_train = []
for sample in train_data:
    val = np.asarray(sample['input'])
    n_frames = val.shape[0]
    if n_frames <= 50:
        dsamp_train.append(val)
        continue
    stride = n_frames // 50
    # 49 * stride < n_frames always holds, so this slice yields exactly 50 rows.
    dsamp_train.append(val[:50 * stride:stride].astype(np.float64))

In [33]:
# Same temporal reduction as for the training split: clips longer than 50
# frames keep frames 0, s, 2s, ..., 49s with s = floor(n_frames / 50); shorter
# clips are passed through unchanged.
# NOTE(review): a stride of 1 (51-99 frames) amounts to truncation to the
# first 50 frames.
dsamp_test = []
for sample in test_data:
    val = np.asarray(sample['input'])
    n_frames = val.shape[0]
    if n_frames <= 50:
        dsamp_test.append(val)
        continue
    stride = n_frames // 50
    # 49 * stride < n_frames always holds, so this slice yields exactly 50 rows.
    dsamp_test.append(val[:50 * stride:stride].astype(np.float64))

# Model

In [12]:
from tensorflow.contrib.rnn.python.ops.core_rnn_cell import RNNCell
class LinearSpaceDecoderWrapper(RNNCell):
  """RNN cell wrapper that applies a linear projection to the cell's output.

  The wrapped cell runs unchanged; its output is then mapped to
  ``output_size`` dimensions with ``output @ w_out + b_out``. The recurrent
  state is passed through untouched.
  """

  def __init__(self, cell, output_size):
    """Create the wrapper and its projection variables.

    Args:
      cell: an RNNCell whose output is to be projected.
      output_size: dimensionality of the projected output.

    Raises:
      TypeError: if cell is not an RNNCell.
    """
    if not isinstance(cell, RNNCell):
      raise TypeError("The parameter cell is not a RNNCell.")

    self._cell = cell

    print( 'output_size = {0}'.format(output_size) )
    print( ' state_size = {0}'.format(self._cell.state_size) )

    # Work out the width of the wrapped cell's output from its state size.
    # Tuple if multi-rnn
    if isinstance(self._cell.state_size,tuple):

      # Fine if GRU...
      insize = self._cell.state_size[-1]

      # LSTMStateTuple if LSTM. Referenced through `tf` because the bare name
      # was never imported and raised NameError whenever this branch ran.
      if isinstance( insize, tf.nn.rnn_cell.LSTMStateTuple ):
        insize = insize.h

    else:
      # Fine if not multi-rnn
      insize = self._cell.state_size

    # Projection parameters, created in whatever variable scope is active
    # when the wrapper is constructed.
    self.w_out = tf.get_variable("proj_w_out",
        [insize, output_size],
        dtype=tf.float32,
        initializer=tf.random_uniform_initializer(minval=-0.04, maxval=0.04))
    self.b_out = tf.get_variable("proj_b_out", [output_size],
        dtype=tf.float32,
        initializer=tf.random_uniform_initializer(minval=-0.04, maxval=0.04))

    self.linear_output_size = output_size


  @property
  def state_size(self):
    """State size of the wrapped cell (the projection adds no state)."""
    return self._cell.state_size

  @property
  def output_size(self):
    """Size of the projected output."""
    return self.linear_output_size

  def __call__(self, inputs, state, scope=None):
    """Run the wrapped cell, then project its output linearly.

    Returns:
      (projected_output, new_state) where new_state comes straight from the
      wrapped cell.
    """

    # Run the rnn as usual
    output, new_state = self._cell(inputs, state, scope)

    # Apply the multiplication to everything
    output = tf.matmul(output, self.w_out) + self.b_out

    return output, new_state

In [13]:
class ResidualWrapper(RNNCell):
  """RNN cell wrapper that adds each step's input to the wrapped cell's output."""

  def __init__(self, cell):
    """Wrap ``cell`` with a skip connection from input to output.

    Args:
      cell: an RNNCell; its output must be addable to its input.

    Raises:
      TypeError: if cell is not an RNNCell.
    """
    if isinstance(cell, RNNCell):
      self._cell = cell
    else:
      raise TypeError("The parameter cell is not a RNNCell.")

  @property
  def output_size(self):
    """Output size of the wrapped cell."""
    return self._cell.output_size

  @property
  def state_size(self):
    """State size of the wrapped cell."""
    return self._cell.state_size

  def __call__(self, inputs, state, scope=None):
    """Step the wrapped cell and return (cell_output + inputs, new_state)."""
    step_output, next_state = self._cell(inputs, state, scope)
    return tf.add(step_output, inputs), next_state

In [14]:
from tensorflow.python.ops.rnn import _transpose_batch_time
class Seq2SeqModelFS(object):
    """GRU sequence-to-sequence autoencoder with a fixed decoder state ("FS").

    Graph layout (all built in ``__init__``):
      * Encoder: 3 stacked bidirectional GRU layers, ``rnn_size // 2`` units
        per direction. The final forward and backward states of the last
        layer are concatenated into ``bi_xyz_h`` (also exposed as
        ``knn_state``, the feature used for k-NN evaluation).
      * Decoder: a ``GRUCell(rnn_size)`` followed by a linear projection to
        ``input_size`` and a residual connection, driven by ``tf.nn.raw_rnn``.
        It is seeded with the first input frame, then fed its own previous
        output; its state is reset to ``bi_xyz_h`` at every step.
      * Loss: masked L1 distance between the decoder outputs and frames
        1..end of the encoder input.
      * Optimizer: Adam with gradients clipped to a global norm of 25.

    ``rnn_size`` should be even so the concatenated encoder state
    (2 * (rnn_size // 2)) matches the decoder cell's state size.

    NOTE(review): ``train_keep_prob``, the ``keep_prob`` placeholder and the
    ``dec_xyz`` placeholder are accepted/created but not used anywhere in the
    graph built here.
    """
    def __init__(self, max_seq_len, input_size, rnn_size, batch_size, lr, train_keep_prob,decay_rate=0.95,dtype=tf.float32):
        """Build placeholders, encoder, decoder, loss and training ops.

        Args:
            max_seq_len: padded number of frames per clip.
            input_size: number of features per frame.
            rnn_size: decoder GRU width; each encoder direction uses half.
            batch_size: default value for the ``batch_size`` placeholder.
            lr: initial learning rate.
            train_keep_prob: currently unused.
            decay_rate: factor applied to the learning rate each time
                ``learning_rate_decay_op`` is run.
            dtype: dtype of the input placeholders and the learning rate.
        """
        self.max_seq_len = max_seq_len
        self.rnn_size = rnn_size
        self.batch_size = tf.placeholder_with_default(batch_size,shape=())
        self.input_size = input_size
        self.lr = tf.Variable( float(lr), trainable=False, dtype=dtype )
        self.learning_rate_decay_op = self.lr.assign( self.lr * decay_rate)
        self.keep_prob = tf.placeholder_with_default(1.0,shape=())
        self.global_step = tf.Variable(0, trainable=False)
        print('rnn_size = {0}'.format(rnn_size))

        with tf.variable_scope("inputs"):
            self.enc_xyz = tf.placeholder(dtype, shape=[None, self.max_seq_len, input_size], name='enc_xyz')
            self.dec_xyz = tf.placeholder(dtype, shape=[None, self.max_seq_len, input_size], name='dec_xyz')
            self.seq_len = tf.placeholder(tf.int32,[None])
            # 1 for target frames (frames 1..end) that contain any non-zero
            # value, 0 for all-zero frames, which are treated as padding.
            mask = tf.sign(tf.reduce_max(tf.abs(self.enc_xyz[:,1:,:]), 2))

        with tf.variable_scope("prediction"):
            with tf.variable_scope("encoder"):
                with tf.variable_scope("encoder_xyz",reuse=tf.AUTO_REUSE):
                    cell_fw_xyz = [tf.nn.rnn_cell.GRUCell(self.rnn_size//2) for i in range(3)]
                    cell_bw_xyz = [tf.nn.rnn_cell.GRUCell(self.rnn_size//2) for i in range(3)]
                    tuple_xyz = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cell_fw_xyz,cell_bw_xyz,self.enc_xyz,dtype=tf.float32,sequence_length=self.seq_len)
                    # tuple_xyz = (outputs, fw_states, bw_states); take the
                    # last layer's final state in each direction.
                    bi_xyz_h = tf.concat((tuple_xyz[1][-1],tuple_xyz[2][-1]),-1)
                    self.bi_xyz_h = bi_xyz_h
            self.knn_state = self.bi_xyz_h
            with tf.variable_scope("decoder"):
                with tf.variable_scope("decoder_xyz",reuse=tf.AUTO_REUSE):
                    cell_xyz__ = tf.nn.rnn_cell.GRUCell(self.rnn_size)
                    cell_xyz_ = LinearSpaceDecoderWrapper(cell_xyz__,self.input_size)
                    cell_xyz = ResidualWrapper(cell_xyz_)
                    def loop_fn(time, cell_output, cell_state, loop_state):
                        """
                        Loop function that allows to control input to the rnn cell and manipulate cell outputs.
                        :param time: current time step
                        :param cell_output: output from previous time step or None if time == 0
                        :param cell_state: cell state from previous time step
                        :param loop_state: custom loop state to share information between different iterations of this loop fn
                        :return: tuple consisting of
                          elements_finished: tensor of size [batch_size] which is True for sequences that have reached their end,
                            needed because of variable sequence size
                          next_input: input to next time step
                          next_cell_state: cell state forwarded to next time step
                          emit_output: The first return argument of raw_rnn. This is not necessarily the output of the RNN cell,
                            but could e.g. be the output of a dense layer attached to the rnn layer.
                          next_loop_state: loop state forwarded to the next time step
                        """
                        if cell_output is None:
                            # time == 0, used for initialization before first call to cell
                            next_cell_state = self.bi_xyz_h
                            # the emit_output in this case tells TF how future emits look
                            emit_output = tf.zeros([self.input_size])
                        else:
                            # t > 0, called right after call to cell, i.e. cell_output is the output from time t-1.
                            # here you can do whatever you want with cell_output before assigning it to emit_output.
                            # In this case, we don't do anything
                            next_cell_state = self.bi_xyz_h#NOTE:IF NO-FS, use cell_state#
                            emit_output = cell_output

                        # check which elements are finished
                        elements_finished = (time >= self.seq_len-1)
                        finished = tf.reduce_all(elements_finished)

                        # assemble cell input for upcoming time step
                        current_output = emit_output if cell_output is not None else None
                        #input_original = inputs_ta.read(time)  # tensor of shape (None, input_dim)
                        input_original = self.enc_xyz[:,0,:]
                        if current_output is None:
                            # this is the initial step, i.e. there is no output from a previous time step, what we feed here
                            # can highly depend on the data. In this case we just assign the actual input in the first time step.
                            next_in = input_original
                        else:
                            # time > 0, so just use previous output as next input
                            # here you could do fancier things, whatever you want to do before passing the data into the rnn cell
                            # if here you were to pass input_original than you would get the normal behaviour of dynamic_rnn
                            next_in = current_output

                        next_input = tf.cond(finished,
                                             lambda: tf.zeros([self.batch_size, self.input_size], dtype=tf.float32),  # copy through zeros
                                             lambda: next_in)  # if not finished, feed the previous output as next input

                        # set shape manually, otherwise it is not defined for the last dimensions
                        next_input.set_shape([None, self.input_size])

                        # loop state not used in this example
                        next_loop_state = None
                        return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)
                    outputs_ta, def_final_state_xyz, _ = tf.nn.raw_rnn(cell_xyz, loop_fn)
                    self.dec_outputs_xyz = _transpose_batch_time(outputs_ta.stack())
            def loss_with_mask(pred,gt,mask):
                """Mean over the batch of the per-clip masked L1 error, averaged over unmasked frames."""
                loss = tf.reduce_sum(tf.abs(pred-gt),2)*mask
                loss = tf.reduce_sum(loss,1)
                # Clamp the frame count to 1: a clip with no unmasked target
                # frame then contributes 0 instead of 0/0 = NaN.
                loss /= tf.maximum(tf.reduce_sum(mask,1), 1.0)
                loss = tf.reduce_mean(loss)
                return loss
        with tf.variable_scope("pred_xyz",reuse=tf.AUTO_REUSE):
            pred_xyz2xyz = self.dec_outputs_xyz
            # NOTE(review): the decoder loop stops once every sequence in the
            # batch is finished, so its time dimension appears to follow the
            # longest seq_len in the batch, whereas the target always has
            # max_seq_len - 1 frames. This presumably requires at least one
            # full-length clip per batch -- confirm.
            self.loss = loss_with_mask(pred_xyz2xyz,self.enc_xyz[:,1:,:],mask)

        opt = tf.train.AdamOptimizer(self.lr)
        gradients, self.pred_vars = zip(*opt.compute_gradients(self.loss))
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, 25)
        self.updates = opt.apply_gradients(zip(clipped_gradients,self.pred_vars),global_step = self.global_step)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    def step(self, session, encoder_inputs_xyz,decoder_inputs_xyz,seq_len,forward_only):
        """Run the model on one batch.

        Args:
            session: TF session holding the initialized variables.
            encoder_inputs_xyz: batch of padded clips fed to ``enc_xyz``.
            decoder_inputs_xyz: fed to ``dec_xyz`` (not used by the graph).
            seq_len: unpadded length of each clip in the batch.
            forward_only: if true, only evaluate the loss; otherwise also
                apply one optimizer update.

        Returns:
            ``(update_result, loss)``. ``update_result`` is ``None`` when
            ``forward_only`` is true (previously the whole call returned
            ``None`` in that case).
        """
        input_feed = {self.enc_xyz: encoder_inputs_xyz,
                      self.dec_xyz: decoder_inputs_xyz,
                      self.seq_len: seq_len,
                      # Feed the real batch size so the zero-input branch of the
                      # decoder loop matches the batch instead of the
                      # constructor default.
                      self.batch_size: len(encoder_inputs_xyz)}
        if forward_only:
            return None, session.run(self.loss, input_feed)
        output_feed = [self.updates,self.loss]
        outputs = session.run(output_feed, input_feed)
        return outputs[0], outputs[1]

In [15]:
def get_session():
    """Build a ``tf.Session`` that claims GPU memory incrementally rather than all at once.

    Soft device placement and device-placement logging are both enabled.
    """
    # See: https://www.tensorflow.org/tutorials/using_gpu#allowing_gpu_memory_growth
    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    return tf.Session(config=session_config)

In [16]:
def mini_batch_classify(feature_xyz,labels,seq_len,batch_size):
    """Yield consecutive, aligned slices of the three sequences.

    Batches are taken in order (no shuffling); the final batch holds the
    remainder and may be shorter than ``batch_size``.

    Yields:
        (features, labels, seq_len) slices covering the same index range.
    """
    total = len(feature_xyz)
    for lo in range(0, total, batch_size):
        window = slice(lo, min(lo + batch_size, total))
        yield feature_xyz[window], labels[window], seq_len[window]

# Hyperparameters

In [34]:
# Padded clip length in frames; matches the 50-frame downsampling target above.
max_seq_len=50
# Decoder GRU width; each encoder direction uses rnn_size // 2 units.
rnn_size=2048
# Features per frame.
input_size = 75
# Default for the model's batch_size placeholder. The training loop below
# draws batches of 256 via a literal argument, independent of this value.
batch_size=64
# Initial Adam learning rate; the training loop decays it every 1000 iterations.
lr = .0001
# Passed to the model constructor, which does not use it in the graph.
train_keep_prob = 1.0
# Number of training steps run by the training loop.
iterations = 15000

# Model initialization

In [48]:
# Clear the default graph first so re-running this cell rebuilds the model
# from scratch instead of adding to a graph left over from an earlier run.
tf.reset_default_graph()
# FW
model = Seq2SeqModelFS(max_seq_len, input_size,rnn_size, batch_size, lr,train_keep_prob)
sess = get_session()
# Initialize all variables (fresh, untrained weights).
sess.run(tf.global_variables_initializer())

rnn_size = 2048
output_size = 75
 state_size = 2048


# Evaluate the accuracy before training

In [36]:
# Assemble fixed-size training inputs for the encoder:
#   fea         - one (50, 75) array per clip (downsampled, or zero-padded at the end)
#   seq_len_new - number of real (unpadded) frames per clip, capped at 50
#   lab         - 60-way one-hot label vectors
fea, lab, seq_len_new = [], [], []
for idx, data in enumerate(train_data):
    raw_len = np.asarray(data["input"]).shape[0]
    clip = dsamp_train[idx]
    if raw_len <= 50:
        # Short clip: copy it into the top of an all-zero (50, 75) buffer.
        padded = np.zeros((50, 75))
        padded[:raw_len, :] = clip
        clip = padded
    fea.append(clip)
    seq_len_new.append(min(raw_len, 50))

    one_hot_label = np.zeros((60,))
    one_hot_label[data["label"]] = 1.
    lab.append(one_hot_label)

In [37]:
# Assemble fixed-size test inputs for the encoder:
#   test_fea         - one (50, 75) array per clip (downsampled, or zero-padded at the end)
#   test_seq_len_new - number of real (unpadded) frames per clip, capped at 50
#   test_lab         - 60-way one-hot label vectors
test_fea, test_lab, test_seq_len_new = [], [], []
for idx, data in enumerate(test_data):
    raw_len = np.asarray(data["input"]).shape[0]
    clip = dsamp_test[idx]
    if raw_len <= 50:
        # Short clip: copy it into the top of an all-zero (50, 75) buffer.
        padded = np.zeros((50, 75))
        padded[:raw_len, :] = clip
        clip = padded
    test_fea.append(clip)
    test_seq_len_new.append(min(raw_len, 50))

    one_hot_label = np.zeros((60,))
    one_hot_label[data["label"]] = 1.
    test_lab.append(one_hot_label)

In [38]:
def get_feature(model,session,encoder_inputs,batch_size,seq_len):
    """Evaluate ``model.knn_state`` for one batch and return it.

    Feeds the batch, its sequence lengths and its actual size into the
    model's ``enc_xyz``, ``seq_len`` and ``batch_size`` inputs.
    """
    feed = {
        model.enc_xyz: encoder_inputs,
        model.seq_len: seq_len,
        model.batch_size: batch_size,
    }
    (knn_state,) = session.run([model.knn_state], feed)
    return knn_state

In [39]:
# Encode both splits with the current (still untrained) encoder, 64 clips at a
# time, collecting one feature array per batch. The per-batch timer that used
# to be started here was never read, so it has been dropped.
knn_feature = [
    get_feature(model, sess, encoder_inputs, len(encoder_inputs), seq_len_enc)
    for encoder_inputs, _labels, seq_len_enc
    in mini_batch_classify(fea, lab, seq_len_new, batch_size=64)
]

test_knn_feature = [
    get_feature(model, sess, encoder_inputs, len(encoder_inputs), seq_len_enc)
    for encoder_inputs, _labels, seq_len_enc
    in mini_batch_classify(test_fea, test_lab, test_seq_len_new, batch_size=64)
]

In [40]:
# Stack the per-batch feature arrays row-wise into one 2-D array per split
# (one row per clip, in dataset order).
knn_feature = np.vstack(knn_feature)
test_knn_feature = np.vstack(test_knn_feature)

In [41]:
from sklearn.neighbors import KNeighborsClassifier
# 1-nearest-neighbour classifier under cosine distance on the encoder features.
neigh = KNeighborsClassifier(n_neighbors=1,metric='cosine')
# argmax turns the one-hot label vectors back into integer class ids.
neigh.fit(knn_feature,np.argmax(lab,axis=1))

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='cosine',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [42]:
# Mean accuracy on the test split: baseline obtained with randomly initialized encoder weights.
neigh.score(test_knn_feature,np.argmax(test_lab,axis=1))

0.5667124445383478

In [45]:
def mini_batch(data, seq_len, input_size, batch_size):
    """Draw a random training batch (with replacement) and zero-pad it.

    Args:
        data: sequence of 2-D arrays, one per clip, each with at most
            ``seq_len`` rows and ``input_size`` columns.
        seq_len: padded number of frames per clip.
        input_size: number of features per frame.
        batch_size: number of clips to draw.

    Returns:
        encoder_inputs: float array (batch_size, seq_len, input_size); each
            drawn clip occupies the leading rows, the rest stays zero.
        decoder_inputs: all-zero float array of the same shape.
        seq_len_enc: int32 array (batch_size,) holding each clip's unpadded
            frame count.

    Raises:
        ValueError: if ``data`` is empty (raised by ``np.random.choice``) or
            a drawn clip does not fit into (seq_len, input_size).
    """
    encoder_inputs = np.zeros((batch_size, seq_len, input_size), dtype=float)
    decoder_inputs = np.zeros((batch_size, seq_len, input_size), dtype=float)
    # Frame counts are integers; this array used to be float even though it is
    # fed to an integer sequence-length input.
    seq_len_enc = np.zeros((batch_size,), dtype=np.int32)
    data_len = len(data)
    for row in range(batch_size):
        clip = data[np.random.choice(data_len)]
        n_frames = clip.shape[0]
        # Slice assignment copies the values, so no explicit np.copy is needed.
        encoder_inputs[row, :n_frames, :] = clip
        seq_len_enc[row] = n_frames
    return encoder_inputs, decoder_inputs, seq_len_enc

# training loop

In [49]:
# Train the autoencoder for `iterations` steps, logging the loss every 100
# steps, re-evaluating 1-NN accuracy on encoder features every 200 steps and
# decaying the learning rate every 1000 steps.
start_time = timeit.default_timer()
knn_score = []       # 1-NN test accuracy, one entry per evaluation (every 200 steps)
train_loss_li = []   # training loss sampled every 100 steps
max_score = 0.0      # best 1-NN test accuracy seen so far
for i in range(1,iterations+1):
    # Random batch of 256 downsampled training clips (drawn with replacement).
    encoder_inputs, decoder_inputs, seq_len_enc = mini_batch(dsamp_train, seq_len=50, input_size=75, batch_size=256)
    _,train_loss = model.step(sess, encoder_inputs, decoder_inputs, seq_len_enc, False)

    if i%100 == 0:
        print("step {0}:train loss:{1:.4f}".format(i, train_loss))
        train_loss_li.append(train_loss)
        end_time = timeit.default_timer()
        print("iteration {}:".format(i),end='')
        print(" using {:.2f} sec".format(end_time-start_time))
        # The timer is only reset here, so the interval printed at steps 300,
        # 500, ... also contains the k-NN evaluation run at the preceding
        # multiple of 200.
        start_time = end_time
    # check knn score every 200 iterations
    if i % 200 == 0:
        # Re-encode both splits with the current weights (reuses and
        # overwrites encoder_inputs / seq_len_enc from the training step).
        knn_feature = []
        for encoder_inputs, labels, seq_len_enc in mini_batch_classify(fea, lab, seq_len_new, batch_size=64):
            result = get_feature(model,sess,encoder_inputs,len(encoder_inputs),seq_len_enc)
            knn_feature.append(result)
        test_knn_feature = []
        for encoder_inputs, labels, seq_len_enc in mini_batch_classify(test_fea, test_lab, test_seq_len_new, batch_size=64):
            result = get_feature(model,sess,encoder_inputs,len(encoder_inputs),seq_len_enc)
            test_knn_feature.append(result)
        knn_feature = np.vstack(knn_feature)
        test_knn_feature = np.vstack(test_knn_feature)
        # Fit on training features, score on test features (mean accuracy).
        neigh = KNeighborsClassifier(n_neighbors=1,metric='cosine')
        neigh.fit(knn_feature,np.argmax(lab,axis=1))
        score = neigh.score(test_knn_feature,np.argmax(test_lab,axis=1))
        knn_score.append(score)
        print(f"knn test score at {i}th iterations: ", score)
        # save the model: change to your own path
        # (checkpoint saving is currently disabled; only the best score is tracked)
        if score > max_score:
#             model.saver.save(sess,"/home/neuralnet/skeleton_action_recog/NTU_models/cross_subject/fixed_state/lastenc_l1",global_step=i)
            max_score = score
            print("Current KNN Max Score is {}".format(max_score))
    # Multiply the learning rate by the model's decay_rate every 1000 steps.
    if i%1000 == 0:
        sess.run(model.learning_rate_decay_op)

step 100:train loss:13.5850
iteration 100: using 97.12 sec
step 200:train loss:10.0412
iteration 200: using 94.71 sec
knn test score at 200th iterations:  0.6197971688147053
Current KNN Max Score is 0.6197971688147053
step 300:train loss:8.3745
iteration 300: using 197.96 sec
step 400:train loss:7.7679
iteration 400: using 95.95 sec
knn test score at 400th iterations:  0.6394992605113036
Current KNN Max Score is 0.6394992605113036
step 500:train loss:7.2031
iteration 500: using 196.44 sec
step 600:train loss:6.8727
iteration 600: using 95.73 sec
knn test score at 600th iterations:  0.6585146841326854
Current KNN Max Score is 0.6585146841326854
step 700:train loss:6.6160
iteration 700: using 197.47 sec
step 800:train loss:6.9852
iteration 800: using 95.55 sec
knn test score at 800th iterations:  0.6701352207901965
Current KNN Max Score is 0.6701352207901965
step 900:train loss:6.8098
iteration 900: using 198.44 sec
step 1000:train loss:6.2769
iteration 1000: using 95.93 sec
knn test sco

step 7800:train loss:4.7999
iteration 7800: using 97.25 sec
knn test score at 7800th iterations:  0.739435875765899
step 7900:train loss:4.5526
iteration 7900: using 201.90 sec
step 8000:train loss:4.6053
iteration 8000: using 97.17 sec
knn test score at 8000th iterations:  0.7412845975068667
Current KNN Max Score is 0.7412845975068667
step 8100:train loss:4.8429
iteration 8100: using 201.72 sec
step 8200:train loss:4.7566
iteration 8200: using 97.09 sec
knn test score at 8200th iterations:  0.7410733150221847
step 8300:train loss:4.7267
iteration 8300: using 203.78 sec
step 8400:train loss:4.5659
iteration 8400: using 96.47 sec
knn test score at 8400th iterations:  0.7427107542784703
Current KNN Max Score is 0.7427107542784703
step 8500:train loss:4.5794
iteration 8500: using 201.59 sec
step 8600:train loss:4.6262
iteration 8600: using 96.30 sec
knn test score at 8600th iterations:  0.7421825480667653
step 8700:train loss:4.7235
iteration 8700: using 201.15 sec
step 8800:train loss:4.