update

tangzhenyu · Sep 11, 2017 · 4097030 · 4097030
1 parent 8bce04a
commit 4097030
Show file tree

Hide file tree

Showing 7 changed files with 869 additions and 0 deletions.
diff --git a/DeepVO/ConvLSTM_Cell.py b/DeepVO/ConvLSTM_Cell.py
@@ -0,0 +1,153 @@
+import tensorflow as tf
+
+class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
+  """An LSTM cell with convolutions instead of multiplications.
+
+  Reference:
+    Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
+
+  Args:
+    shape: list of ints, the spatial dimensions of one input frame (no batch
+      axis, no channel axis). Must be a list because it is concatenated with
+      ``[filters]``.
+    filters: int, number of channels of the cell state and of the output.
+    kernel: list of ints, spatial size of the convolution kernel.
+    forget_bias: value added to the forget gate before the sigmoid.
+    activation: nonlinearity applied to the candidate and to the cell state.
+    normalize: if True, layer normalization is applied to the gates and the
+      cell state and no bias variable is created; if False a bias is added.
+    peephole: if True, element-wise peephole weights (W_ci, W_cf, W_co)
+      connect the cell state to the input, forget and output gates.
+    data_format: 'channels_last' or 'channels_first'.
+    reuse: forwarded to the base RNNCell as ``_reuse``.
+
+  Raises:
+    ValueError: if ``data_format`` is neither of the two supported values.
+  """
+
+  def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None):
+    super(ConvLSTMCell, self).__init__(_reuse=reuse)
+    self._kernel = kernel
+    self._filters = filters
+    self._forget_bias = forget_bias
+    self._activation = activation
+    self._normalize = normalize
+    self._peephole = peephole
+    if data_format == 'channels_last':
+      self._size = tf.TensorShape(shape + [self._filters])
+      # Tensors seen by call() carry a leading batch axis, so the index of
+      # their last axis equals the rank of the un-batched state.
+      self._feature_axis = self._size.ndims
+      self._data_format = None
+    elif data_format == 'channels_first':
+      self._size = tf.TensorShape([self._filters] + shape)
+      # Axis 0 of the batched tensors is the batch; the channels follow it.
+      # (Axis 0 would concatenate and split over the batch instead.)
+      self._feature_axis = 1
+      self._data_format = 'NC'
+    else:
+      raise ValueError('Unknown data_format')
+
+  @property
+  def state_size(self):
+    """LSTMStateTuple of the (cell, hidden) state shapes, without batch axis."""
+    return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)
+
+  @property
+  def output_size(self):
+    """Shape of the per-step output, without batch axis."""
+    return self._size
+
+  def call(self, x, state):
+    """Run one ConvLSTM step.
+
+    Args:
+      x: batched input tensor for the current time step.
+      state: LSTMStateTuple ``(c, h)`` from the previous step.
+
+    Returns:
+      A pair ``(h, LSTMStateTuple(c, h))`` with the new hidden and cell state.
+    """
+    c, h = state
+
+    # One convolution over [input, previous hidden] yields all four gates.
+    x = tf.concat([x, h], axis=self._feature_axis)
+    # Read the channel count on the feature axis so that it is also correct
+    # for channels_first (where the last axis is spatial).
+    n = x.shape[self._feature_axis].value
+    m = 4 * self._filters if self._filters > 1 else 4
+    W = tf.get_variable('kernel', self._kernel + [n, m])
+    y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)
+    if not self._normalize:
+      # NOTE(review): a [m]-shaped bias broadcasts over the last axis, which
+      # is the channel axis only for channels_last -- confirm before using
+      # normalize=False together with channels_first.
+      y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
+    j, i, f, o = tf.split(y, 4, axis=self._feature_axis)
+
+    if self._peephole:
+      # Peephole weights have the shape of one (un-batched) cell state.
+      i += tf.get_variable('W_ci', c.shape[1:]) * c
+      f += tf.get_variable('W_cf', c.shape[1:]) * c
+
+    if self._normalize:
+      j = tf.contrib.layers.layer_norm(j)
+      i = tf.contrib.layers.layer_norm(i)
+      f = tf.contrib.layers.layer_norm(f)
+
+    f = tf.sigmoid(f + self._forget_bias)
+    i = tf.sigmoid(i)
+    c = c * f + i * self._activation(j)
+
+    if self._peephole:
+      # The output gate peeks at the freshly updated cell state.
+      o += tf.get_variable('W_co', c.shape[1:]) * c
+
+    if self._normalize:
+      o = tf.contrib.layers.layer_norm(o)
+      c = tf.contrib.layers.layer_norm(c)
+
+    o = tf.sigmoid(o)
+    h = o * self._activation(c)
+
+    # TODO
+    #tf.summary.histogram('forget_gate', f)
+    #tf.summary.histogram('input_gate', i)
+    #tf.summary.histogram('output_gate', o)
+    #tf.summary.histogram('cell_state', c)
+
+    state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
+
+    return h, state
+
+
+class ConvGRUCell(tf.nn.rnn_cell.RNNCell):
+  """A GRU cell with convolutions instead of multiplications.
+
+  Args:
+    shape: list of ints, the spatial dimensions of one input frame (no batch
+      axis, no channel axis). Must be a list because it is concatenated with
+      ``[filters]``.
+    filters: int, number of channels of the hidden state / output.
+    kernel: list of ints, spatial size of the convolution kernels.
+    activation: nonlinearity applied to the candidate state.
+    normalize: if True, layer normalization replaces the bias terms.
+    data_format: 'channels_last' or 'channels_first'.
+    reuse: forwarded to the base RNNCell as ``_reuse``.
+
+  Raises:
+    ValueError: if ``data_format`` is neither of the two supported values.
+  """
+
+  def __init__(self, shape, filters, kernel, activation=tf.tanh, normalize=True, data_format='channels_last', reuse=None):
+    super(ConvGRUCell, self).__init__(_reuse=reuse)
+    self._filters = filters
+    self._kernel = kernel
+    self._activation = activation
+    self._normalize = normalize
+    if data_format == 'channels_last':
+      self._size = tf.TensorShape(shape + [self._filters])
+      # Tensors seen by call() carry a leading batch axis, so the index of
+      # their last axis equals the rank of the un-batched state.
+      self._feature_axis = self._size.ndims
+      self._data_format = None
+    elif data_format == 'channels_first':
+      self._size = tf.TensorShape([self._filters] + shape)
+      # Axis 0 of the batched tensors is the batch; the channels follow it.
+      # (Axis 0 would concatenate and split over the batch instead.)
+      self._feature_axis = 1
+      self._data_format = 'NC'
+    else:
+      raise ValueError('Unknown data_format')
+
+  @property
+  def state_size(self):
+    """Shape of the hidden state, without batch axis."""
+    return self._size
+
+  @property
+  def output_size(self):
+    """Shape of the per-step output, without batch axis."""
+    return self._size
+
+  def call(self, x, h):
+    """Run one ConvGRU step.
+
+    Args:
+      x: batched input tensor for the current time step.
+      h: hidden state from the previous step.
+
+    Returns:
+      A pair ``(h, h)``: the new hidden state as both output and state.
+    """
+    channels = x.shape[self._feature_axis].value
+
+    with tf.variable_scope('gates'):
+      # One convolution over [input, hidden] yields the reset and update gates.
+      inputs = tf.concat([x, h], axis=self._feature_axis)
+      n = channels + self._filters
+      m = 2 * self._filters if self._filters > 1 else 2
+      W = tf.get_variable('kernel', self._kernel + [n, m])
+      y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
+      if self._normalize:
+        r, u = tf.split(y, 2, axis=self._feature_axis)
+        r = tf.contrib.layers.layer_norm(r)
+        u = tf.contrib.layers.layer_norm(u)
+      else:
+        # Gate bias starts at one, unlike the zero-initialised candidate bias.
+        y += tf.get_variable('bias', [m], initializer=tf.ones_initializer())
+        r, u = tf.split(y, 2, axis=self._feature_axis)
+      r, u = tf.sigmoid(r), tf.sigmoid(u)
+
+      # TODO
+      #tf.summary.histogram('reset_gate', r)
+      #tf.summary.histogram('update_gate', u)
+
+    with tf.variable_scope('candidate'):
+      # The reset gate scales the previous state before the candidate conv.
+      inputs = tf.concat([x, r * h], axis=self._feature_axis)
+      n = channels + self._filters
+      m = self._filters
+      W = tf.get_variable('kernel', self._kernel + [n, m])
+      y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
+      if self._normalize:
+        y = tf.contrib.layers.layer_norm(y)
+      else:
+        y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
+      # The update gate blends the previous state with the candidate.
+      h = u * h + (1 - u) * self._activation(y)
+
+    return h, h
+
+
+if __name__ == '__main__':
+  # Smoke test: push a random time-major batch through a ConvLSTM cell and
+  # print the shape of the stacked outputs.
+  import numpy as np  # only needed by this demo, hence the local import
+
+  # Time-major layout: [time=5, batch=2, height=3, width=3, channels=3].
+  inputs = tf.placeholder(tf.float32, [5, 2, 3, 3, 3])
+  cell = ConvLSTMCell([3, 3], 6, [3, 3])
+  outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype, time_major=True)
+  with tf.Session() as sess:
+    inp = np.random.normal(size=(5, 2, 3, 3, 3))
+    sess.run(tf.global_variables_initializer())
+    o, s = sess.run([outputs, state], feed_dict={inputs: inp})
+    print(o.shape)  # (5, 2, 3, 3, 6)
diff --git a/DeepVO/Test_ConvLSTM_Cell.py b/DeepVO/Test_ConvLSTM_Cell.py
@@ -0,0 +1,34 @@
+import tensorflow as tf
+
+# ConvLSTMCell and ConvGRUCell are defined in ConvLSTM_Cell.py (same
+# directory as this script), so import them from that module.
+from ConvLSTM_Cell import ConvGRUCell, ConvLSTMCell
+
+batch_size = 32
+timesteps = 100
+shape = [640, 480]
+kernel = [3, 3]
+channels = 3
+filters = 12
+
+# Create a placeholder for videos: [batch, time] + spatial shape + [channels].
+inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
+
+# Add the ConvLSTM step.
+cell = ConvLSTMCell(shape, filters, kernel)
+outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
+
+# There's also a ConvGRUCell that is more memory efficient.
+cell = ConvGRUCell(shape, filters, kernel)
+outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
+
+# The per-step input does not have to be 3D (two spatial axes + channels):
+# 2D input (one spatial axis + channels) works as well...
+shape = [100]
+kernel = [3]
+inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
+cell = ConvLSTMCell(shape, filters, kernel)
+outputs, state = tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs, dtype=inputs.dtype)
+
+# ...and so does 4D input (three spatial axes + channels).
+shape = [50, 50, 50]
+kernel = [1, 3, 5]
+inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels])
+cell = ConvGRUCell(shape, filters, kernel)
+outputs, state = tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs, dtype=inputs.dtype)
diff --git a/DeepVO/train.py b/DeepVO/train.py
@@ -0,0 +1,71 @@
+from __future__ import print_function
+
+from keras.preprocessing import sequence
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Activation
+from keras.layers import Embedding
+from keras.layers import LSTM
+from keras.layers import Conv1D, MaxPooling1D
+from keras.datasets import imdb
+
+# ---- Hyper-parameters ----
+
+# Embedding layer
+max_features = 20000
+maxlen = 100
+embedding_size = 128
+
+# 1-D convolution
+kernel_size = 5
+filters = 64
+pool_size = 4
+
+# LSTM
+lstm_output_size = 70
+
+# Training
+# Note: batch_size is highly sensitive.
+# Only 2 epochs are needed as the dataset is very small.
+batch_size = 30
+epochs = 2
+
+# ---- Data ----
+
+print('Loading data...')
+train_split, test_split = imdb.load_data(num_words=max_features)
+x_train, y_train = train_split
+x_test, y_test = test_split
+print(len(x_train), 'train sequences')
+print(len(x_test), 'test sequences')
+
+# Bring every review to exactly `maxlen` tokens.
+print('Pad sequences (samples x time)')
+x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
+x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
+print('x_train shape:', x_train.shape)
+print('x_test shape:', x_test.shape)
+
+# ---- Model: embedding -> dropout -> conv/pool -> LSTM -> sigmoid unit ----
+
+print('Build model...')
+
+model = Sequential([
+    Embedding(max_features, embedding_size, input_length=maxlen),
+    Dropout(0.25),
+    Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1),
+    MaxPooling1D(pool_size=pool_size),
+    LSTM(lstm_output_size),
+    Dense(1),
+    Activation('sigmoid'),
+])
+
+model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+# ---- Training and evaluation ----
+
+print('Train...')
+model.fit(
+    x_train,
+    y_train,
+    batch_size=batch_size,
+    epochs=epochs,
+    validation_data=(x_test, y_test),
+)
+evaluation = model.evaluate(x_test, y_test, batch_size=batch_size)
+score, acc = evaluation
+print('Test score:', score)
+print('Test accuracy:', acc)
diff --git a/FCN/BatchDatsetReader.py b/FCN/BatchDatsetReader.py
@@ -0,0 +1,83 @@
+"""
+Code ideas from https://github.com/Newmu/dcgan and tensorflow mnist dataset reader
+"""
+import numpy as np
+import scipy.misc as misc
+
+
+class BatchDatset:
+    """In-memory image/annotation reader with sequential and random batching.
+
+    All images and annotations named in the record list are read eagerly by
+    the constructor and kept as two NumPy arrays.
+    """
+
+    # Class-level defaults, kept for backward compatibility; every instance
+    # rebinds its own values in __init__.
+    files = []
+    images = []
+    annotations = []
+    image_options = {}
+    batch_offset = 0
+    epochs_completed = 0
+
+    def __init__(self, records_list, image_options=None):
+        """
+        Initialize a generic file reader with batching for a list of files
+        :param records_list: list of file records to read -
+        sample record: {'image': f, 'annotation': annotation_file, 'filename': filename}
+        :param image_options: A dictionary of options for modifying the output image
+        (defaults to no options)
+        Available options:
+        resize = True/ False
+        resize_size = #size of output image - square, nearest-neighbour resize
+        color=True/False (not read anywhere in this class)
+        """
+        # Use a fresh dict per instance: a shared `{}` default would leak
+        # option changes made through one reader into every other reader.
+        if image_options is None:
+            image_options = {}
+        print("Initializing Batch Dataset Reader...")
+        print(image_options)
+        self.files = records_list
+        self.image_options = image_options
+        self.batch_offset = 0
+        self.epochs_completed = 0
+        self._read_images()
+
+    def _read_images(self):
+        """Load every record's image and annotation into NumPy arrays."""
+        # Images are forced to three channels; annotations are left as read.
+        self.__channels = True
+        self.images = np.array([self._transform(record['image']) for record in self.files])
+        self.__channels = False
+        # axis=-1 appends the trailing axis for any rank; the former axis=3
+        # is out of range for 2-D annotations on current NumPy.
+        self.annotations = np.array(
+            [np.expand_dims(self._transform(record['annotation']), axis=-1) for record in self.files])
+        print(self.images.shape)
+        print(self.annotations.shape)
+
+    def _transform(self, filename):
+        """Read one file and apply the configured channel/resize handling.
+
+        :param filename: path of the image file to read
+        :return: the image as a NumPy array
+        """
+        image = misc.imread(filename)
+        if self.__channels and image.ndim < 3:
+            # Replicate a grayscale image along a new LAST axis -> (h, w, 3).
+            # Stacking on the first axis would produce (3, h, w) instead.
+            image = np.stack((image,) * 3, axis=-1)
+
+        if self.image_options.get("resize", False):
+            resize_size = int(self.image_options["resize_size"])
+            image = misc.imresize(image,
+                                  [resize_size, resize_size], interp='nearest')
+
+        return np.array(image)
+
+    def get_records(self):
+        """Return the full ``(images, annotations)`` arrays."""
+        return self.images, self.annotations
+
+    def reset_batch_offset(self, offset=0):
+        """Move the sequential read position to ``offset``."""
+        self.batch_offset = offset
+
+    def next_batch(self, batch_size):
+        """Return the next ``batch_size`` samples in sequential order.
+
+        When the requested batch would run past the end of the data, the epoch
+        counter is incremented, images and annotations are shuffled with the
+        same permutation, and the batch is taken from the start of the
+        reshuffled data (the unread tail of the old epoch is skipped).
+
+        :param batch_size: number of samples to return
+        :return: tuple ``(images, annotations)`` for the batch
+        """
+        start = self.batch_offset
+        self.batch_offset += batch_size
+        if self.batch_offset > self.images.shape[0]:
+            # Finished epoch
+            self.epochs_completed += 1
+            print("****************** Epochs completed: " + str(self.epochs_completed) + "******************")
+            # Shuffle the data, keeping image/annotation pairs aligned
+            perm = np.arange(self.images.shape[0])
+            np.random.shuffle(perm)
+            self.images = self.images[perm]
+            self.annotations = self.annotations[perm]
+            # Start next epoch
+            start = 0
+            self.batch_offset = batch_size
+
+        end = self.batch_offset
+        return self.images[start:end], self.annotations[start:end]
+
+    def get_random_batch(self, batch_size):
+        """Return ``batch_size`` samples drawn uniformly with replacement."""
+        indexes = np.random.randint(0, self.images.shape[0], size=[batch_size]).tolist()
+        return self.images[indexes], self.annotations[indexes]