diff --git a/README.md b/README.md
index b4feda3..e29d8b0 100755
--- a/README.md
+++ b/README.md
@@ -1,16 +1,21 @@
 # DCGAN in TensorLayer
 
-TensorLayer implementation of [Deep Convolutional Generative Adversarial Networks](http://arxiv.org/abs/1511.06434).
+This is the TensorLayer implementation of [Deep Convolutional Generative Adversarial Networks](http://arxiv.org/abs/1511.06434).
 
 Looking for Text to Image Synthesis ? [click here](https://github.com/zsdonghao/text-to-image)
 
 ![alt tag](img/DCGAN.png)
+
+- 🆕 🔥 2019 May: We just updated this project to support TF2 and TL2. Enjoy!
+- 🆕 🔥 2019 May: This project was chosen as the default template for TL projects.
+
+
 ## Prerequisites
 
-- Python3
-- TensorFlow==1.13
-- TensorLayer (self-contained)
+- Python 3.5 or 3.6
+- TensorFlow==2.0.0a0 `pip3 install tensorflow-gpu==2.0.0a0`
+- TensorLayer==2.0.0 `pip3 install tensorlayer==2.0.0`
 
 ## Usage
 
@@ -18,11 +23,8 @@
 First, download the aligned face images from [google](https://drive.google.com/o
 
 Second, train the GAN:
 
-    $ python main_eager_mode.py # single GPU for TF>=1.13
-    $ python main_graph_mode.py # single GPU for TF<=1.13
-    $ python main_eager_mode_horovod.py # multiple GPU (TODO)
-    $ python main_eager_mode_tlmagic.py # multiple GPU (TODO)
-
+    $ python train.py
+
 
 ## Result on celebA
diff --git a/data.py b/data.py
new file mode 100755
index 0000000..9a98080
--- /dev/null
+++ b/data.py
@@ -0,0 +1,57 @@
+import os
+import numpy as np
+import tensorflow as tf
+import tensorlayer as tl
+## enable debug logging
+tl.logging.set_verbosity(tl.logging.DEBUG)
+
+class FLAGS(object):
+    def __init__(self):
+        self.n_epoch = 25  # "Epoch to train [25]"
+        self.z_dim = 100  # "Num of noise values [100]"
+        self.learning_rate = 0.0002  # "Learning rate for Adam [0.0002]"
+        self.beta1 = 0.5  # "Momentum term of Adam [0.5]"
+        self.batch_size = 64  # "The number of batch images [64]"
+        self.output_size = 64  # "The size of the output images to produce [64]"
+        self.sample_size = 64  # "The number of sample images [64]"
+        self.c_dim = 3  # "Number of image channels [3]"
+        self.save_step = 500  # "The interval of saving checkpoints [500]"
+        # self.dataset = "celebA"  # "The name of dataset [celebA, mnist, lsun]"
+        self.checkpoint_dir = "checkpoint"  # "Directory name to save the checkpoints [checkpoint]"
+        self.sample_dir = "samples"  # "Directory name to save the image samples [samples]"
+        assert np.sqrt(self.sample_size) % 1 == 0., 'Flag `sample_size` needs to be a perfect square'
+flags = FLAGS()
+
+tl.files.exists_or_mkdir(flags.checkpoint_dir)  # save model
+tl.files.exists_or_mkdir(flags.sample_dir)  # save generated images
+
+def get_celebA(output_size, n_epoch, batch_size):
+    # dataset API and augmentation
+    images_path = tl.files.load_file_list(path='data', regx='.*.jpg', keep_prefix=True, printable=False)
+    def generator_train():
+        for image_path in images_path:
+            yield image_path.encode('utf-8')
+    def _map_fn(image_path):
+        image = tf.io.read_file(image_path)
+        image = tf.image.decode_jpeg(image, channels=3)  # decode RGB, uint8 in [0, 255]
+        image = tf.image.convert_image_dtype(image, dtype=tf.float32)  # float32 in [0, 1]
+        # image = tf.image.crop_central(image, [FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim])
+        # image = tf.image.resize_images(image, FLAGS.output_size])
+        image = image[45:173, 25:153, :]  # central crop
+        image = tf.image.resize([image], (output_size, output_size))[0]
+        # image = tf.image.crop_and_resize(image, boxes=[[]], crop_size=[64, 64])
+        # image = tf.image.resize_image_with_crop_or_pad(image, FLAGS.output_size, FLAGS.output_size)  # central crop
+        image = tf.image.random_flip_left_right(image)
+        image = image * 2 - 1  # rescale to [-1, 1]
+        return image
+    train_ds = tf.data.Dataset.from_generator(generator_train, output_types=tf.string)
+    ds = train_ds.shuffle(buffer_size=4096)
+    # ds = ds.shard(num_shards=hvd.size(), index=hvd.rank())
+    ds = ds.repeat(n_epoch)
+    ds = ds.map(_map_fn, num_parallel_calls=4)
+    ds = ds.batch(batch_size)
+    ds = ds.prefetch(buffer_size=2)
+    return ds, images_path
+    # for batch_images in train_ds:
+    #     print(batch_images.shape)
+    # value = ds.make_one_shot_iterator().get_next()
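How `data.py` is meant to be consumed — a minimal sketch, assuming `train.py` keeps the calling convention of the deleted training scripts below:

```python
# Sketch only: wiring data.py into a training loop (the real train.py may differ in detail).
from data import flags, get_celebA
from model import get_generator, get_discriminator

images, images_path = get_celebA(flags.output_size, flags.n_epoch, flags.batch_size)
G = get_generator([None, flags.z_dim])
D = get_discriminator([None, flags.output_size, flags.output_size, flags.c_dim])

for step, batch_images in enumerate(images):  # batches of shape [64, 64, 64, 3], values in [-1, 1]
    ...  # one discriminator + generator update per batch, see the scripts below
```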
[500]") + # self.dataset = "celebA" # "The name of dataset [celebA, mnist, lsun]") + self.checkpoint_dir = "checkpoint" # "Directory name to save the checkpoints [checkpoint]") + self.sample_dir = "samples" # "Directory name to save the image samples [samples]") + assert np.sqrt(self.sample_size) % 1 == 0., 'Flag `sample_size` needs to be a perfect square' +flags = FLAGS() + +tl.files.exists_or_mkdir(flags.checkpoint_dir) # save model +tl.files.exists_or_mkdir(flags.sample_dir) # save generated image + +def get_celebA(output_size, n_epoch, batch_size): + # dataset API and augmentation + images_path = tl.files.load_file_list(path='data', regx='.*.jpg', keep_prefix=True, printable=False) + def generator_train(): + for image_path in images_path: + yield image_path.encode('utf-8') + def _map_fn(image_path): + image = tf.io.read_file(image_path) + image = tf.image.decode_jpeg(image, channels=3) # get RGB with 0~1 + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # image = tf.image.crop_central(image, [FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim]) + # image = tf.image.resize_images(image, FLAGS.output_size]) + image = image[45:173, 25:153, :] # central crop + image = tf.image.resize([image], (output_size, output_size))[0] + # image = tf.image.crop_and_resize(image, boxes=[[]], crop_size=[64, 64]) + # image = tf.image.resize_image_with_crop_or_pad(image, FLAGS.output_size, FLAGS.output_size) # central crop + image = tf.image.random_flip_left_right(image) + image = image * 2 - 1 + return image + train_ds = tf.data.Dataset.from_generator(generator_train, output_types=tf.string) + ds = train_ds.shuffle(buffer_size=4096) + # ds = ds.shard(num_shards=hvd.size(), index=hvd.rank()) + ds = ds.repeat(n_epoch) + ds = ds.map(_map_fn, num_parallel_calls=4) + ds = ds.batch(batch_size) + ds = ds.prefetch(buffer_size=2) + return ds, images_path + # for batch_images in train_ds: + # print(batch_images.shape) + # value = ds.make_one_shot_iterator().get_next() diff --git a/main_eager_mode2.py b/main_eager_mode2.py deleted file mode 100755 index 553b036..0000000 --- a/main_eager_mode2.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Eager mode, single GPU - - -TODO -""" - -import os, time, multiprocessing -import numpy as np -import tensorflow as tf -tf.enable_eager_execution() -import tensorlayer as tl -from glob import glob -from utils import get_celebA, flags # get_image -from model import get_generator, get_discriminator - -FLAGS = flags.FLAGS -num_tiles = int(np.sqrt(FLAGS.sample_size)) - -def correct_grad(grad, scale): - if grad != None: - return grad * scale - else: - return None -def sigmoid(x): - return 1 / (1 + tf.exp(-x)) - -def train(): - images, images_path = get_celebA(FLAGS.output_size, FLAGS.n_epoch, FLAGS.batch_size) - G = get_generator([None, FLAGS.z_dim]) - D = get_discriminator([None, FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim]) - - G.train() - D.train() - - d_optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1) - g_optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1) - - n_step_epoch = int(len(images_path) // FLAGS.batch_size) - - for step, batch_images in enumerate(images): - step_time = time.time() - - with tf.GradientTape(persistent=True) as tape: - z = tf.contrib.distributions.Normal(0., 1.).sample([FLAGS.batch_size, FLAGS.z_dim]) #tf.placeholder(tf.float32, [None, z_dim], name='z_noise') - d_logits = D(G(z)) - d2_logits = D(batch_images) - d_loss_real = tl.cost.sigmoid_cross_entropy(d2_logits, tf.ones_like(d2_logits), 
diff --git a/main_eager_mode_horovod.py b/main_eager_mode_horovod.py
deleted file mode 100755
index c33b551..0000000
--- a/main_eager_mode_horovod.py
+++ /dev/null
@@ -1,89 +0,0 @@
-"""Eager mode, distributed training
-ref: https://github.com/horovod/horovod/blob/master/examples/tensorflow_mnist_eager.py
-
-
-
-TODO
-
-
-"""
-
-import os, time, multiprocessing
-import numpy as np
-import tensorflow as tf
-import horovod.tensorflow as hvd
-# tf.enable_eager_execution()
-import tensorlayer as tl
-from glob import glob
-from utils import get_celebA, flags
-from model import get_generator, get_discriminator
-
-FLAGS = flags.FLAGS
-num_tiles = int(np.sqrt(FLAGS.sample_size))
-
-def train():
-    # Horovod: initialize Horovod.
-    hvd.init()
-    # Horovod: pin GPU to be used to process local rank (one GPU per process)
-    config = tf.ConfigProto()
-    config.gpu_options.visible_device_list = str(hvd.local_rank())
-    tf.enable_eager_execution(config=config)
-    # Horovod: adjust number of steps based on number of GPUs.
-    images, images_path = get_celebA(FLAGS.output_size, FLAGS.n_epoch // hvd.size(), FLAGS.batch_size)
-
-    G = get_generator([None, FLAGS.z_dim])
-    D = get_discriminator([None, FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim])
-
-    G.train()
-    D.train()
-
-    d_optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate * hvd.size(), beta1=FLAGS.beta1) # linear scaling rule
-    g_optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate * hvd.size(), beta1=FLAGS.beta1)
-
-    step_counter = tf.train.get_or_create_global_step()
-
-    n_step_epoch = int(len(images_path) // FLAGS.batch_size)
-
-    for step, batch_images in enumerate(images):
-        step_time = time.time()
-        with tf.GradientTape(persistent=True) as tape:
-            z = tf.contrib.distributions.Normal(0., 1.).sample([FLAGS.batch_size, FLAGS.z_dim]) #tf.placeholder(tf.float32, [None, z_dim], name='z_noise')
-            d_logits = D(G(z))
-            d2_logits = D(batch_images)
-            # discriminator: real images are labelled as 1
-            d_loss_real = tl.cost.sigmoid_cross_entropy(d2_logits, tf.ones_like(d2_logits), name='dreal')
-            # discriminator: images from generator (fake) are labelled as 0
-            d_loss_fake = tl.cost.sigmoid_cross_entropy(d_logits, tf.zeros_like(d_logits), name='dfake')
-            # cost for updating discriminator
-            d_loss = d_loss_real + d_loss_fake
-            # generator: try to make the fake images look real (1)
-            g_loss = tl.cost.sigmoid_cross_entropy(d_logits, tf.ones_like(d_logits), name='gfake')
-
-        # Horovod: broadcast initial variable states from rank 0 to all other processes.
-        # This is necessary to ensure consistent initialization of all workers when
-        # training is started with random weights or restored from a checkpoint.
-        if step == 0:
-            hvd.broadcast_variables(G.weights, root_rank=0)
-            hvd.broadcast_variables(D.weights, root_rank=0)
-
-        # Horovod: add Horovod Distributed GradientTape.
-        tape = hvd.DistributedGradientTape(tape)
-        #
-        grad = tape.gradient(d_loss, D.weights)
-        d_optimizer.apply_gradients(zip(grad, D.weights), global_step=tf.train.get_or_create_global_step())
-        grad = tape.gradient(g_loss, G.weights)
-        g_optimizer.apply_gradients(zip(grad, G.weights), global_step=tf.train.get_or_create_global_step())
-
-        # Horovod: print logging only on worker 0
-        if hvd.rank() == 0:
-            print("Epoch: [{}/{}] [{}/{}] took: {:3f}, d_loss: {:5f}, g_loss: {:5f}".format(step//n_step_epoch, FLAGS.n_epoch, step, n_step_epoch, time.time()-step_time, d_loss, g_loss))
-
-        # Horovod: save checkpoints only on worker 0
-        if hvd.rank() == 0 and np.mod(step, FLAGS.save_step) == 0:
-            G.save_weights('{}/G.npz'.format(FLAGS.checkpoint_dir), format='npz')
-            D.save_weights('{}/D.npz'.format(FLAGS.checkpoint_dir), format='npz')
-            result = G(z)
-            tl.visualize.save_images(result.numpy(), [num_tiles, num_tiles], '{}/train_{:02d}_{:04d}.png'.format(FLAGS.sample_dir, step//n_step_epoch, step))
-
-if __name__ == '__main__':
-    train()
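With the Horovod variant gone, the multi-GPU TODOs disappear from the README as well; distribution is left to TF2's own APIs. Purely illustrative (the new `train.py` is single-GPU, and this is not code from this repo), one way it could look:

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()  # one replica per visible GPU
with strategy.scope():
    pass  # build G, D and both optimizers here so their variables are mirrored
# dist_ds = strategy.experimental_distribute_dataset(ds)  # then iterate per-replica batches
```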
diff --git a/main_graph_mode.py b/main_graph_mode.py
deleted file mode 100755
index 3417f7c..0000000
--- a/main_graph_mode.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""Graph mode, single GPU
-
-For TensorFlow 1.13
-"""
-
-import os, time, multiprocessing
-import numpy as np
-import tensorflow as tf
-import tensorlayer as tl
-from glob import glob
-from utils import get_celebA, flags
-from model import get_generator, get_discriminator
-
-FLAGS = flags.FLAGS
-num_tiles = int(np.sqrt(FLAGS.sample_size))
-
-def train():
-    z = tf.contrib.distributions.Normal(0., 1.).sample([FLAGS.batch_size, FLAGS.z_dim]) #tf.placeholder(tf.float32, [None, z_dim], name='z_noise')
-    ds, images_path = get_celebA(FLAGS.output_size, FLAGS.n_epoch, FLAGS.batch_size)
-    iterator = ds.make_one_shot_iterator()
-    images = iterator.get_next()
-
-    G = get_generator([None, FLAGS.z_dim])
-    D = get_discriminator([None, FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim])
-
-    G.train()
-    D.train()
-    fake_images = G(z)
-    d_logits = D(fake_images)
-    d2_logits = D(images)
-
-    # discriminator: real images are labelled as 1
-    d_loss_real = tl.cost.sigmoid_cross_entropy(d2_logits, tf.ones_like(d2_logits), name='dreal')
-    # discriminator: images from generator (fake) are labelled as 0
-    d_loss_fake = tl.cost.sigmoid_cross_entropy(d_logits, tf.zeros_like(d_logits), name='dfake')
-    # cost for updating discriminator
-    d_loss = d_loss_real + d_loss_fake
-
-    # generator: try to make the fake images look real (1)
-    g_loss = tl.cost.sigmoid_cross_entropy(d_logits, tf.ones_like(d_logits), name='gfake')
-    # Define optimizers for updating discriminator and generator
-    d_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1) \
-        .minimize(d_loss, var_list=D.weights)
-    g_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1) \
-        .minimize(g_loss, var_list=G.weights)
-
-    sess = tf.InteractiveSession()
-    sess.run(tf.global_variables_initializer())
-
-    n_step_epoch = int(len(images_path) // FLAGS.batch_size)
-    for epoch in range(FLAGS.n_epoch):
-        epoch_time = time.time()
-        for step in range(n_step_epoch):
-            step_time = time.time()
-            _d_loss, _g_loss, _, _ = sess.run([d_loss, g_loss, d_optim, g_optim])
-            print("Epoch: [{}/{}] [{}/{}] took: {:3f}, d_loss: {:5f}, g_loss: {:5f}".format(epoch, FLAGS.n_epoch, step, n_step_epoch, time.time()-step_time, _d_loss, _g_loss))
-            if np.mod(step, FLAGS.save_step) == 0:
-                G.save_weights('{}/G.npz'.format(FLAGS.checkpoint_dir), sess=sess, format='npz')
-                D.save_weights('{}/D.npz'.format(FLAGS.checkpoint_dir), sess=sess, format='npz')
-                result = sess.run(fake_images)
-                tl.visualize.save_images(result, [num_tiles, num_tiles], '{}/train_{:02d}_{:04d}.png'.format(FLAGS.sample_dir, epoch, step))
-
-    sess.close()
-
-if __name__ == '__main__':
-    train()
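TF2 executes eagerly by default, which is why the session/placeholder variant above can be deleted outright. If graph-mode speed is wanted back, the eager step can simply be traced — a sketch, reusing the hypothetical `train_step` from the note above:

```python
import tensorflow as tf

# tf.function traces the Python step into a graph once, then reuses it,
# recovering most of the old graph-mode performance without sessions.
fast_train_step = tf.function(train_step)
```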
diff --git a/model.py b/model.py
index e4ee414..f4ce275 100755
--- a/model.py
+++ b/model.py
@@ -2,13 +2,11 @@
 import tensorlayer as tl
 from tensorlayer.layers import Input, Dense, DeConv2d, Reshape, BatchNorm2d, Conv2d, Flatten, BatchNorm
 
-flags = tf.app.flags
-FLAGS = flags.FLAGS
-
 def get_generator(shape, gf_dim=64): # Dimension of gen filters in first conv layer. [64]
     image_size = 64
     s16 = image_size // 16
-    w_init = tf.glorot_normal_initializer()
+    # w_init = tf.glorot_normal_initializer()
+    w_init = tf.random_normal_initializer(stddev=0.02)
     gamma_init = tf.random_normal_initializer(1., 0.02)
 
     ni = Input(shape)
@@ -26,7 +24,8 @@ def get_generator(shape, gf_dim=64): # Dimension of gen filters in first conv la
     return tl.models.Model(inputs=ni, outputs=nn, name='generator')
 
 def get_discriminator(shape, df_dim=64): # Dimension of discrim filters in first conv layer. [64]
-    w_init = tf.glorot_normal_initializer()
+    # w_init = tf.glorot_normal_initializer()
+    w_init = tf.random_normal_initializer(stddev=0.02)
     gamma_init = tf.random_normal_initializer(1., 0.02)
     lrelu = lambda x : tf.nn.leaky_relu(x, 0.2)
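The initializer switch from `tf.glorot_normal_initializer()` to `tf.random_normal_initializer(stddev=0.02)` follows the DCGAN paper, which initializes weights from a zero-centered Normal with standard deviation 0.02. Quick check of the two model factories, called the way the training scripts call them:

```python
from model import get_generator, get_discriminator

G = get_generator([None, 100])            # z (100-dim noise) -> 64x64x3 image
D = get_discriminator([None, 64, 64, 3])  # image -> real/fake logit
```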
diff --git a/tensorlayer/__init__.py b/tensorlayer/__init__.py
deleted file mode 100644
index e9b0e7d..0000000
--- a/tensorlayer/__init__.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""Deep learning and Reinforcement learning library for Researchers and Engineers"""
-
-import os
-from distutils.version import LooseVersion
-
-from tensorlayer.package_info import VERSION
-from tensorlayer.package_info import __shortversion__
-from tensorlayer.package_info import __version__
-
-from tensorlayer.package_info import __package_name__
-from tensorlayer.package_info import __contact_names__
-from tensorlayer.package_info import __contact_emails__
-from tensorlayer.package_info import __homepage__
-from tensorlayer.package_info import __repository_url__
-from tensorlayer.package_info import __download_url__
-from tensorlayer.package_info import __description__
-from tensorlayer.package_info import __license__
-from tensorlayer.package_info import __keywords__
-
-if 'TENSORLAYER_PACKAGE_BUILDING' not in os.environ:
-
-    try:
-        import tensorflow
-    except Exception as e:
-        raise ImportError(
-            "Tensorflow is not installed, please install it with the one of the following commands:\n"
-            " - `pip install --upgrade tensorflow`\n"
-            " - `pip install --upgrade tensorflow-gpu`"
-        )
-
-    if ("SPHINXBUILD" not in os.environ and "READTHEDOCS" not in os.environ and
-            LooseVersion(tensorflow.__version__) < LooseVersion("1.6.0")):
-        raise RuntimeError(
-            "TensorLayer does not support Tensorflow version older than 1.6.0.\n"
-            "Please update Tensorflow with:\n"
-            " - `pip install --upgrade tensorflow`\n"
-            " - `pip install --upgrade tensorflow-gpu`"
-        )
-
-    from tensorlayer import activation
-    from tensorlayer import array_ops
-    from tensorlayer import cost
-    from tensorlayer import decorators
-    from tensorlayer import files
-    from tensorlayer import initializers
-    from tensorlayer import iterate
-    from tensorlayer import layers
-    from tensorlayer import lazy_imports
-    from tensorlayer import logging
-    from tensorlayer import models
-    from tensorlayer import optimizers
-    from tensorlayer
import rein - - from tensorlayer.lazy_imports import LazyImport - - # Lazy Imports - db = LazyImport("tensorlayer.db") - distributed = LazyImport("tensorlayer.distributed") - nlp = LazyImport("tensorlayer.nlp") - prepro = LazyImport("tensorlayer.prepro") - utils = LazyImport("tensorlayer.utils") - visualize = LazyImport("tensorlayer.visualize") - - # alias - act = activation - vis = visualize - - alphas = array_ops.alphas - alphas_like = array_ops.alphas_like - - # global vars - global_flag = {} - global_dict = {} diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py deleted file mode 100644 index 8a8806f..0000000 --- a/tensorlayer/activation.py +++ /dev/null @@ -1,340 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -"""A file containing various activation functions.""" - -import tensorflow as tf - -from tensorlayer.decorators import deprecated - -__all__ = [ - 'leaky_relu', - 'leaky_relu6', - 'leaky_twice_relu6', - 'lrelu', - 'lrelu6', - 'ltrelu6', - 'ramp', - 'swish', - 'sign', - 'htanh', - 'hard_tanh', - 'pixel_wise_softmax', -] - - -def ramp(x, v_min=0, v_max=1, name=None): - """Ramp activation function. - - Parameters - ---------- - x : Tensor - input. - v_min : float - cap input to v_min as a lower bound. - v_max : float - cap input to v_max as a upper bound. - name : str - The function name (optional). - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - """ - return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name) - - -# @deprecated(date="2018-09-30", instructions="This API is deprecated. Please use as `tf.nn.leaky_relu`") -def leaky_relu(x, alpha=0.2, name="leaky_relu"): - """leaky_relu can be used through its shortcut: :func:`tl.act.lrelu`. - - This function is a modified version of ReLU, introducing a nonzero gradient for negative input. Introduced by the paper: - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__ - - The function return the following results: - - When x < 0: ``f(x) = alpha_low * x``. - - When x >= 0: ``f(x) = x``. - - Parameters - ---------- - x : Tensor - Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``, or ``int8``. - alpha : float - Slope. - name : str - The function name (optional). - - Examples - -------- - >>> import tensorlayer as tl - >>> net = tl.layers.DenseLayer(net, 100, act=lambda x : tl.act.lrelu(x, 0.2), name='dense') - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - References - ---------- - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__ - - """ - if not (0 < alpha <= 1): - raise ValueError("`alpha` value must be in [0, 1]`") - - with tf.name_scope(name, "leaky_relu") as name_scope: - x = tf.convert_to_tensor(x, name="features") - return tf.maximum(x, alpha * x, name=name_scope) - - -def leaky_relu6(x, alpha=0.2, name="leaky_relu6"): - """:func:`leaky_relu6` can be used through its shortcut: :func:`tl.act.lrelu6`. - - This activation function is a modified version :func:`leaky_relu` introduced by the following paper: - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__ - - This activation function also follows the behaviour of the activation function :func:`tf.nn.relu6` introduced by the following paper: - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__ - - The function return the following results: - - When x < 0: ``f(x) = alpha_low * x``. 
- - When x in [0, 6]: ``f(x) = x``. - - When x > 6: ``f(x) = 6``. - - Parameters - ---------- - x : Tensor - Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``, or ``int8``. - alpha : float - Slope. - name : str - The function name (optional). - - Examples - -------- - >>> import tensorlayer as tl - >>> net = tl.layers.DenseLayer(net, 100, act=lambda x : tl.act.leaky_relu6(x, 0.2), name='dense') - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - References - ---------- - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__ - - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__ - """ - if not isinstance(alpha, tf.Tensor) and not (0 < alpha <= 1): - raise ValueError("`alpha` value must be in [0, 1]`") - - with tf.name_scope(name, "leaky_relu6") as name_scope: - x = tf.convert_to_tensor(x, name="features") - return tf.minimum(tf.maximum(x, alpha * x), 6, name=name_scope) - - -def leaky_twice_relu6(x, alpha_low=0.2, alpha_high=0.2, name="leaky_relu6"): - """:func:`leaky_twice_relu6` can be used through its shortcut: :func:`:func:`tl.act.ltrelu6`. - - This activation function is a modified version :func:`leaky_relu` introduced by the following paper: - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__ - - This activation function also follows the behaviour of the activation function :func:`tf.nn.relu6` introduced by the following paper: - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__ - - This function push further the logic by adding `leaky` behaviour both below zero and above six. - - The function return the following results: - - When x < 0: ``f(x) = alpha_low * x``. - - When x in [0, 6]: ``f(x) = x``. - - When x > 6: ``f(x) = 6 + (alpha_high * (x-6))``. - - Parameters - ---------- - x : Tensor - Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``, or ``int8``. - alpha_low : float - Slope for x < 0: ``f(x) = alpha_low * x``. - alpha_high : float - Slope for x < 6: ``f(x) = 6 (alpha_high * (x-6))``. - name : str - The function name (optional). - - Examples - -------- - >>> import tensorlayer as tl - >>> net = tl.layers.DenseLayer(net, 100, act=lambda x : tl.act.leaky_twice_relu6(x, 0.2, 0.2), name='dense') - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - References - ---------- - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__ - - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__ - - """ - if not isinstance(alpha_high, tf.Tensor) and not (0 < alpha_high <= 1): - raise ValueError("`alpha_high` value must be in [0, 1]`") - - if not isinstance(alpha_low, tf.Tensor) and not (0 < alpha_low <= 1): - raise ValueError("`alpha_low` value must be in [0, 1]`") - - with tf.name_scope(name, "leaky_twice_relu6") as name_scope: - x = tf.convert_to_tensor(x, name="features") - - x_is_above_0 = tf.minimum(x, 6 * (1 - alpha_high) + alpha_high * x) - x_is_below_0 = tf.minimum(alpha_low * x, 0) - - return tf.maximum(x_is_above_0, x_is_below_0, name=name_scope) - - -def swish(x, name='swish'): - """Swish function. - - See `Swish: a Self-Gated Activation Function `__. - - Parameters - ---------- - x : Tensor - input. - name: str - function name (optional). - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. 
- - """ - with tf.name_scope(name): - x = tf.nn.sigmoid(x) * x - return x - - -@tf.RegisterGradient("QuantizeGrad") -def _sign_grad(unused_op, grad): - return tf.clip_by_value(grad, -1, 1) - - -def sign(x): - """Sign function. - - Clip and binarize tensor using the straight through estimator (STE) for the gradient, usually be used for - quantizing values in `Binarized Neural Networks`: https://arxiv.org/abs/1602.02830. - - Parameters - ---------- - x : Tensor - input. - - Examples - -------- - >>> net = tl.layers.DenseLayer(net, 100, act=lambda x : tl.act.lrelu(x, 0.2), name='dense') - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - References - ---------- - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013)` - http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf - - - `BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. (2016)` - https://arxiv.org/abs/1602.02830 - - """ - with tf.get_default_graph().gradient_override_map({"Sign": "QuantizeGrad"}): - return tf.sign(x, name='sign') - - -# if tf.__version__ > "1.7": -# @tf.custom_gradient -# def sign(x): # https://www.tensorflow.org/versions/master/api_docs/python/tf/custom_gradient?hl=ES#top_of_page -# """Differentiable sign function using sigmoid as the derivation function, -# see `tf.sign `__ and `tf.custom_gradient -# `__. -# -# Parameters -# ---------- -# x : Tensor -# input. -# -# Returns -# ------- -# Tensor -# A ``Tensor`` in the same type as ``x``. -# -# """ -# tao = tf.nn.sigmoid(x) -# def grad(): -# return tao * (1 - tao) -# return tf.sign(x), grad - - -def hard_tanh(x, name='htanh'): - """Hard tanh activation function. - - Which is a ramp function with low bound of -1 and upper bound of 1, shortcut is `htanh`. - - Parameters - ---------- - x : Tensor - input. - name : str - The function name (optional). - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - """ - # with tf.variable_scope("hard_tanh"): - return tf.clip_by_value(x, -1, 1, name=name) - - -@deprecated(date="2018-06-30", instructions="This API will be deprecated soon as tf.nn.softmax can do the same thing") -def pixel_wise_softmax(x, name='pixel_wise_softmax'): - """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1. - - Usually be used for image segmentation. - - Parameters - ---------- - x : Tensor - input. - - For 2d image, 4D tensor (batch_size, height, weight, channel), where channel >= 2. - - For 3d image, 5D tensor (batch_size, depth, height, weight, channel), where channel >= 2. - name : str - function name (optional) - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - Examples - -------- - >>> outputs = pixel_wise_softmax(network.outputs) - >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5) - - References - ---------- - - `tf.reverse `__ - - """ - with tf.name_scope(name): - return tf.nn.softmax(x) - - -# Alias -lrelu = leaky_relu -lrelu6 = leaky_relu6 -ltrelu6 = leaky_twice_relu6 -htanh = hard_tanh diff --git a/tensorlayer/array_ops.py b/tensorlayer/array_ops.py deleted file mode 100644 index 5d4c304..0000000 --- a/tensorlayer/array_ops.py +++ /dev/null @@ -1,114 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- -"""A file containing functions related to array manipulation.""" - -from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework.constant_op import constant -from tensorflow.python.framework.ops import convert_to_tensor -from tensorflow.python.ops.array_ops import shape_internal -from tensorflow.python.ops.gen_array_ops import fill -from tensorflow.python.ops.gen_array_ops import reshape - -__all__ = ['alphas', 'alphas_like'] - - -def alphas(shape, alpha_value, name=None): - """Creates a tensor with all elements set to `alpha_value`. - This operation returns a tensor of type `dtype` with shape `shape` and all - elements set to alpha. - - Parameters - ---------- - shape: A list of integers, a tuple of integers, or a 1-D `Tensor` of type `int32`. - The shape of the desired tensor - alpha_value: `float32`, `float64`, `int8`, `uint8`, `int16`, `uint16`, int32`, `int64` - The value used to fill the resulting `Tensor`. - name: str - A name for the operation (optional). - - Returns - ------- - A `Tensor` with all elements set to alpha. - - Examples - -------- - >>> tl.alphas([2, 3], tf.int32) # [[alpha, alpha, alpha], [alpha, alpha, alpha]] - """ - with ops.name_scope(name, "alphas", [shape]) as name: - - alpha_tensor = convert_to_tensor(alpha_value) - alpha_dtype = dtypes.as_dtype(alpha_tensor.dtype).base_dtype - - if not isinstance(shape, ops.Tensor): - try: - shape = constant_op._tensor_shape_tensor_conversion_function(tensor_shape.TensorShape(shape)) - except (TypeError, ValueError): - shape = ops.convert_to_tensor(shape, dtype=dtypes.int32) - - if not shape._shape_tuple(): - shape = reshape(shape, [-1]) # Ensure it's a vector - - try: - output = constant(alpha_value, shape=shape, dtype=alpha_dtype, name=name) - - except (TypeError, ValueError): - output = fill(shape, constant(alpha_value, dtype=alpha_dtype), name=name) - - if output.dtype.base_dtype != alpha_dtype: - raise AssertionError("Dtypes do not corresponds: %s and %s" % (output.dtype.base_dtype, alpha_dtype)) - - return output - - -def alphas_like(tensor, alpha_value, name=None, optimize=True): - """Creates a tensor with all elements set to `alpha_value`. - Given a single tensor (`tensor`), this operation returns a tensor of the same - type and shape as `tensor` with all elements set to `alpha_value`. - - Parameters - ---------- - tensor: tf.Tensor - The Tensorflow Tensor that will be used as a template. - alpha_value: `float32`, `float64`, `int8`, `uint8`, `int16`, `uint16`, int32`, `int64` - The value used to fill the resulting `Tensor`. - name: str - A name for the operation (optional). - optimize: bool - if true, attempt to statically determine the shape of 'tensor' and encode it as a constant. - - Returns - ------- - A `Tensor` with all elements set to `alpha_value`. 
- - Examples - -------- - >>> tensor = tf.constant([[1, 2, 3], [4, 5, 6]]) - >>> tl.alphas_like(tensor, 0.5) # [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]] - """ - with ops.name_scope(name, "alphas_like", [tensor]) as name: - tensor = ops.convert_to_tensor(tensor, name="tensor") - - if context.in_eager_mode(): # and dtype is not None and dtype != tensor.dtype: - ret = alphas(shape_internal(tensor, optimize=optimize), alpha_value=alpha_value, name=name) - - else: # if context.in_graph_mode(): - - # For now, variant types must be created via zeros_like; as we need to - # pass the input variant object to the proper zeros callback. - - if (optimize and tensor.shape.is_fully_defined()): - # We can produce a zeros tensor independent of the value of 'tensor', - # since the shape is known statically. - ret = alphas(tensor.shape, alpha_value=alpha_value, name=name) - - # elif dtype is not None and dtype != tensor.dtype and dtype != dtypes.variant: - else: - ret = alphas(shape_internal(tensor, optimize=optimize), alpha_value=alpha_value, name=name) - - ret.set_shape(tensor.get_shape()) - - return ret diff --git a/tensorlayer/cli/__init__.py b/tensorlayer/cli/__init__.py deleted file mode 100644 index e7522d7..0000000 --- a/tensorlayer/cli/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -"""The tensorlayer.cli module provides a command-line tool for some common tasks.""" diff --git a/tensorlayer/cli/__main__.py b/tensorlayer/cli/__main__.py deleted file mode 100644 index 838038b..0000000 --- a/tensorlayer/cli/__main__.py +++ /dev/null @@ -1,17 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import argparse - -from tensorlayer.cli import train - -if __name__ == "__main__": - parser = argparse.ArgumentParser(prog='tl') - subparsers = parser.add_subparsers(dest='cmd') - train_parser = subparsers.add_parser('train', help='train a model using multiple local GPUs or CPUs.') - train.build_arg_parser(train_parser) - args = parser.parse_args() - if args.cmd == 'train': - train.main(args) - else: - parser.print_help() diff --git a/tensorlayer/cli/train.py b/tensorlayer/cli/train.py deleted file mode 100644 index d63b7f5..0000000 --- a/tensorlayer/cli/train.py +++ /dev/null @@ -1,171 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -tl train -======== - -(Alpha release - usage might change later) - -The tensorlayer.cli.train module provides the ``tl train`` subcommand. -It helps the user bootstrap a TensorFlow/TensorLayer program for distributed training -using multiple GPU cards or CPUs on a computer. - -You need to first setup the `CUDA_VISIBLE_DEVICES `_ -to tell ``tl train`` which GPUs are available. If the CUDA_VISIBLE_DEVICES is not given, -``tl train`` would try best to discover all available GPUs. - -In distribute training, each TensorFlow program needs a TF_CONFIG environment variable to describe -the cluster. It also needs a master daemon to -monitor all trainers. ``tl train`` is responsible -for automatically managing these two tasks. - -Usage ------ - -tl train [-h] [-p NUM_PSS] [-c CPU_TRAINERS] [args [args ...]] - -.. 
code-block:: bash - - # example of using GPU 0 and 1 for training mnist - CUDA_VISIBLE_DEVICES="0,1" - tl train example/tutorial_mnist_distributed.py - - # example of using CPU trainers for inception v3 - tl train -c 16 example/tutorial_imagenet_inceptionV3_distributed.py - - # example of using GPU trainers for inception v3 with customized arguments - # as CUDA_VISIBLE_DEVICES is not given, tl would try to discover all available GPUs - tl train example/tutorial_imagenet_inceptionV3_distributed.py -- --batch_size 16 - - -Command-line Arguments ----------------------- - -- ``file``: python file path. - -- ``NUM_PSS`` : The number of parameter servers. - -- ``CPU_TRAINERS``: The number of CPU trainers. - - It is recommended that ``NUM_PSS + CPU_TRAINERS <= cpu count`` - -- ``args``: Any parameter after ``--`` would be passed to the python program. - - -Notes ------ -A parallel training program would require multiple parameter servers -to help parallel trainers to exchange intermediate gradients. -The best number of parameter servers is often proportional to the -size of your model as well as the number of CPUs available. -You can control the number of parameter servers using the ``-p`` parameter. - -If you have a single computer with massive CPUs, you can use the ``-c`` parameter -to enable CPU-only parallel training. -The reason we are not supporting GPU-CPU co-training is because GPU and -CPU are running at different speeds. Using them together in training would -incur stragglers. - -""" - -import argparse -import json -import multiprocessing -import os -import platform -import re -import subprocess -import sys - -PORT_BASE = 10000 - - -def _get_gpu_ids(): - if 'CUDA_VISIBLE_DEVICES' in os.environ: - return [int(x) for x in os.environ.get('CUDA_VISIBLE_DEVICES', '').split(',')] - if platform.system() in ['Darwin', 'Linux']: - return [int(d.replace('nvidia', '')) for d in os.listdir('/dev') if re.match('^nvidia\d+$', d)] - else: - print('Please set CUDA_VISIBLE_DEVICES (see http://acceleware.com/blog/cudavisibledevices-masking-gpus)') - return [] - - -GPU_IDS = _get_gpu_ids() - - -def create_tf_config(cluster_spec, task_type, task_index): - return { - 'cluster': cluster_spec, - 'task': { - 'type': task_type, - 'index': task_index - }, - } - - -def create_tf_jobs(cluster_spec, prog, args): - gpu_assignment = dict((('worker', idx), gpu_idx) for (idx, gpu_idx) in enumerate(GPU_IDS)) - for job_type in cluster_spec: - for task_index in range(len(cluster_spec[job_type])): - new_env = os.environ.copy() - new_env.update( - { - 'CUDA_VISIBLE_DEVICES': str(gpu_assignment.get((job_type, task_index), '')), - 'TF_CONFIG': json.dumps(create_tf_config(cluster_spec, job_type, task_index)), - } - ) - yield subprocess.Popen(['python3', prog] + args, env=new_env) - - -def validate_arguments(args): - if args.num_pss < 1: - print('Value error: must have ore than one parameter servers.') - exit(1) - - if not GPU_IDS: - num_cpus = multiprocessing.cpu_count() - if args.cpu_trainers > num_cpus: - print('Value error: there are %s available CPUs but you are requiring %s.' % (num_cpus, args.cpu_trainers)) - exit(1) - - if not os.path.isfile(args.file): - print('Value error: model trainning file does not exist') - exit(1) - - -def main(args): - validate_arguments(args) - num_workers = len(GPU_IDS) if GPU_IDS else args.cpu_trainers - print('Using program %s with args %s' % (args.file, ' '.join(args.args))) - print('Using %d workers, %d parameter servers, %d GPUs.' 
% (num_workers, args.num_pss, len(GPU_IDS))) - cluster_spec = { - 'ps': ['localhost: %d' % (PORT_BASE + i) for i in range(args.num_pss)], - 'worker': ['localhost: %d' % (PORT_BASE + args.num_pss + i) for i in range(num_workers)] - } - processes = list(create_tf_jobs(cluster_spec, args.file, args.args)) - try: - print('Press ENTER to exit the training ...') - sys.stdin.readline() - except KeyboardInterrupt: # https://docs.python.org/3/library/exceptions.html#KeyboardInterrupt - print('Keyboard interrupt received') - finally: - print('stopping all subprocesses ...') - for p in processes: - p.kill() - for p in processes: - p.wait() - print('END') - - -def build_arg_parser(parser): - parser.add_argument('-p', '--pss', dest='num_pss', type=int, default=1, help='number of parameter servers') - parser.add_argument('-c', '--cpu_trainers', dest='cpu_trainers', type=int, default=1, help='number of CPU trainers') - parser.add_argument('file', help='model trainning file path') - parser.add_argument('args', nargs='*', type=str, help='arguments to ') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - build_arg_parser(parser) - args = parser.parse_args() - main(args) diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py deleted file mode 100644 index 946ff31..0000000 --- a/tensorlayer/cost.py +++ /dev/null @@ -1,726 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numbers - -import tensorflow as tf -from tensorflow.python.framework import ops -from tensorflow.python.ops import standard_ops - -import tensorlayer as tl - -__all__ = [ - 'cross_entropy', - 'sigmoid_cross_entropy', - 'binary_cross_entropy', - 'mean_squared_error', - 'normalized_mean_square_error', - 'absolute_difference_error', - 'dice_coe', - 'dice_hard_coe', - 'iou_coe', - 'cross_entropy_seq', - 'cross_entropy_seq_with_mask', - 'cosine_similarity', - 'li_regularizer', - 'lo_regularizer', - 'maxnorm_regularizer', - 'maxnorm_o_regularizer', - 'maxnorm_i_regularizer', -] - - -def cross_entropy(output, target, name=None): - """Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two distributions, - it implements softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``. - - Parameters - ---------- - output : Tensor - A batch of distribution with shape: [batch_size, num of classes]. - target : Tensor - A batch of index with shape: [batch_size, ]. - name : string - Name of this loss. - - Examples - -------- - >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits, 'my_loss') - - References - ----------- - - About cross-entropy: ``__. - - The code is borrowed from: ``__. - - """ - if name is None: - raise Exception("Please give a unique name to tl.cost.cross_entropy for TF1.0+") - return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output), name=name) - - -def sigmoid_cross_entropy(output, target, name=None): - """Sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``. - - Parameters - ---------- - output : Tensor - A batch of distribution with shape: [batch_size, num of classes]. - target : Tensor - A batch of index with shape: [batch_size, ]. - name : string - Name of this loss. - - """ - return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output), name=name) - - -def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'): - """Binary cross entropy operation. - - Parameters - ---------- - output : Tensor - Tensor with type of `float32` or `float64`. 
- target : Tensor - The target distribution, format the same with `output`. - epsilon : float - A small value to avoid output to be zero. - name : str - An optional name to attach to this function. - - References - ----------- - - `ericjang-DRAW `__ - - """ - # with ops.op_scope([output, target], name, "bce_loss") as name: - # output = ops.convert_to_tensor(output, name="preds") - # target = ops.convert_to_tensor(targets, name="target") - - # with tf.name_scope(name): - return tf.reduce_mean( - tf.reduce_sum(-(target * tf.log(output + epsilon) + (1. - target) * tf.log(1. - output + epsilon)), axis=1), - name=name - ) - - # For brevity, let `x = output`, `z = target`. The binary cross entropy loss is - # - # loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) - - -def mean_squared_error(output, target, is_mean=False, axis=-1, name="mean_squared_error"): - """Return the TensorFlow expression of mean-square-error (L2) of two batch of data. - - Parameters - ---------- - output : Tensor - 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel]. - target : Tensor - The target distribution, format the same with `output`. - is_mean : boolean - Whether compute the mean or sum for each example. - - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. - - If False, use ``tf.reduce_sum`` (default). - axis : int or list of int - The dimensions to reduce. - name : str - An optional name to attach to this function. - - References - ------------ - - `Wiki Mean Squared Error `__ - - """ - # with tf.name_scope(name): - # if len(output.shape) == 2: # [batch_size, n_feature] - # axis = 1 - # elif len(output.shape) == 3: # [batch_size, w, h] - # axis = [1, 2] - # elif len(output.shape) == 4: # [batch_size, w, h, c] - # axis = [1, 2, 3] - # else: - # raise Exception("Unknow dimension") - - if is_mean: - mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), axis), name=name) - else: - mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), axis), name=name) - return mse - - -def normalized_mean_square_error(output, target, axis=-1, name="normalized_mean_squared_error_loss"): - """Return the TensorFlow expression of normalized mean-square-error of two distributions. - - Parameters - ---------- - output : Tensor - 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel]. - target : Tensor - The target distribution, format the same with `output`. - axis : int or list of int - The dimensions to reduce. - name : str - An optional name to attach to this function. - - """ - with tf.name_scope("normalized_mean_squared_error_loss"): - # if len(output.shape) == 2: # [batch_size, n_feature] - # axis = 1 - # elif len(output.shape) == 3: # [batch_size, w, h] - # axis = [1, 2] - # elif len(output.shape) == 4: # [batch_size, w, h, c] - # axis = [1, 2, 3] - nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=axis)) - nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=axis)) - nmse = tf.reduce_mean(nmse_a / nmse_b, name=name) - return nmse - - -def absolute_difference_error(output, target, is_mean=False, axis=-1, name="absolute_difference_error_loss"): - """Return the TensorFlow expression of absolute difference error (L1) of two batch of data. - - Parameters - ---------- - output : Tensor - 2D, 3D or 4D tensor i.e. 
[batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel]. - target : Tensor - The target distribution, format the same with `output`. - is_mean : boolean - Whether compute the mean or sum for each example. - - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. - - If False, use ``tf.reduce_sum`` (default). - axis : int or list of int - The dimensions to reduce. - name : str - An optional name to attach to this function. - - """ - # # with tf.name_scope("absolute_difference_error_loss"): - # if len(output.shape) == 2: # [batch_size, n_feature] - # axis = 1 - # elif len(output.shape) == 3: # [batch_size, w, h] - # axis = [1, 2] - # elif len(output.shape) == 4: # [batch_size, w, h, c] - # axis = [1, 2, 3] - # else: - # raise Exception("Unknow dimension") - if is_mean: - loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), axis), name=name) - else: - loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), axis), name=name) - return loss - - -def dice_coe(output, target, loss_type='jaccard', axis=(1, 2, 3), smooth=1e-5): - """Soft dice (Sørensen or Jaccard) coefficient for comparing the similarity - of two batch of data, usually be used for binary image segmentation - i.e. labels are binary. The coefficient between 0 to 1, 1 means totally match. - - Parameters - ----------- - output : Tensor - A distribution with shape: [batch_size, ....], (any dimensions). - target : Tensor - The target distribution, format the same with `output`. - loss_type : str - ``jaccard`` or ``sorensen``, default is ``jaccard``. - axis : tuple of int - All dimensions are reduced, default ``[1,2,3]``. - smooth : float - This small value will be added to the numerator and denominator. - - If both output and target are empty, it makes sure dice is 1. - - If either output or target are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``, then if smooth is very small, dice close to 0 (even the image values lower than the threshold), so in this case, higher smooth can have a higher dice. - - Examples - --------- - >>> outputs = tl.act.pixel_wise_softmax(network.outputs) - >>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_) - - References - ----------- - - `Wiki-Dice `__ - - """ - inse = tf.reduce_sum(output * target, axis=axis) - if loss_type == 'jaccard': - l = tf.reduce_sum(output * output, axis=axis) - r = tf.reduce_sum(target * target, axis=axis) - elif loss_type == 'sorensen': - l = tf.reduce_sum(output, axis=axis) - r = tf.reduce_sum(target, axis=axis) - else: - raise Exception("Unknow loss_type") - # old axis=[0,1,2,3] - # dice = 2 * (inse) / (l + r) - # epsilon = 1e-5 - # dice = tf.clip_by_value(dice, 0, 1.0-epsilon) # if all empty, dice = 1 - # new haodong - dice = (2. * inse + smooth) / (l + r + smooth) - ## - dice = tf.reduce_mean(dice, name='dice_coe') - return dice - - -def dice_hard_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): - """Non-differentiable Sørensen–Dice coefficient for comparing the similarity - of two batch of data, usually be used for binary image segmentation i.e. labels are binary. - The coefficient between 0 to 1, 1 if totally match. - - Parameters - ----------- - output : tensor - A distribution with shape: [batch_size, ....], (any dimensions). - target : tensor - The target distribution, format the same with `output`. - threshold : float - The threshold value to be true. - axis : tuple of integer - All dimensions are reduced, default ``(1,2,3)``. 
- smooth : float - This small value will be added to the numerator and denominator, see ``dice_coe``. - - References - ----------- - - `Wiki-Dice `__ - - """ - output = tf.cast(output > threshold, dtype=tf.float32) - target = tf.cast(target > threshold, dtype=tf.float32) - inse = tf.reduce_sum(tf.multiply(output, target), axis=axis) - l = tf.reduce_sum(output, axis=axis) - r = tf.reduce_sum(target, axis=axis) - # old axis=[0,1,2,3] - # hard_dice = 2 * (inse) / (l + r) - # epsilon = 1e-5 - # hard_dice = tf.clip_by_value(hard_dice, 0, 1.0-epsilon) - # new haodong - hard_dice = (2. * inse + smooth) / (l + r + smooth) - ## - hard_dice = tf.reduce_mean(hard_dice, name='hard_dice') - return hard_dice - - -def iou_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): - """Non-differentiable Intersection over Union (IoU) for comparing the - similarity of two batch of data, usually be used for evaluating binary image segmentation. - The coefficient between 0 to 1, and 1 means totally match. - - Parameters - ----------- - output : tensor - A batch of distribution with shape: [batch_size, ....], (any dimensions). - target : tensor - The target distribution, format the same with `output`. - threshold : float - The threshold value to be true. - axis : tuple of integer - All dimensions are reduced, default ``(1,2,3)``. - smooth : float - This small value will be added to the numerator and denominator, see ``dice_coe``. - - Notes - ------ - - IoU cannot be used as training loss, people usually use dice coefficient for training, IoU and hard-dice for evaluating. - - """ - pre = tf.cast(output > threshold, dtype=tf.float32) - truth = tf.cast(target > threshold, dtype=tf.float32) - inse = tf.reduce_sum(tf.multiply(pre, truth), axis=axis) # AND - union = tf.reduce_sum(tf.cast(tf.add(pre, truth) >= 1, dtype=tf.float32), axis=axis) # OR - # old axis=[0,1,2,3] - # epsilon = 1e-5 - # batch_iou = inse / (union + epsilon) - # new haodong - batch_iou = (inse + smooth) / (union + smooth) - iou = tf.reduce_mean(batch_iou, name='iou_coe') - return iou # , pre, truth, inse, union - - -# ## test soft/hard dice and iou -# import numpy as np -# y = np.zeros((1,10,10,1)) -# # y[0,0:5,0:5]=1.0 -# o = np.zeros((1,10,10,1)) -# # o[:,:,:,:] = 0 # what we want: dice=0 iou=0 OK -# # o[0,0:2,0:2]=0.3 # what we want: dice larger iou=0 OK -# # o[0,0:2,0:2]=0.6 # what we want: dice larger iou small OK -# # o[0,0:3,0:3]=0.6 # what we want: dice larger iou larger OK -# # o[0,0:3,0:3]=1 # what we want: dice larger iou same OK -# # o[0,0:5,0:5]=1 # what we want: dice=1 iou=1 OK -# # o[0,0:5,0:5]=0.3 # what we want: dice smaller iou=0 OK -# # o[0,0:5,0:5]=1e-2 # what we want: dice≈0 iou=0 OK -# # o[0,8:10,8:10]=1.0 # what we want: dice=0 iou=0 OK -# # o[0,8:10,8:10]=1e-10 # what we want: dice=0 iou=0 OK -# # y[:,:,:,:] = o[:,:,:,:] = 0 # what we want: dice=1 iou=1 OK -# ## why in u-net, dice=1 hard-dice=1 iou=1 exist?? print bug? -# -# d = dice_coe(o, y, 'jaccard', smooth=1.) 
-# hd = dice_hard_coe(o, y, smooth=1e-5) -# i = iou_coe(o, y, smooth=1e-5) -# sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) -# # sess.run(tf.local_variables_initializer()) -# print(sess.run([d,hd,i])) -# # p, t, i, u = sess.run([pre, truth, inse, union]) -# # import pprint -# # pprint.pprint(((y>0.5)*(o>0.5)).astype(int).tolist()) -# # pprint.pprint(p.tolist()) -# # pprint.pprint(t.tolist()) -# # pprint.pprint(i) -# # pprint.pprint(u) -# exit() - - -def cross_entropy_seq(logits, target_seqs, batch_size=None): # , batch_size=1, num_steps=None): - """Returns the expression of cross-entropy of two sequences, implement - softmax internally. Normally be used for fixed length RNN outputs, see `PTB example `__. - - Parameters - ---------- - logits : Tensor - 2D tensor with shape of `[batch_size * n_steps, n_classes]`. - target_seqs : Tensor - The target sequence, 2D tensor `[batch_size, n_steps]`, if the number of step is dynamic, please use ``tl.cost.cross_entropy_seq_with_mask`` instead. - batch_size : None or int. - Whether to divide the cost by batch size. - - If integer, the return cost will be divided by `batch_size`. - - If None (default), the return cost will not be divided by anything. - - Examples - -------- - >>> see `PTB example `__.for more details - >>> input_data = tf.placeholder(tf.int32, [batch_size, n_steps]) - >>> targets = tf.placeholder(tf.int32, [batch_size, n_steps]) - >>> # build the network - >>> print(net.outputs) - (batch_size * n_steps, n_classes) - >>> cost = tl.cost.cross_entropy_seq(network.outputs, targets) - - """ - sequence_loss_by_example_fn = tf.contrib.legacy_seq2seq.sequence_loss_by_example - - loss = sequence_loss_by_example_fn( - [logits], [tf.reshape(target_seqs, [-1])], [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)] - ) - # [tf.ones([batch_size * num_steps])]) - cost = tf.reduce_sum(loss) # / batch_size - if batch_size is not None: - cost = cost / batch_size - return cost - - -def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False, name=None): - """Returns the expression of cross-entropy of two sequences, implement - softmax internally. Normally be used for Dynamic RNN with Synced sequence input and output. - - Parameters - ----------- - logits : Tensor - 2D tensor with shape of [batch_size * ?, n_classes], `?` means dynamic IDs for each example. - - Can be get from `DynamicRNNLayer` by setting ``return_seq_2d`` to `True`. - target_seqs : Tensor - int of tensor, like word ID. [batch_size, ?], `?` means dynamic IDs for each example. - input_mask : Tensor - The mask to compute loss, it has the same size with `target_seqs`, normally 0 or 1. - return_details : boolean - Whether to return detailed losses. - - If False (default), only returns the loss. - - If True, returns the loss, losses, weights and targets (see source code). - - Examples - -------- - >>> batch_size = 64 - >>> vocab_size = 10000 - >>> embedding_size = 256 - >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input") - >>> target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target") - >>> input_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="mask") - >>> net = tl.layers.EmbeddingInputlayer( - ... inputs = input_seqs, - ... vocabulary_size = vocab_size, - ... embedding_size = embedding_size, - ... name = 'seq_embedding') - >>> net = tl.layers.DynamicRNNLayer(net, - ... cell_fn = tf.contrib.rnn.BasicLSTMCell, - ... n_hidden = embedding_size, - ... 
dropout = (0.7 if is_train else None), - ... sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs), - ... return_seq_2d = True, - ... name = 'dynamicrnn') - >>> print(net.outputs) - (?, 256) - >>> net = tl.layers.DenseLayer(net, n_units=vocab_size, name="output") - >>> print(net.outputs) - (?, 10000) - >>> loss = tl.cost.cross_entropy_seq_with_mask(net.outputs, target_seqs, input_mask) - - """ - targets = tf.reshape(target_seqs, [-1]) # to one vector - weights = tf.to_float(tf.reshape(input_mask, [-1])) # to one vector like targets - losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name) * weights - # losses = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name)) # for TF1.0 and others - - loss = tf.divide( - tf.reduce_sum(losses), # loss from mask. reduce_sum before element-wise mul with mask !! - tf.reduce_sum(weights), - name="seq_loss_with_mask" - ) - - if return_details: - return loss, losses, weights, targets - else: - return loss - - -def cosine_similarity(v1, v2): - """Cosine similarity [-1, 1]. - - Parameters - ---------- - v1, v2 : Tensor - Tensor with the same shape [batch_size, n_feature]. - - References - ---------- - - `Wiki `__. - - """ - - return tf.reduce_sum(tf.multiply(v1, v2), 1) / \ - (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) * - tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1))) - - -# Regularization Functions -def li_regularizer(scale, scope=None): - """Li regularization removes the neurons of previous layer. The `i` represents `inputs`. - Returns a function that can be used to apply group li regularization to weights. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - scope: str - An optional scope name for this function. - - Returns - -------- - A function with signature `li(weights, name=None)` that apply Li regularization. - - Raises - ------ - ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float. - - """ - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - if scale >= 1.: - raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % scale) - if scale == 0.: - tl.logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def li(weights): - """Applies li regularization to weights.""" - with tf.name_scope('li_regularizer') as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - # if tf.__version__ <= '0.12': - # standard_ops_fn = standard_ops.mul - # else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn( - my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), - name=scope - ) - - return li - - -def lo_regularizer(scale): - """Lo regularization removes the neurons of current layer. The `o` represents `outputs` - Returns a function that can be used to apply group lo regularization to weights. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - - Returns - ------- - A function with signature `lo(weights, name=None)` that apply Lo regularization. 
-
-
-def maxnorm_regularizer(scale=1.0):
-    """Max-norm regularization returns a function that can be used to apply max-norm regularization to weights.
-
-    More about max-norm, see `wiki-max norm `_.
-    The implementation follows `TensorFlow contrib `__.
-
-    Parameters
-    ----------
-    scale : float
-        A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-
-    Returns
-    ---------
-    A function with signature `mn(weights, name=None)` that applies max-norm regularization.
-
-    Raises
-    --------
-    ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
-
-    """
-    if isinstance(scale, numbers.Integral):
-        raise ValueError('scale cannot be an integer: %s' % scale)
-
-    if isinstance(scale, numbers.Real):
-        if scale < 0.:
-            raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale)
-        # if scale >= 1.:
-        #     raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-        #                      scale)
-        if scale == 0.:
-            tl.logging.info('Scale of 0 disables regularizer.')
-            return lambda _, name=None: None
-
-    def mn(weights, name='max_regularizer'):
-        """Applies max-norm regularization to weights."""
-        with tf.name_scope(name) as scope:
-            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale')
-            # if tf.__version__ <= '0.12':
-            #     standard_ops_fn = standard_ops.mul
-            # else:
-            standard_ops_fn = standard_ops.multiply
-            return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope)
-
-    return mn
-
-
-def maxnorm_o_regularizer(scale):
-    """Max-norm output regularization removes the neurons of the current layer.
-    Returns a function that can be used to apply max-norm regularization to each column of the weight matrix.
-    The implementation follows `TensorFlow contrib `__.
-
-    Parameters
-    ----------
-    scale : float
-        A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-
-    Returns
-    ---------
-    A function with signature `mn_o(weights, name=None)` that applies max-norm output regularization.
-
-    Raises
-    ---------
-    ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
- - """ - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - tl.logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn_o(weights, name='maxnorm_o_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - if tf.__version__ <= '0.12': - standard_ops_fn = standard_ops.mul - else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn( - my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope - ) - - return mn_o - - -def maxnorm_i_regularizer(scale): - """Max-norm input regularization removes the neurons of previous layer. - Returns a function that can be used to apply max-norm regularization to each row of weight matrix. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - - Returns - --------- - A function with signature `mn_i(weights, name=None)` that apply Lo regularization. - - Raises - --------- - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. - - """ - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - tl.logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn_i(weights, name='maxnorm_i_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - if tf.__version__ <= '0.12': - standard_ops_fn = standard_ops.mul - else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn( - my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope - ) - - return mn_i diff --git a/tensorlayer/db.py b/tensorlayer/db.py deleted file mode 100644 index 3e86566..0000000 --- a/tensorlayer/db.py +++ /dev/null @@ -1,744 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import pickle -import time -import os -import sys -from datetime import datetime - -import gridfs -import pymongo - -# from tensorlayer.files import load_graph_and_params -from tensorlayer.files import exists_or_mkdir -from tensorlayer.files import del_folder - -from tensorlayer import logging - -import tensorflow as tf -import numpy as np - - -class TensorHub(object): - """It is a MongoDB based manager that help you to manage data, network architecture, parameters and logging. - - Parameters - ------------- - ip : str - Localhost or IP address. - port : int - Port number. - dbname : str - Database name. - username : str or None - User name, set to None if you do not need authentication. - password : str - Password. - project_name : str or None - Experiment key for this entire project, similar with the repository name of Github. 
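-
-    Attributes
-    ------------
-    ip, port, dbname and other input parameters : see above
-        See above.
-    project_name : str
-        The given project name; if not given, set to the script name.
-    db : mongodb client
-        See ``pymongo.MongoClient``.
-
-    Examples
-    ---------
-    A minimal usage sketch, assuming a MongoDB instance on localhost and hypothetical database and project names:
-
-    >>> db = TensorHub(ip='localhost', port=27017, dbname='demo_db', username=None,
-    ...                password='password', project_name='dcgan')
-    >>> db.save_training_log(accuracy=0.33, loss=0.98)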
-    """
-
-    # @deprecated_alias(db_name='dbname', user_name='username', end_support_version=2.1)
-    def __init__(
-            self, ip='localhost', port=27017, dbname='dbname', username=None, password='password', project_name=None
-    ):
-        self.ip = ip
-        self.port = port
-        self.dbname = dbname
-        self.username = username
-
-        print("[Database] Initializing ...")
-        # connect to mongodb
-        client = pymongo.MongoClient(ip, port)
-        self.db = client[dbname]
-        if username is not None:
-            print(username, password)
-            self.db.authenticate(username, password)
-        else:
-            print("[Database] No username given, it works if authentication is not required")
-        if project_name is None:
-            self.project_name = sys.argv[0].split('.')[0]
-            print("[Database] No project_name given, use {}".format(self.project_name))
-        else:
-            self.project_name = project_name
-
-        # define file system (Buckets)
-        self.dataset_fs = gridfs.GridFS(self.db, collection="datasetFilesystem")
-        self.model_fs = gridfs.GridFS(self.db, collection="modelfs")
-        # self.params_fs = gridfs.GridFS(self.db, collection="parametersFilesystem")
-        # self.architecture_fs = gridfs.GridFS(self.db, collection="architectureFilesystem")
-
-        print("[Database] Connected ")
-        _s = "[Database] Info:\n"
-        _s += "    ip : {}\n".format(self.ip)
-        _s += "    port : {}\n".format(self.port)
-        _s += "    dbname : {}\n".format(self.dbname)
-        _s += "    username : {}\n".format(self.username)
-        _s += "    password : {}\n".format("*******")
-        _s += "    project_name : {}\n".format(self.project_name)
-        self._s = _s
-        print(self._s)
-
-    def __str__(self):
-        """Print information of the database."""
-        return self._s
-
-    def _fill_project_info(self, args):
-        """Fill in project_name for all studies, architectures and parameters."""
-        return args.update({'project_name': self.project_name})
-
-    @staticmethod
-    def _serialization(ps):
-        """Serialize data."""
-        return pickle.dumps(ps, protocol=pickle.HIGHEST_PROTOCOL)  # protocol=2)
-        # with open('_temp.pkl', 'wb') as file:
-        #     return pickle.dump(ps, file, protocol=pickle.HIGHEST_PROTOCOL)
-
-    @staticmethod
-    def _deserialization(ps):
-        """Deserialize data."""
-        return pickle.loads(ps)
-
-    # =========================== MODELS ================================
-    def save_model(self, network=None, model_name='model', **kwargs):
-        """Save model architecture and parameters into the database; a timestamp will be added automatically.
-
-        Parameters
-        ----------
-        network : TensorLayer layer
-            TensorLayer layer instance.
-        model_name : str
-            The name/key of the model.
-        kwargs : other events
-            Other events, such as name, accuracy, loss, step number, etc. (optional).
-
-        Examples
-        ---------
-        Save model architecture and parameters into the database.
-        >>> db.save_model(net, accuracy=0.8, loss=2.3, name='second_model')
-
-        Load one model with parameters from the database (run this in another script).
-        >>> net = db.find_top_model(sess=sess, accuracy=0.8, loss=2.3)
-
-        Find and load the latest model.
-        >>> net = db.find_top_model(sess=sess, sort=[("time", pymongo.DESCENDING)])
-        >>> net = db.find_top_model(sess=sess, sort=[("time", -1)])
-
-        Find and load the oldest model.
-        >>> net = db.find_top_model(sess=sess, sort=[("time", pymongo.ASCENDING)])
-        >>> net = db.find_top_model(sess=sess, sort=[("time", 1)])
-
-        Get model information.
-        >>> net._accuracy
-        ...
0.8 - - Returns - --------- - boolean : True for success, False for fail. - """ - kwargs.update({'model_name': model_name}) - self._fill_project_info(kwargs) # put project_name into kwargs - - params = network.get_all_params() - - s = time.time() - - kwargs.update({'architecture': network.all_graphs, 'time': datetime.utcnow()}) - - try: - params_id = self.model_fs.put(self._serialization(params)) - kwargs.update({'params_id': params_id, 'time': datetime.utcnow()}) - self.db.Model.insert_one(kwargs) - print("[Database] Save model: SUCCESS, took: {}s".format(round(time.time() - s, 2))) - return True - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] - logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e)) - print("[Database] Save model: FAIL") - return False - - def find_top_model(self, sess, sort=None, model_name='model', **kwargs): - """Finds and returns a model architecture and its parameters from the database which matches the requirement. - - Parameters - ---------- - sess : Session - TensorFlow session. - sort : List of tuple - PyMongo sort comment, search "PyMongo find one sorting" and `collection level operations `__ for more details. - model_name : str or None - The name/key of model. - kwargs : other events - Other events, such as name, accuracy, loss, step number and etc (optinal). - - Examples - --------- - - see ``save_model``. - - Returns - --------- - network : TensorLayer layer - Note that, the returned network contains all information of the document (record), e.g. if you saved accuracy in the document, you can get the accuracy by using ``net._accuracy``. - """ - # print(kwargs) # {} - kwargs.update({'model_name': model_name}) - self._fill_project_info(kwargs) - - s = time.time() - - d = self.db.Model.find_one(filter=kwargs, sort=sort) - - _temp_file_name = '_find_one_model_ztemp_file' - if d is not None: - params_id = d['params_id'] - graphs = d['architecture'] - _datetime = d['time'] - exists_or_mkdir(_temp_file_name, False) - with open(os.path.join(_temp_file_name, 'graph.pkl'), 'wb') as file: - pickle.dump(graphs, file, protocol=pickle.HIGHEST_PROTOCOL) - else: - print("[Database] FAIL! Cannot find model: {}".format(kwargs)) - return False - try: - params = self._deserialization(self.model_fs.get(params_id).read()) - np.savez(os.path.join(_temp_file_name, 'params.npz'), params=params) - - network = load_graph_and_params(name=_temp_file_name, sess=sess) - del_folder(_temp_file_name) - - pc = self.db.Model.find(kwargs) - print( - "[Database] Find one model SUCCESS. kwargs:{} sort:{} save time:{} took: {}s". - format(kwargs, sort, _datetime, round(time.time() - s, 2)) - ) - - # put all informations of model into the TL layer - for key in d: - network.__dict__.update({"_%s" % key: d[key]}) - - # check whether more parameters match the requirement - params_id_list = pc.distinct('params_id') - n_params = len(params_id_list) - if n_params != 1: - print(" Note that there are {} models match the kwargs".format(n_params)) - return network - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] - logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e)) - return False - - def delete_model(self, **kwargs): - """Delete model. - - Parameters - ----------- - kwargs : logging information - Find items to delete, leave it empty to delete all log. 
- """ - self._fill_project_info(kwargs) - self.db.Model.delete_many(kwargs) - logging.info("[Database] Delete Model SUCCESS") - - # =========================== DATASET =============================== - def save_dataset(self, dataset=None, dataset_name=None, **kwargs): - """Saves one dataset into database, timestamp will be added automatically. - - Parameters - ---------- - dataset : any type - The dataset you want to store. - dataset_name : str - The name of dataset. - kwargs : other events - Other events, such as description, author and etc (optinal). - - Examples - ---------- - Save dataset - >>> db.save_dataset([X_train, y_train, X_test, y_test], 'mnist', description='this is a tutorial') - - Get dataset - >>> dataset = db.find_top_dataset('mnist') - - Returns - --------- - boolean : Return True if save success, otherwise, return False. - """ - self._fill_project_info(kwargs) - if dataset_name is None: - raise Exception("dataset_name is None, please give a dataset name") - kwargs.update({'dataset_name': dataset_name}) - - s = time.time() - try: - dataset_id = self.dataset_fs.put(self._serialization(dataset)) - kwargs.update({'dataset_id': dataset_id, 'time': datetime.utcnow()}) - self.db.Dataset.insert_one(kwargs) - # print("[Database] Save params: {} SUCCESS, took: {}s".format(file_name, round(time.time()-s, 2))) - print("[Database] Save dataset: SUCCESS, took: {}s".format(round(time.time() - s, 2))) - return True - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] - logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e)) - print("[Database] Save dataset: FAIL") - return False - - def find_top_dataset(self, dataset_name=None, sort=None, **kwargs): - """Finds and returns a dataset from the database which matches the requirement. - - Parameters - ---------- - dataset_name : str - The name of dataset. - sort : List of tuple - PyMongo sort comment, search "PyMongo find one sorting" and `collection level operations `__ for more details. - kwargs : other events - Other events, such as description, author and etc (optinal). - - Examples - --------- - Save dataset - >>> db.save_dataset([X_train, y_train, X_test, y_test], 'mnist', description='this is a tutorial') - - Get dataset - >>> dataset = db.find_top_dataset('mnist') - >>> datasets = db.find_datasets('mnist') - - Returns - -------- - dataset : the dataset or False - Return False if nothing found. - - """ - - self._fill_project_info(kwargs) - if dataset_name is None: - raise Exception("dataset_name is None, please give a dataset name") - kwargs.update({'dataset_name': dataset_name}) - - s = time.time() - - d = self.db.Dataset.find_one(filter=kwargs, sort=sort) - - if d is not None: - dataset_id = d['dataset_id'] - else: - print("[Database] FAIL! 
Cannot find dataset: {}".format(kwargs)) - return False - try: - dataset = self._deserialization(self.dataset_fs.get(dataset_id).read()) - pc = self.db.Dataset.find(kwargs) - print("[Database] Find one dataset SUCCESS, {} took: {}s".format(kwargs, round(time.time() - s, 2))) - - # check whether more datasets match the requirement - dataset_id_list = pc.distinct('dataset_id') - n_dataset = len(dataset_id_list) - if n_dataset != 1: - print(" Note that there are {} datasets match the requirement".format(n_dataset)) - return dataset - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] - logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e)) - return False - - def find_datasets(self, dataset_name=None, **kwargs): - """Finds and returns all datasets from the database which matches the requirement. - In some case, the data in a dataset can be stored separately for better management. - - Parameters - ---------- - dataset_name : str - The name/key of dataset. - kwargs : other events - Other events, such as description, author and etc (optional). - - Returns - -------- - params : the parameters, return False if nothing found. - - """ - - self._fill_project_info(kwargs) - if dataset_name is None: - raise Exception("dataset_name is None, please give a dataset name") - kwargs.update({'dataset_name': dataset_name}) - - s = time.time() - pc = self.db.Dataset.find(kwargs) - - if pc is not None: - dataset_id_list = pc.distinct('dataset_id') - dataset_list = [] - for dataset_id in dataset_id_list: # you may have multiple Buckets files - tmp = self.dataset_fs.get(dataset_id).read() - dataset_list.append(self._deserialization(tmp)) - else: - print("[Database] FAIL! Cannot find any dataset: {}".format(kwargs)) - return False - - print("[Database] Find {} datasets SUCCESS, took: {}s".format(len(dataset_list), round(time.time() - s, 2))) - return dataset_list - - def delete_datasets(self, **kwargs): - """Delete datasets. - - Parameters - ----------- - kwargs : logging information - Find items to delete, leave it empty to delete all log. - - """ - - self._fill_project_info(kwargs) - self.db.Dataset.delete_many(kwargs) - logging.info("[Database] Delete Dataset SUCCESS") - - # =========================== LOGGING =============================== - def save_training_log(self, **kwargs): - """Saves the training log, timestamp will be added automatically. - - Parameters - ----------- - kwargs : logging information - Events, such as accuracy, loss, step number and etc. - - Examples - --------- - >>> db.save_training_log(accuracy=0.33, loss=0.98) - - """ - - self._fill_project_info(kwargs) - kwargs.update({'time': datetime.utcnow()}) - _result = self.db.TrainLog.insert_one(kwargs) - _log = self._print_dict(kwargs) - logging.info("[Database] train log: " + _log) - - def save_validation_log(self, **kwargs): - """Saves the validation log, timestamp will be added automatically. - - Parameters - ----------- - kwargs : logging information - Events, such as accuracy, loss, step number and etc. - - Examples - --------- - >>> db.save_validation_log(accuracy=0.33, loss=0.98) - - """ - - self._fill_project_info(kwargs) - kwargs.update({'time': datetime.utcnow()}) - _result = self.db.ValidLog.insert_one(kwargs) - _log = self._print_dict(kwargs) - logging.info("[Database] valid log: " + _log) - - def save_testing_log(self, **kwargs): - """Saves the testing log, timestamp will be added automatically. 
- - Parameters - ----------- - kwargs : logging information - Events, such as accuracy, loss, step number and etc. - - Examples - --------- - >>> db.save_testing_log(accuracy=0.33, loss=0.98) - - """ - - self._fill_project_info(kwargs) - kwargs.update({'time': datetime.utcnow()}) - _result = self.db.TestLog.insert_one(kwargs) - _log = self._print_dict(kwargs) - logging.info("[Database] test log: " + _log) - - def delete_training_log(self, **kwargs): - """Deletes training log. - - Parameters - ----------- - kwargs : logging information - Find items to delete, leave it empty to delete all log. - - Examples - --------- - Save training log - >>> db.save_training_log(accuracy=0.33) - >>> db.save_training_log(accuracy=0.44) - - Delete logs that match the requirement - >>> db.delete_training_log(accuracy=0.33) - - Delete all logs - >>> db.delete_training_log() - """ - self._fill_project_info(kwargs) - self.db.TrainLog.delete_many(kwargs) - logging.info("[Database] Delete TrainLog SUCCESS") - - def delete_validation_log(self, **kwargs): - """Deletes validation log. - - Parameters - ----------- - kwargs : logging information - Find items to delete, leave it empty to delete all log. - - Examples - --------- - - see ``save_training_log``. - """ - self._fill_project_info(kwargs) - self.db.ValidLog.delete_many(kwargs) - logging.info("[Database] Delete ValidLog SUCCESS") - - def delete_testing_log(self, **kwargs): - """Deletes testing log. - - Parameters - ----------- - kwargs : logging information - Find items to delete, leave it empty to delete all log. - - Examples - --------- - - see ``save_training_log``. - """ - self._fill_project_info(kwargs) - self.db.TestLog.delete_many(kwargs) - logging.info("[Database] Delete TestLog SUCCESS") - - # def find_training_logs(self, **kwargs): - # pass - # - # def find_validation_logs(self, **kwargs): - # pass - # - # def find_testing_logs(self, **kwargs): - # pass - - # =========================== Task =================================== - def create_task(self, task_name=None, script=None, hyper_parameters=None, saved_result_keys=None, **kwargs): - """Uploads a task to the database, timestamp will be added automatically. - - Parameters - ----------- - task_name : str - The task name. - script : str - File name of the python script. - hyper_parameters : dictionary - The hyper parameters pass into the script. - saved_result_keys : list of str - The keys of the task results to keep in the database when the task finishes. - kwargs : other parameters - Users customized parameters such as description, version number. 
- - Examples - ----------- - Uploads a task - >>> db.create_task(task_name='mnist', script='example/tutorial_mnist_simple.py', description='simple tutorial') - - Finds and runs the latest task - >>> db.run_top_task(sess=sess, sort=[("time", pymongo.DESCENDING)]) - >>> db.run_top_task(sess=sess, sort=[("time", -1)]) - - Finds and runs the oldest task - >>> db.run_top_task(sess=sess, sort=[("time", pymongo.ASCENDING)]) - >>> db.run_top_task(sess=sess, sort=[("time", 1)]) - - """ - if not isinstance(task_name, str): # is None: - raise Exception("task_name should be string") - if not isinstance(script, str): # is None: - raise Exception("script should be string") - if hyper_parameters is None: - hyper_parameters = {} - if saved_result_keys is None: - saved_result_keys = [] - - self._fill_project_info(kwargs) - kwargs.update({'time': datetime.utcnow()}) - kwargs.update({'hyper_parameters': hyper_parameters}) - kwargs.update({'saved_result_keys': saved_result_keys}) - - _script = open(script, 'rb').read() - - kwargs.update({'status': 'pending', 'script': _script, 'result': {}}) - self.db.Task.insert_one(kwargs) - logging.info("[Database] Saved Task - task_name: {} script: {}".format(task_name, script)) - - def run_top_task(self, task_name=None, sort=None, **kwargs): - """Finds and runs a pending task that in the first of the sorting list. - - Parameters - ----------- - task_name : str - The task name. - sort : List of tuple - PyMongo sort comment, search "PyMongo find one sorting" and `collection level operations `__ for more details. - kwargs : other parameters - Users customized parameters such as description, version number. - - Examples - --------- - Monitors the database and pull tasks to run - >>> while True: - >>> print("waiting task from distributor") - >>> db.run_top_task(task_name='mnist', sort=[("time", -1)]) - >>> time.sleep(1) - - Returns - -------- - boolean : True for success, False for fail. - """ - if not isinstance(task_name, str): # is None: - raise Exception("task_name should be string") - self._fill_project_info(kwargs) - kwargs.update({'status': 'pending'}) - - # find task and set status to running - task = self.db.Task.find_one_and_update(kwargs, {'$set': {'status': 'running'}}, sort=sort) - - try: - # get task info e.g. 
hyper parameters, python script - if task is None: - logging.info("[Database] Find Task FAIL: key: {} sort: {}".format(task_name, sort)) - return False - else: - logging.info("[Database] Find Task SUCCESS: key: {} sort: {}".format(task_name, sort)) - _datetime = task['time'] - _script = task['script'] - _id = task['_id'] - _hyper_parameters = task['hyper_parameters'] - _saved_result_keys = task['saved_result_keys'] - logging.info(" hyper parameters:") - for key in _hyper_parameters: - globals()[key] = _hyper_parameters[key] - logging.info(" {}: {}".format(key, _hyper_parameters[key])) - # run task - s = time.time() - logging.info("[Database] Start Task: key: {} sort: {} push time: {}".format(task_name, sort, _datetime)) - _script = _script.decode('utf-8') - with tf.Graph().as_default(): # as graph: # clear all TF graphs - exec(_script, globals()) - - # set status to finished - _ = self.db.Task.find_one_and_update({'_id': _id}, {'$set': {'status': 'finished'}}) - - # return results - __result = {} - for _key in _saved_result_keys: - logging.info(" result: {}={} {}".format(_key, globals()[_key], type(globals()[_key]))) - __result.update({"%s" % _key: globals()[_key]}) - _ = self.db.Task.find_one_and_update( - { - '_id': _id - }, {'$set': { - 'result': __result - }}, return_document=pymongo.ReturnDocument.AFTER - ) - logging.info( - "[Database] Finished Task: task_name - {} sort: {} push time: {} took: {}s". - format(task_name, sort, _datetime, - time.time() - s) - ) - return True - except Exception as e: - exc_type, exc_obj, exc_tb = sys.exc_info() - fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] - logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e)) - logging.info("[Database] Fail to run task") - # if fail, set status back to pending - _ = self.db.Task.find_one_and_update({'_id': _id}, {'$set': {'status': 'pending'}}) - return False - - def delete_tasks(self, **kwargs): - """Delete tasks. - - Parameters - ----------- - kwargs : logging information - Find items to delete, leave it empty to delete all log. - - Examples - --------- - >>> db.delete_tasks() - - """ - - self._fill_project_info(kwargs) - self.db.Task.delete_many(kwargs) - logging.info("[Database] Delete Task SUCCESS") - - def check_unfinished_task(self, task_name=None, **kwargs): - """Finds and runs a pending task. - - Parameters - ----------- - task_name : str - The task name. - kwargs : other parameters - Users customized parameters such as description, version number. - - Examples - --------- - Wait until all tasks finish in user's local console - - >>> while not db.check_unfinished_task(): - >>> time.sleep(1) - >>> print("all tasks finished") - >>> sess = tf.InteractiveSession() - >>> net = db.find_top_model(sess=sess, sort=[("test_accuracy", -1)]) - >>> print("the best accuracy {} is from model {}".format(net._test_accuracy, net._name)) - - Returns - -------- - boolean : True for success, False for fail. 
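-        """
-
-        if not isinstance(task_name, str):  # is None:
-            raise Exception("task_name should be string")
-        self._fill_project_info(kwargs)
-
-        kwargs.update({'$or': [{'status': 'pending'}, {'status': 'running'}]})
-
-        # ## find task
-        # task = self.db.Task.find_one(kwargs)
-        task = self.db.Task.find(kwargs)
-
-        task_id_list = task.distinct('_id')
-        n_task = len(task_id_list)
-
-        if n_task == 0:
-            logging.info("[Database] No unfinished task - task_name: {}".format(task_name))
-            return False
-        else:
-            logging.info("[Database] Find {} unfinished task - task_name: {}".format(n_task, task_name))
-            return True

Taken together, `create_task`, `run_top_task` and `check_unfinished_task` form a simple distributor/worker loop. A minimal sketch of that pattern (hypothetical database and script names; assumes a reachable MongoDB):

```python
import time

db = TensorHub(dbname='demo_db', project_name='dcgan')       # assumed reachable MongoDB
db.create_task(task_name='mnist', script='train.py')         # distributor: enqueue one task
while db.check_unfinished_task(task_name='mnist'):           # worker: poll until the queue drains
    db.run_top_task(task_name='mnist', sort=[("time", -1)])  # run the newest pending task
    time.sleep(1)
```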
-
-    @staticmethod
-    def _print_dict(args):
-        string = ''
-        for key, value in args.items():
-            if key != '_id':
-                string += str(key) + ": " + str(value) + " / "
-        return string
diff --git a/tensorlayer/decorators/__init__.py b/tensorlayer/decorators/__init__.py
deleted file mode 100644
index 9d4eeaa..0000000
--- a/tensorlayer/decorators/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-TensorLayer provides rich layer implementations tailored to
-various benchmarks and domain-specific problems. In addition, we also
-support transparent access to native TensorFlow parameters.
-For example, we provide not only layers for local response normalization, but also
-layers that allow users to apply ``tf.nn.lrn`` on ``network.outputs``.
-More functions can be found in the `TensorFlow API `__.
-"""
-
-from .deprecated import deprecated
-from .deprecated_alias import deprecated_alias
-from .method_decorator import private_method
-from .method_decorator import protected_method
-
-__all__ = ['deprecated', 'deprecated_alias', 'private_method', 'protected_method']
diff --git a/tensorlayer/decorators/deprecated.py b/tensorlayer/decorators/deprecated.py
deleted file mode 100644
index e3df14c..0000000
--- a/tensorlayer/decorators/deprecated.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import inspect
-import sys
-import functools
-
-from tensorlayer.decorators.utils import add_deprecation_notice_to_docstring
-from tensorlayer.decorators.utils import get_qualified_name
-from tensorlayer.decorators.utils import validate_deprecation_args
-
-import wrapt
-
-__all__ = ['deprecated']
-
-# Allow deprecation warnings to be silenced temporarily with a context manager.
-_PRINT_DEPRECATION_WARNINGS = True
-
-# Remember which deprecation warnings have been printed already.
-_PRINTED_WARNING = {} - - -def deprecated(wrapped=None, date='', instructions='', warn_once=True): - - if wrapped is None: - return functools.partial(deprecated, date=date, instructions=instructions, warn_once=warn_once) - - @wrapt.decorator - def wrapper(wrapped, instance=None, args=None, kwargs=None): - - validate_deprecation_args(date, instructions) - - if _PRINT_DEPRECATION_WARNINGS: - - class_or_func_name = get_qualified_name(wrapped) - - if class_or_func_name not in _PRINTED_WARNING: - if warn_once: - _PRINTED_WARNING[class_or_func_name] = True - - from tensorlayer import logging - - logging.warning( - '%s: `%s.%s` (in file: %s) is deprecated and will be removed %s.\n' - 'Instructions for updating: %s\n' % ( - "Class" if inspect.isclass(wrapped) else "Function", wrapped.__module__, class_or_func_name, - wrapped.__code__.co_filename, 'in a future version' if date is None else - ('after %s' % date), instructions - ) - ) - - return wrapped(*args, **kwargs) - - decorated = wrapper(wrapped) - - if sys.version_info > (3, 0): # docstring can only be edited with Python 3 - wrapt.FunctionWrapper.__setattr__( - decorated, "__doc__", add_deprecation_notice_to_docstring(wrapped.__doc__, date, instructions) - ) - - return decorated diff --git a/tensorlayer/decorators/deprecated_alias.py b/tensorlayer/decorators/deprecated_alias.py deleted file mode 100644 index acd2c0f..0000000 --- a/tensorlayer/decorators/deprecated_alias.py +++ /dev/null @@ -1,46 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import functools -import warnings - -from tensorlayer import logging - - -def deprecated_alias(end_support_version, **aliases): - - def deco(f): - - @functools.wraps(f) - def wrapper(*args, **kwargs): - - try: - func_name = "{}.{}".format(args[0].__class__.__name__, f.__name__) - except (NameError, IndexError): - func_name = f.__name__ - - rename_kwargs(kwargs, aliases, end_support_version, func_name) - - return f(*args, **kwargs) - - return wrapper - - return deco - - -def rename_kwargs(kwargs, aliases, end_support_version, func_name): - - for alias, new in aliases.items(): - - if alias in kwargs: - - if new in kwargs: - raise TypeError('{}() received both {} and {}'.format(func_name, alias, new)) - - warnings.warn('{}() - {} is deprecated; use {}'.format(func_name, alias, new), DeprecationWarning) - logging.warning( - "DeprecationWarning: {}(): " - "`{}` argument is deprecated and will be removed in version {}, " - "please change for `{}.`".format(func_name, alias, end_support_version, new) - ) - kwargs[new] = kwargs.pop(alias) diff --git a/tensorlayer/decorators/method_decorator.py b/tensorlayer/decorators/method_decorator.py deleted file mode 100644 index 5d26220..0000000 --- a/tensorlayer/decorators/method_decorator.py +++ /dev/null @@ -1,44 +0,0 @@ -#! 
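/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import inspect
-
-
-def private_method(func):
-    """Decorator for making an instance method private."""
-
-    def func_wrapper(*args, **kwargs):
-        """Decorator wrapper function."""
-        outer_frame = inspect.stack()[1][0]
-        if 'self' not in outer_frame.f_locals or outer_frame.f_locals['self'] is not args[0]:
-            raise RuntimeError('%s.%s is a private method' % (args[0].__class__.__name__, func.__name__))
-
-        return func(*args, **kwargs)
-
-    return func_wrapper

`private_method` walks one frame up the call stack and rejects calls whose caller has no matching `self`. A minimal sketch of the behaviour (hypothetical class; assumes `private_method` is importable from this module):

```python
class Model:
    @private_method
    def _init_weights(self):
        return 'ok'

    def build(self):
        return self._init_weights()  # allowed: the caller's `self` is this instance

m = Model()
m.build()          # returns 'ok'
m._init_weights()  # raises RuntimeError: Model._init_weights is a private method
```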
-
-
-def protected_method(func):
-    """Decorator for making an instance method protected."""
-
-    def func_wrapper(*args, **kwargs):
-        """Decorator wrapper function."""
-        outer_frame = inspect.stack()[1][0]
-
-        caller = inspect.getmro(outer_frame.f_locals['self'].__class__)[:-1]
-        target = inspect.getmro(args[0].__class__)[:-1]
-
-        share_subclass = False
-
-        for cls_ in target:
-            if issubclass(caller[0], cls_) or caller[0] is cls_:
-                share_subclass = True
-                break
-
-        if ('self' not in outer_frame.f_locals or
-                outer_frame.f_locals['self'] is not args[0]) and (not share_subclass):
-            raise RuntimeError('%s.%s is a protected method' % (args[0].__class__.__name__, func.__name__))
-
-        return func(*args, **kwargs)
-
-    return func_wrapper
diff --git a/tensorlayer/decorators/utils.py b/tensorlayer/decorators/utils.py
deleted file mode 100644
index d564fec..0000000
--- a/tensorlayer/decorators/utils.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-NOTE: DO NOT REMOVE THESE FILES. They are copied from the TensorFlow repository and are necessary to build the library without installing TF.
-
-Source: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/util
-
-They replace the following imports:
->>> from tensorflow.python.util import decorator_utils
->>> from tensorflow.python.util.deprecation import _validate_deprecation_args
-"""
-
-import sys
-import re
-
-__all__ = ["add_deprecation_notice_to_docstring", "get_qualified_name", "validate_deprecation_args"]
-
-
-def add_deprecation_notice_to_docstring(doc, date, instructions):
-    return _add_deprecated_function_notice_to_docstring(doc, date, instructions)
-
-
-def get_qualified_name(function):
-    # Python 3
-    if hasattr(function, '__qualname__'):
-        return function.__qualname__
-
-    # Python 2
-    if hasattr(function, 'im_class'):
-        return function.im_class.__name__ + '.' + function.__name__
-    return function.__name__
-
-
-def validate_deprecation_args(date, instructions):
-    if date is not None and not re.match(r'20\d\d-[01]\d-[0123]\d', date):
-        raise ValueError('Date must be YYYY-MM-DD.')
-    if not instructions:
-        raise ValueError('Don\'t deprecate things without conversion instructions!')
-
-
-def _add_deprecated_function_notice_to_docstring(doc, date, instructions):
-    """Adds a deprecation notice to a docstring for deprecated functions."""
-
-    if instructions:
-        deprecation_message = """
-            .. warning::
-                **THIS FUNCTION IS DEPRECATED:** It will be removed %s.
-                *Instructions for updating:* %s.
-        """ % (('in a future version' if date is None else ('after %s' % date)), instructions)
-
-    else:
-        deprecation_message = """
-            .. warning::
-                **THIS FUNCTION IS DEPRECATED:** It will be removed %s.
- """ % (('in a future version' if date is None else ('after %s' % date))) - - main_text = [deprecation_message] - - return _add_notice_to_docstring(doc, 'DEPRECATED FUNCTION', main_text) - - -def _add_notice_to_docstring(doc, no_doc_str, notice): - """Adds a deprecation notice to a docstring.""" - if not doc: - lines = [no_doc_str] - - else: - lines = _normalize_docstring(doc).splitlines() - - notice = [''] + notice - - if len(lines) > 1: - # Make sure that we keep our distance from the main body - if lines[1].strip(): - notice.append('') - - lines[1:1] = notice - else: - lines += notice - - return '\n'.join(lines) - - -def _normalize_docstring(docstring): - """Normalizes the docstring. - - Replaces tabs with spaces, removes leading and trailing blanks lines, and - removes any indentation. - - Copied from PEP-257: - https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation - - Args: - docstring: the docstring to normalize - - Returns: - The normalized docstring - """ - if not docstring: - return '' - # Convert tabs to spaces (following the normal Python rules) - # and split into a list of lines: - lines = docstring.expandtabs().splitlines() - # Determine minimum indentation (first line doesn't count): - # (we use sys.maxsize because sys.maxint doesn't exist in Python 3) - indent = sys.maxsize - for line in lines[1:]: - stripped = line.lstrip() - if stripped: - indent = min(indent, len(line) - len(stripped)) - # Remove indentation (first line is special): - trimmed = [lines[0].strip()] - if indent < sys.maxsize: - for line in lines[1:]: - trimmed.append(line[indent:].rstrip()) - # Strip off trailing and leading blank lines: - while trimmed and not trimmed[-1]: - trimmed.pop() - while trimmed and not trimmed[0]: - trimmed.pop(0) - # Return a single string: - return '\n'.join(trimmed) diff --git a/tensorlayer/distributed.py b/tensorlayer/distributed.py deleted file mode 100644 index 35319d2..0000000 --- a/tensorlayer/distributed.py +++ /dev/null @@ -1,546 +0,0 @@ -# -*- coding: utf-8 -*- - -import json -import os -import time -import math - -import tensorflow as tf -from tensorflow.python.training import session_run_hook - -from tensorlayer import logging -from tensorlayer.decorators import deprecated -from tensorlayer.lazy_imports import LazyImport - -hvd = LazyImport('horovod.tensorflow') - -__all__ = ['TaskSpecDef', 'TaskSpec', 'DistributedSession', 'StopAtTimeHook', 'LoadCheckpoint', 'Trainer'] - - -class Trainer(object): - """Trainer for neural networks in a distributed environment. - - TensorLayer Trainer is a high-level training interface built on top of TensorFlow MonitoredSession and - `Horovod `__. It transparently scales the training of a TensorLayer model - from a single GPU to multiple GPUs that be placed on different machines in a single cluster. - - To run the trainer, you will need to install Horovod on your machine. Check the installation script at - `tensorlayer/scripts/download_and_install_openmpi3_ubuntu.sh` - - The minimal inputs to the Trainer include (1) a training dataset defined using the TensorFlow DataSet API, - and (2) a model build function given the inputs of the training dataset, and returns the neural network - to train, the loss function to minimize, and the names of the tensor to log during training, and (3) - an optimizer and its arguments. 
-
-    The default parameter choices of Trainer are inspired by the Facebook paper:
-    `Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour `__
-
-    Parameters
-    ----------
-    training_dataset : class TensorFlow ``DataSet``
-        The training dataset which zips samples and labels. The trainer automatically
-        shards the training dataset based on the number of GPUs.
-    build_training_func : function
-        A function that builds the training operator. It takes the training dataset as an input,
-        and returns the neural network, the loss function and a dictionary that maps
-        string tags to tensors to log during training.
-    optimizer : class TensorFlow ``Optimizer``
-        The loss function optimizer. The trainer automatically and linearly scales the learning rate based on
-        the number of GPUs.
-    optimizer_args : dict
-        The optimizer argument dictionary. It must contain a `learning_rate` field of type float.
-        Note that the learning rate is linearly scaled according to the number of GPUs by default.
-        You can disable it using the option `scaling_learning_rate`.
-    batch_size : int
-        The training mini-batch size (i.e., number of samples per batch).
-    prefetch_size : int or None
-        The dataset prefetch buffer size. Set this parameter to overlap the GPU training and data preparation
-        if the data preparation is heavy.
-    checkpoint_dir : None or str
-        The path to the TensorFlow model checkpoint. Note that only the trainer master checkpoints its model.
-        If None, checkpointing is disabled.
-    log_step_size : int
-        The trainer logs training information every N mini-batches (i.e., step size).
-    validation_dataset : None or class TensorFlow ``DataSet``
-        The optional validation dataset that zips samples and labels. Note that
-        only the trainer master needs to run the validation.
-    build_validation_func : None or function
-        The function that builds the validation operator. It returns the validation neural network (which
-        shares the weights of the training network) and a custom number of validation metrics.
-    scaling_learning_rate : Boolean
-        Linearly scale the learning rate by the number of GPUs. Default is True.
-        This `linear scaling rule` is generally effective and is highly recommended by practitioners.
-        Check `Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour `__
-    max_iteration : int
-        The maximum iteration (i.e., mini-batch) to train.
-        The default is `math.inf`. You can set it to a small number to end the training earlier. This is
-        usually set for testing purposes.
-
-    Attributes
-    ----------
-    training_network : class TensorLayer ``Layer``
-        The training model.
-    session : class TensorFlow ``MonitoredTrainingSession``
-        The training session that the Trainer wraps.
-    global_step : int
-        The number of training mini-batches so far.
-    validation_metrics : list of tuples
-        The validation metrics that zip the validation metric property and the average value.
-
-    Examples
-    --------
-    See `tutorial_mnist_distributed_trainer.py
-    `__.
-
-    """
-
-    def __init__(
-            self, training_dataset, build_training_func, optimizer, optimizer_args, batch_size=32, prefetch_size=None,
-            checkpoint_dir=None, scaling_learning_rate=True, log_step_size=1, validation_dataset=None,
-            build_validation_func=None, max_iteration=float('inf')
-    ):
-        # Initialize Horovod.
- hvd.init() - self.is_master = hvd.rank() == 0 - self._last_global_step = 0 - - if prefetch_size is None: - prefetch_size = batch_size - - # Define the loss for validation dataset - if validation_dataset: - validation_dataset = validation_dataset.shard(num_shards=hvd.size(), index=hvd.rank()).batch(batch_size) - validation_dataset.prefetch(buffer_size=prefetch_size) - self._validation_iterator = validation_dataset.make_initializable_iterator() - next_example, next_label = self._validation_iterator.get_next() - _, self._validation_metrics = build_validation_func(next_example, next_label) - if not isinstance(self._validation_metrics, list): - self._validation_metrics = list(self._validation_metrics) - else: - self._validation_iterator = None - self._validation_metrics = None - - # Get the shard of the dataset based on my local rank - training_dataset = training_dataset.shard(num_shards=hvd.size(), index=hvd.rank()).batch(batch_size) - - training_dataset.prefetch(buffer_size=prefetch_size) - training_iterator = training_dataset.make_one_shot_iterator() - self._training_network, loss, log_tensors = build_training_func(*training_iterator.get_next()) - - # Adjust learning rate based on number of GPUs. - lr = optimizer_args['learning_rate'] - optimizer_args['learning_rate'] = lr * hvd.size() if scaling_learning_rate else lr - opt = optimizer(**optimizer_args) - - # Add Horovod Distributed Optimizer. - opt = hvd.DistributedOptimizer(opt) - - self._global_step = tf.train.get_or_create_global_step() - if isinstance(log_tensors, list): - log_tensors.append(self._global_step) - else: - log_tensors['global_step'] = self._global_step - self._train_op = opt.minimize(loss, global_step=self._global_step) - - hooks = [ - # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states - # from rank 0 to all other processes. This is necessary to ensure consistent - # initialization of all workers when training is started with random weights - # or restored from a checkpoint. - hvd.BroadcastGlobalVariablesHook(0), - - # Horovod: adjust number of steps based on number of GPUs. - tf.train.StopAtStepHook(last_step=max_iteration // hvd.size()), - tf.train.LoggingTensorHook(tensors=log_tensors, every_n_iter=log_step_size), - ] - - # Pin GPU to be used to process local rank (one GPU per process) - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.gpu_options.visible_device_list = str(hvd.local_rank()) - - # Save checkpoints only on worker 0 to prevent other workers from - # corrupting them. - checkpoint_dir = checkpoint_dir if self.is_master else None - - # The MonitoredTrainingSession takes care of session initialization, - # restoring from a checkpoint, saving to a checkpoint, and closing when done - # or an error occurs. 
- self._sess = tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir, hooks=hooks, config=config) - - @property - def global_step(self): - if self._sess.should_stop(): - return self._last_global_step - self._last_global_step = self._sess.run(self._global_step) - return self._last_global_step - - @property - def session(self): - return self._sess - - @property - def training_network(self): - return self._training_network - - @property - def validation_metrics(self): - """A helper function to compute validation related metrics""" - - if (self._validation_iterator is None) or (self._validation_metrics is None): - raise AttributeError('Validation is not setup.') - - n = 0.0 - metric_sums = [0.0] * len(self._validation_metrics) - self._sess.run(self._validation_iterator.initializer) - while True: - try: - metrics = self._sess.run(self._validation_metrics) - for i, m in enumerate(metrics): - metric_sums[i] += m - n += 1.0 - except tf.errors.OutOfRangeError: - break - for i, m in enumerate(metric_sums): - metric_sums[i] = metric_sums[i] / n - return zip(self._validation_metrics, metric_sums) - - def train_on_batch(self): - """Train a mini-batch.""" - self._sess.run(self._train_op) - - def train_and_validate_to_end(self, validate_step_size=50): - """A helper function that shows how to train and validate a model at the same time. - - Parameters - ---------- - validate_step_size : int - Validate the training network every N steps. - - """ - while not self._sess.should_stop(): - self.train_on_batch() # Run a training step synchronously. - if self.global_step % validate_step_size == 0: - # logging.info("Average loss for validation dataset: %s" % self.get_validation_metrics()) - log_str = 'step: %d, ' % self.global_step - for n, m in self.validation_metrics: - log_str += '%s: %f, ' % (n.name, m) - logging.info(log_str) - - -@deprecated(date="2018-10-30", instructions="Using the TensorLayer distributed trainer.") -class TaskSpecDef(object): - """Specification for a distributed task. - - It contains the job name, index of the task, - the parameter servers and the worker servers. If you want to use the last worker - for continuous evaluation you can call the method `use_last_worker_as_evaluator` - which returns a new :class:`TaskSpecDef` object without the last worker in the - cluster specification. - - Parameters - ---------- - task_type : str - Task type. One of `master`, `worker` or `ps`. - index : int - The zero-based index of the task. Distributed training jobs will have a single - master task, one or more parameter servers, and one or more workers. - trial : int - The identifier of the trial being run. - ps_hosts : str OR list of str - A string with a coma separate list of hosts for the parameter servers - or a list of hosts. - worker_hosts : str OR list of str - A string with a coma separate list of hosts for the worker servers - or a list of hosts. - master : str - A string with the master hosts - - Notes - ---------- - master might not be included in TF_CONFIG and can be None. The shard_index is adjusted - in any case to assign 0 to master and >= 1 to workers. - This implementation doesn't support sparse arrays in the `TF_CONFIG` variable as the - official TensorFlow documentation shows, as it is not a supported by the json - definition. 
- - References - ---------- - - `ML-engine trainer considerations `__ - - """ - - def __init__(self, task_type='master', index=0, trial=None, ps_hosts=None, worker_hosts=None, master=None): - self.type = task_type - self._index = int(index) - self._cluster_spec = None - self.num_workers = 1 - self.num_ps = 0 - self.shard_index = int(index) - self._master = True - self.trial = trial - self.ps_hosts = ps_hosts - self.worker_hosts = worker_hosts - self.master = master - self._server = None - - if ps_hosts and worker_hosts: - self.ps_hosts = ps_hosts if isinstance(ps_hosts, list) else ps_hosts.split(',') - self.num_ps = len(self.ps_hosts) - self.worker_hosts = worker_hosts if isinstance(worker_hosts, list) else worker_hosts.split(',') - if master is not None and len(master) > 0: - self._cluster_spec = tf.train.ClusterSpec( - { - 'ps': self.ps_hosts, - 'worker': self.worker_hosts, - 'master': master - } - ) - # master is a worker too - self.num_workers = len(self.worker_hosts) + 1 - if self.type == 'worker': - self.shard_index = self._index + 1 - self._master = self.type == 'master' - else: - self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts}) - self.num_workers = len(self.worker_hosts) - if self.type == 'worker': - self.shard_index = self._index - self._master = self.type == 'worker' and self._index == 0 - - def is_ps(self): - """Returns true if this server is a parameter server""" - return self.type == 'ps' - - def is_worker(self): - """Returns true if this server is a worker server""" - return self.type == 'worker' - - def is_master(self): - """Returns true if this server is the master server""" - return self._master - - def is_evaluator(self): - """Returns true if this server is the evaluator server""" - return self.type == 'worker' and self.num_workers == self._index - - def device_fn(self): - """Returns the function with the specification to create the graph in this server""" - current_device = '/job:{}/task:{}'.format(self.type, self._index) - ps_devices = '/job:ps' - return tf.train.replica_device_setter( - ps_device=ps_devices, worker_device=current_device, cluster=self._cluster_spec - ) - - def create_server(self): - if self._server is None and self.ps_hosts and self.worker_hosts and not self.is_evaluator(): - # create server and join if it is a parameter server - self._server = tf.train.Server(self._cluster_spec, job_name=self.type, task_index=self._index) - if self.is_ps(): - self._server.join() - - def target(self): - if self._server is None: - self.create_server() - if self._server is not None: - return self._server.target - else: - return None - - def use_last_worker_as_evaluator(self): - """Returns a new :class:`TaskSpecDef` where the last worker has been removed from - the list of worker_hosts, so it is not used for training anymore. You can call - is_evaluator to know whether this server is the evaluator one or not. - In case there is only one server for training this method raises an exception, as - you cannot use any server for evaluation. 
- - """ - if self.num_workers <= 1: - raise Exception('You need more than one worker instance to use one as evaluator') - - return TaskSpecDef( - task_type=self.type, index=self._index, trial=self.trial, ps_hosts=self.ps_hosts, - worker_hosts=self.worker_hosts[:-1], master=self.master - ) - - -@deprecated(date="2018-10-30", instructions="Using the TensorLayer distributed trainer.") -def create_task_spec_def(): - """Returns the a :class:`TaskSpecDef` based on the environment variables for distributed training. - - References - ---------- - - `ML-engine trainer considerations `__ - - `TensorPort Distributed Computing `__ - - """ - if 'TF_CONFIG' in os.environ: - # TF_CONFIG is used in ML-engine - env = json.loads(os.environ.get('TF_CONFIG', '{}')) - task_data = env.get('task', None) or {'type': 'master', 'index': 0} - cluster_data = env.get('cluster', None) or {'ps': None, 'worker': None, 'master': None} - return TaskSpecDef( - task_type=task_data['type'], index=task_data['index'], - trial=task_data['trial'] if 'trial' in task_data else None, ps_hosts=cluster_data['ps'], - worker_hosts=cluster_data['worker'], master=cluster_data['master'] if 'master' in cluster_data else None - ) - elif 'JOB_NAME' in os.environ: - # JOB_NAME, TASK_INDEX, PS_HOSTS, WORKER_HOSTS and MASTER_HOST are used in TensorPort - return TaskSpecDef( - task_type=os.environ['JOB_NAME'], index=os.environ['TASK_INDEX'], ps_hosts=os.environ.get('PS_HOSTS', None), - worker_hosts=os.environ.get('WORKER_HOSTS', None), master=os.environ.get('MASTER_HOST', None) - ) - else: - raise Exception('You need to setup TF_CONFIG or JOB_NAME to define the task.') - - -@deprecated(date="2018-10-30", instructions="Using the TensorLayer distributed trainer.") -def create_distributed_session( - task_spec=None, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, - save_summaries_steps=object(), save_summaries_secs=object(), config=None, stop_grace_period_secs=120, - log_step_count_steps=100 -): - """Creates a distributed session. - - It calls `MonitoredTrainingSession` to create a :class:`MonitoredSession` for distributed training. - - Parameters - ---------- - task_spec : :class:`TaskSpecDef`. - The task spec definition from create_task_spec_def() - checkpoint_dir : str. - Optional path to a directory where to restore variables. - scaffold : ``Scaffold`` - A `Scaffold` used for gathering or building supportive ops. - If not specified, a default one is created. It's used to finalize the graph. - hooks : list of ``SessionRunHook`` objects. - Optional - chief_only_hooks : list of ``SessionRunHook`` objects. - Activate these hooks if `is_chief==True`, ignore otherwise. - save_checkpoint_secs : int - The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. - save_summaries_steps : int - The frequency, in number of global steps, that the - summaries are written to disk using a default summary saver. If both - `save_summaries_steps` and `save_summaries_secs` are set to `None`, then - the default summary saver isn't used. Default 100. - save_summaries_secs : int - The frequency, in secs, that the summaries are written - to disk using a default summary saver. If both `save_summaries_steps` and - `save_summaries_secs` are set to `None`, then the default summary saver - isn't used. Default not enabled. 
- config : ``tf.ConfigProto`` - an instance of `tf.ConfigProto` proto used to configure the session. - It's the `config` argument of constructor of `tf.Session`. - stop_grace_period_secs : int - Number of seconds given to threads to stop after - `close()` has been called. - log_step_count_steps : int - The frequency, in number of global steps, that the - global step/sec is logged. - - Examples - -------- - A simple example for distributed training where all the workers use the same dataset: - - >>> task_spec = TaskSpec() - >>> with tf.device(task_spec.device_fn()): - >>> tensors = create_graph() - >>> with tl.DistributedSession(task_spec=task_spec, - ... checkpoint_dir='/tmp/ckpt') as session: - >>> while not session.should_stop(): - >>> session.run(tensors) - - An example where the dataset is shared among the workers - (see https://www.tensorflow.org/programmers_guide/datasets): - - >>> task_spec = TaskSpec() - >>> # dataset is a :class:`tf.data.Dataset` with the raw data - >>> dataset = create_dataset() - >>> if task_spec is not None: - >>> dataset = dataset.shard(task_spec.num_workers, task_spec.shard_index) - >>> # shuffle or apply a map function to the new sharded dataset, for example: - >>> dataset = dataset.shuffle(buffer_size=10000) - >>> dataset = dataset.batch(batch_size) - >>> dataset = dataset.repeat(num_epochs) - >>> # create the iterator for the dataset and the input tensor - >>> iterator = dataset.make_one_shot_iterator() - >>> next_element = iterator.get_next() - >>> with tf.device(task_spec.device_fn()): - >>> # next_element is the input for the graph - >>> tensors = create_graph(next_element) - >>> with tl.DistributedSession(task_spec=task_spec, - ... checkpoint_dir='/tmp/ckpt') as session: - >>> while not session.should_stop(): - >>> session.run(tensors) - - References - ---------- - - `MonitoredTrainingSession `__ - - """ - target = task_spec.target() if task_spec is not None else None - is_chief = task_spec.is_master() if task_spec is not None else True - return tf.train.MonitoredTrainingSession( - master=target, is_chief=is_chief, checkpoint_dir=checkpoint_dir, scaffold=scaffold, - save_checkpoint_secs=save_checkpoint_secs, save_summaries_steps=save_summaries_steps, - save_summaries_secs=save_summaries_secs, log_step_count_steps=log_step_count_steps, - stop_grace_period_secs=stop_grace_period_secs, config=config, hooks=hooks, chief_only_hooks=chief_only_hooks - ) - - -@deprecated(date="2018-10-30", instructions="Using the TensorLayer distributed trainer.") -class StopAtTimeHook(session_run_hook.SessionRunHook): - """Hook that requests stop after a specified time. - - Parameters - ---------- - time_running: int - Maximum time running in seconds - - """ - - def __init__(self, time_running): - self._time_running = time_running - self._end_time = 0 - - def begin(self): - self._end_time = time.time() + self._time_running - - def after_run(self, run_context, run_values): - if time.time() > self._end_time: - run_context.request_stop() - - -@deprecated(date="2018-10-30", instructions="Using the TensorLayer distributed trainer.") -class LoadCheckpoint(session_run_hook.SessionRunHook): - """Hook that loads a checkpoint after the session is created. 
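-
-    >>> from tensorflow.python.ops import variables as tf_variables
-    >>> from tensorflow.python.training.monitored_session import SingularMonitoredSession
-    >>>
-    >>> tensors = create_graph()
-    >>> saver = tf.train.Saver(var_list=tf_variables.trainable_variables())
-    >>> checkpoint_hook = LoadCheckpoint(saver, my_checkpoint_file)
-    >>> with tf.SingularMonitoredSession(hooks=[checkpoint_hook]) as session:
-    >>>     while not session.should_stop():
-    >>>         session.run(tensors)
-
-    """
-
-    def __init__(self, saver, checkpoint):
-        self._saver = saver
-        self._checkpoint = checkpoint
-        self._loaded = False
-
-    def after_create_session(self, session, coord):
-        if not self._loaded:
-            self._loaded = True
-            self._saver.restore(session, self._checkpoint)

`StopAtTimeHook` above ships without a usage example; here is a minimal sketch of driving a monitored session with it (hypothetical toy op; assumes TF 1.x):

```python
import tensorflow as tf

step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(step, 1)        # stand-in for a real training op
hook = StopAtTimeHook(time_running=600)  # request stop after 600 seconds
with tf.train.MonitoredTrainingSession(hooks=[hook]) as sess:
    while not sess.should_stop():
        sess.run(train_op)
```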
- - >>> from tensorflow.python.ops import variables as tf_variables - >>> from tensorflow.python.training.monitored_session import SingularMonitoredSession - >>> - >>> tensors = create_graph() - >>> saver = tf.train.Saver(var_list=tf_variables.trainable_variables()) - >>> checkpoint_hook = LoadCheckpoint(saver, my_checkpoint_file) - >>> with tf.SingularMonitoredSession(hooks=[checkpoint_hook]) as session: - >>> while not session.should_stop(): - >>> session.run(tensors) - - """ - - def __init__(self, saver, checkpoint): - self._saver = saver - self._checkpoint = checkpoint - self._loaded = False - - def after_create_session(self, session, coord): - if not self._loaded: - self._loaded = True - self._saver.restore(self._checkpoint) - - -# Alias -TaskSpec = create_task_spec_def -DistributedSession = create_distributed_session diff --git a/tensorlayer/files/__init__.py b/tensorlayer/files/__init__.py deleted file mode 100644 index a0c38ed..0000000 --- a/tensorlayer/files/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -TensorLayer provides rich layer implementations trailed for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow user to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API `__. -""" - -from .dataset_loaders.celebA_dataset import * -from .dataset_loaders.cifar10_dataset import * -from .dataset_loaders.cyclegan_dataset import * -from .dataset_loaders.flickr_1M_dataset import * -from .dataset_loaders.flickr_25k_dataset import * -from .dataset_loaders.imdb_dataset import * -from .dataset_loaders.matt_mahoney_dataset import * -from .dataset_loaders.mnist_dataset import * -from .dataset_loaders.mnist_fashion_dataset import * -from .dataset_loaders.mpii_dataset import * -from .dataset_loaders.nietzsche_dataset import * -from .dataset_loaders.ptb_dataset import * -from .dataset_loaders.voc_dataset import * -from .dataset_loaders.wmt_en_fr_dataset import * - -from .utils import * - -__all__ = [ - # Dataset Loaders - 'load_celebA_dataset', - 'load_cifar10_dataset', - 'load_cyclegan_dataset', - 'load_fashion_mnist_dataset', - 'load_flickr1M_dataset', - 'load_flickr25k_dataset', - 'load_imdb_dataset', - 'load_matt_mahoney_text8_dataset', - 'load_mnist_dataset', - 'load_mpii_pose_dataset', - 'load_nietzsche_dataset', - 'load_ptb_dataset', - 'load_voc_dataset', - 'load_wmt_en_fr_dataset', - - # Util Functions - 'assign_params', - 'del_file', - 'del_folder', - 'download_file_from_google_drive', - 'exists_or_mkdir', - 'file_exists', - 'folder_exists', - 'load_and_assign_npz', - 'load_and_assign_npz_dict', - 'load_ckpt', - 'load_cropped_svhn', - 'load_file_list', - 'load_folder_list', - 'load_npy_to_any', - 'load_npz', - 'maybe_download_and_extract', - 'natural_keys', - 'npz_to_W_pdf', - 'read_file', - 'save_any_to_npy', - 'save_ckpt', - 'save_npz', - 'save_npz_dict', - #'save_graph', - #'load_graph', - #'save_graph_and_params', - #'load_graph_and_params', -] diff --git a/tensorlayer/files/dataset_loaders/__init__.py b/tensorlayer/files/dataset_loaders/__init__.py deleted file mode 100644 index 59a5551..0000000 --- a/tensorlayer/files/dataset_loaders/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -from .celebA_dataset import * -from .cifar10_dataset import * -from .cyclegan_dataset import * -from .flickr_1M_dataset import * -from .flickr_25k_dataset import * -from .imdb_dataset import * -from .matt_mahoney_dataset import * -from .mnist_dataset import * -from .mnist_fashion_dataset import * -from .mpii_dataset import * -from .nietzsche_dataset import * -from .ptb_dataset import * -from .voc_dataset import * -from .wmt_en_fr_dataset import * - -__all__ = [ - 'load_celebA_dataset', - 'load_cifar10_dataset', - 'load_cyclegan_dataset', - 'load_fashion_mnist_dataset', - 'load_flickr1M_dataset', - 'load_flickr25k_dataset', - 'load_imdb_dataset', - 'load_matt_mahoney_text8_dataset', - 'load_mnist_dataset', - 'load_mpii_pose_dataset', - 'load_nietzsche_dataset', - 'load_ptb_dataset', - 'load_voc_dataset', - 'load_wmt_en_fr_dataset', -] diff --git a/tensorlayer/files/dataset_loaders/celebA_dataset.py b/tensorlayer/files/dataset_loaders/celebA_dataset.py deleted file mode 100644 index 67d0165..0000000 --- a/tensorlayer/files/dataset_loaders/celebA_dataset.py +++ /dev/null @@ -1,47 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os - -import zipfile - -from tensorlayer import logging - -from tensorlayer.files.utils import download_file_from_google_drive -from tensorlayer.files.utils import exists_or_mkdir -from tensorlayer.files.utils import load_file_list - -__all__ = ['load_celebA_dataset'] - - -def load_celebA_dataset(path='data'): - """Load CelebA dataset - - Return a list of image path. - - Parameters - ----------- - path : str - The path that the data is downloaded to, defaults is ``data/celebA/``. - - """ - data_dir = 'celebA' - filename, drive_id = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM" - save_path = os.path.join(path, filename) - image_path = os.path.join(path, data_dir) - if os.path.exists(image_path): - logging.info('[*] {} already exists'.format(save_path)) - else: - exists_or_mkdir(path) - download_file_from_google_drive(drive_id, save_path) - zip_dir = '' - with zipfile.ZipFile(save_path) as zf: - zip_dir = zf.namelist()[0] - zf.extractall(path) - os.remove(save_path) - os.rename(os.path.join(path, zip_dir), image_path) - - data_files = load_file_list(path=image_path, regx='\\.jpg', printable=False) - for i, _v in enumerate(data_files): - data_files[i] = os.path.join(image_path, data_files[i]) - return data_files diff --git a/tensorlayer/files/dataset_loaders/cifar10_dataset.py b/tensorlayer/files/dataset_loaders/cifar10_dataset.py deleted file mode 100644 index a864601..0000000 --- a/tensorlayer/files/dataset_loaders/cifar10_dataset.py +++ /dev/null @@ -1,136 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os -import sys - -import pickle - -import numpy as np - -from tensorlayer import logging - -from tensorlayer.files.utils import maybe_download_and_extract - -__all__ = ['load_cifar10_dataset'] - - -def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False): - """Load CIFAR-10 dataset. - - It consists of 60000 32x32 colour images in 10 classes, with - 6000 images per class. There are 50000 training images and 10000 test images. - - The dataset is divided into five training batches and one test batch, each with - 10000 images. The test batch contains exactly 1000 randomly-selected images from - each class. The training batches contain the remaining images in random order, - but some training batches may contain more images from one class than another. 
-    Between them, the training batches contain exactly 5000 images from each class.
-
-    Parameters
-    ----------
-    shape : tuple
-        The shape of digit images e.g. (-1, 3, 32, 32) and (-1, 32, 32, 3).
-    path : str
-        The path that the data is downloaded to, default is ``data/cifar10/``.
-    plotable : boolean
-        Whether to plot some image examples, default is False.
-
-    Examples
-    --------
-    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3))
-
-    References
-    ----------
-    - `CIFAR website `__
-    - `Data download link `__
-    - ``__
-
-    """
-    path = os.path.join(path, 'cifar10')
-    logging.info("Load or Download cifar10 > {}".format(path))
-
-    # Helper function to unpickle the data
-    def unpickle(file):
-        fp = open(file, 'rb')
-        if sys.version_info.major == 2:
-            data = pickle.load(fp)
-        elif sys.version_info.major == 3:
-            data = pickle.load(fp, encoding='latin-1')
-        else:
-            raise RuntimeError("Sys Version Unsupported")
-        fp.close()
-        return data
-
-    filename = 'cifar-10-python.tar.gz'
-    url = 'https://www.cs.toronto.edu/~kriz/'
-    # Download and uncompress file
-    maybe_download_and_extract(filename, path, url, extract=True)
-
-    # Unpickle file and fill in data
-    X_train = None
-    y_train = []
-    for i in range(1, 6):
-        data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "data_batch_{}".format(i)))
-        if i == 1:
-            X_train = data_dic['data']
-        else:
-            X_train = np.vstack((X_train, data_dic['data']))
-        y_train += data_dic['labels']
-
-    test_data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "test_batch"))
-    X_test = test_data_dic['data']
-    y_test = np.array(test_data_dic['labels'])
-
-    if shape == (-1, 3, 32, 32):
-        X_test = X_test.reshape(shape)
-        X_train = X_train.reshape(shape)
-    elif shape == (-1, 32, 32, 3):
-        X_test = X_test.reshape(shape, order='F')
-        X_train = X_train.reshape(shape, order='F')
-        X_test = np.transpose(X_test, (0, 2, 1, 3))
-        X_train = np.transpose(X_train, (0, 2, 1, 3))
-    else:
-        X_test = X_test.reshape(shape)
-        X_train = X_train.reshape(shape)
-
-    y_train = np.array(y_train)
-
-    if plotable:
-        logging.info('\nCIFAR-10')
-        import matplotlib.pyplot as plt
-        fig = plt.figure(1)
-
-        logging.info('Shape of a training image: X_train[0] %s' % X_train[0].shape)
-
-        plt.ion()  # interactive mode
-        count = 1
-        for _ in range(10):  # each row
-            for _ in range(10):  # each column
-                _ = fig.add_subplot(10, 10, count)
-                if shape == (-1, 3, 32, 32):
-                    # plt.imshow(X_train[count-1], interpolation='nearest')
-                    plt.imshow(np.transpose(X_train[count - 1], (1, 2, 0)), interpolation='nearest')
-                    # plt.imshow(np.transpose(X_train[count-1], (2, 1, 0)), interpolation='nearest')
-                elif shape == (-1, 32, 32, 3):
-                    plt.imshow(X_train[count - 1], interpolation='nearest')
-                    # plt.imshow(np.transpose(X_train[count-1], (1, 0, 2)), interpolation='nearest')
-                else:
-                    raise Exception("Do not support the given 'shape' to plot the image examples")
-                plt.gca().xaxis.set_major_locator(plt.NullLocator())
-                plt.gca().yaxis.set_major_locator(plt.NullLocator())
-                count = count + 1
-        plt.draw()  # interactive mode
-        plt.pause(3)  # interactive mode
-
-    logging.info("X_train: %s" % X_train.shape)
-    logging.info("y_train: %s" % y_train.shape)
-    logging.info("X_test: %s" % X_test.shape)
-    logging.info("y_test: %s" % y_test.shape)
-
-    X_train = np.asarray(X_train, dtype=np.float32)
-    X_test = np.asarray(X_test, dtype=np.float32)
-    y_train = np.asarray(y_train, dtype=np.int32)
-    y_test = np.asarray(y_test, dtype=np.int32)
-
-    return X_train, y_train, X_test, y_test
diff --git a/tensorlayer/files/dataset_loaders/cyclegan_dataset.py b/tensorlayer/files/dataset_loaders/cyclegan_dataset.py
deleted file mode 100644
index d67bd76..0000000
--- a/tensorlayer/files/dataset_loaders/cyclegan_dataset.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import os
-
-import numpy as np
-
-from tensorlayer import logging
-from tensorlayer import visualize
-
-from tensorlayer.files.utils import del_file
-from tensorlayer.files.utils import folder_exists
-from tensorlayer.files.utils import load_file_list
-from tensorlayer.files.utils import maybe_download_and_extract
-
-__all__ = ['load_cyclegan_dataset']
-
-
-def load_cyclegan_dataset(filename='summer2winter_yosemite', path='data'):
-    """Load images from CycleGAN's database, see `this link `__.
-
-    Parameters
-    ------------
-    filename : str
-        The dataset you want, see `this link `__.
-    path : str
-        The path that the data is downloaded to, default is ``data/cyclegan``.
-
-    Examples
-    ---------
-    >>> im_train_A, im_train_B, im_test_A, im_test_B = load_cyclegan_dataset(filename='summer2winter_yosemite')
-
-    """
-    path = os.path.join(path, 'cyclegan')
-    url = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/'
-
-    if folder_exists(os.path.join(path, filename)) is False:
-        logging.info("[*] {} is nonexistent in {}".format(filename, path))
-        maybe_download_and_extract(filename + '.zip', path, url, extract=True)
-        del_file(os.path.join(path, filename + '.zip'))
-
-    def load_image_from_folder(path):
-        path_imgs = load_file_list(path=path, regx='\\.jpg', printable=False)
-        return visualize.read_images(path_imgs, path=path, n_threads=10, printable=False)
-
-    im_train_A = load_image_from_folder(os.path.join(path, filename, "trainA"))
-    im_train_B = load_image_from_folder(os.path.join(path, filename, "trainB"))
-    im_test_A = load_image_from_folder(os.path.join(path, filename, "testA"))
-    im_test_B = load_image_from_folder(os.path.join(path, filename, "testB"))
-
-    def if_2d_to_3d(images):  # [h, w] --> [h, w, 3]
-        for i, _v in enumerate(images):
-            if len(images[i].shape) == 2:
-                images[i] = images[i][:, :, np.newaxis]
-                images[i] = np.tile(images[i], (1, 1, 3))
-        return images
-
-    im_train_A = if_2d_to_3d(im_train_A)
-    im_train_B = if_2d_to_3d(im_train_B)
-    im_test_A = if_2d_to_3d(im_test_A)
-    im_test_B = if_2d_to_3d(im_test_B)
-
-    return im_train_A, im_train_B, im_test_A, im_test_B
diff --git a/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py b/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py
deleted file mode 100644
index 6f13acf..0000000
--- a/tensorlayer/files/dataset_loaders/flickr_1M_dataset.py
+++ /dev/null
@@ -1,121 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import os
-
-from tensorlayer import logging
-from tensorlayer import visualize
-
-from tensorlayer.files.utils import del_file
-from tensorlayer.files.utils import folder_exists
-from tensorlayer.files.utils import load_file_list
-from tensorlayer.files.utils import load_folder_list
-from tensorlayer.files.utils import maybe_download_and_extract
-from tensorlayer.files.utils import read_file
-
-__all__ = ['load_flickr1M_dataset']
-
-
-def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printable=False):
-    """Load Flickr1M dataset.
-
-    Returns a list of images with a given tag from the Flickr1M dataset.
-    It will download Flickr1M from `the official website `__
-    the first time you use it.
-
-    Parameters
-    ------------
-    tag : str or None
-        What images to return.
-        - If you want to get images with tag, use a string like 'dog' or 'red'; see `Flickr Search `__.
-        - If you want to get all images, set to ``None``.
-
-    size : int
-        Integer between 1 and 10. 1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10.
-    path : str
-        The path that the data is downloaded to, default is ``data/flickr1M/``.
-    n_threads : int
-        The number of threads used to read images.
-    printable : boolean
-        Whether to print information when reading images, default is ``False``.
-
-    Examples
-    ----------
-    Use 200k images
-
-    >>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=2)
-
-    Use 1 million images
-
-    >>> images = tl.files.load_flickr1M_dataset(tag='zebra')
-
-    """
-    import shutil
-
-    path = os.path.join(path, 'flickr1M')
-    logging.info("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000))
-    images_zip = [
-        'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip',
-        'images7.zip', 'images8.zip', 'images9.zip'
-    ]
-    tag_zip = 'tags.zip'
-    url = 'http://press.liacs.nl/mirflickr/mirflickr1m/'
-
-    # download dataset
-    for image_zip in images_zip[0:size]:
-        image_folder = image_zip.split(".")[0]
-        # logging.info(path+"/"+image_folder)
-        if folder_exists(os.path.join(path, image_folder)) is False:
-            # logging.info(image_zip)
-            logging.info("[Flickr1M] {} is missing in {}".format(image_folder, path))
-            maybe_download_and_extract(image_zip, path, url, extract=True)
-            del_file(os.path.join(path, image_zip))
-            # os.system("mv {} {}".format(os.path.join(path, 'images'), os.path.join(path, image_folder)))
-            shutil.move(os.path.join(path, 'images'), os.path.join(path, image_folder))
-        else:
-            logging.info("[Flickr1M] {} exists in {}".format(image_folder, path))
-
-    # download tag
-    if folder_exists(os.path.join(path, "tags")) is False:
-        logging.info("[Flickr1M] tag files is nonexistent in {}".format(path))
-        maybe_download_and_extract(tag_zip, path, url, extract=True)
-        del_file(os.path.join(path, tag_zip))
-    else:
-        logging.info("[Flickr1M] tags exists in {}".format(path))
-
-    # 1. image path list
-    images_list = []
-    images_folder_list = []
-    for i in range(0, size):
-        images_folder_list += load_folder_list(path=os.path.join(path, 'images%d' % i))
-    images_folder_list.sort(key=lambda s: int(s.split('/')[-1]))  # folder/images/ddd
-
-    for folder in images_folder_list[0:size * 10]:
-        tmp = load_file_list(path=folder, regx='\\.jpg', printable=False)
-        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.jpg
-        images_list.extend([os.path.join(folder, x) for x in tmp])
-
-    # 2. tag path list
-    tag_list = []
-    tag_folder_list = load_folder_list(os.path.join(path, "tags"))
-
-    # tag_folder_list.sort(key=lambda s: int(s.split("/")[-1]))  # folder/images/ddd
-    tag_folder_list.sort(key=lambda s: int(os.path.basename(s)))
-
-    for folder in tag_folder_list[0:size * 10]:
-        tmp = load_file_list(path=folder, regx='\\.txt', printable=False)
-        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.txt
-        tmp = [os.path.join(folder, s) for s in tmp]
-        tag_list += tmp
-
-    # 3. select images
-    logging.info("[Flickr1M] searching tag: {}".format(tag))
-    select_images_list = []
-    for idx, _val in enumerate(tag_list):
-        tags = read_file(tag_list[idx]).split('\n')
-        if tag in tags:
-            select_images_list.append(images_list[idx])
-
-    logging.info("[Flickr1M] reading images with tag: {}".format(tag))
-    images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable)
-    return images
diff --git a/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py b/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py
deleted file mode 100644
index c452dbd..0000000
--- a/tensorlayer/files/dataset_loaders/flickr_25k_dataset.py
+++ /dev/null
@@ -1,86 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import os
-
-from tensorlayer import logging
-from tensorlayer import visualize
-
-from tensorlayer.files.utils import del_file
-from tensorlayer.files.utils import folder_exists
-from tensorlayer.files.utils import load_file_list
-from tensorlayer.files.utils import maybe_download_and_extract
-from tensorlayer.files.utils import natural_keys
-from tensorlayer.files.utils import read_file
-
-__all__ = ['load_flickr25k_dataset']
-
-
-def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False):
-    """Load Flickr25K dataset.
-
-    Returns a list of images with a given tag from the Flickr25k dataset.
-    It will download Flickr25k from `the official website `__
-    the first time you use it.
-
-    Parameters
-    ------------
-    tag : str or None
-        What images to return.
-        - If you want to get images with tag, use a string like 'dog' or 'red'; see `Flickr Search `__.
-        - If you want to get all images, set to ``None``.
-
-    path : str
-        The path that the data is downloaded to, default is ``data/flickr25k/``.
-    n_threads : int
-        The number of threads used to read images.
-    printable : boolean
-        Whether to print information when reading images, default is ``False``.
-
-    Examples
-    -----------
-    Get images with the tag 'sky'
-
-    >>> images = tl.files.load_flickr25k_dataset(tag='sky')
-
-    Get all images
-
-    >>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True)
-
-    """
-    path = os.path.join(path, 'flickr25k')
-
-    filename = 'mirflickr25k.zip'
-    url = 'http://press.liacs.nl/mirflickr/mirflickr25k/'
-
-    # download dataset
-    if folder_exists(os.path.join(path, "mirflickr")) is False:
-        logging.info("[*] Flickr25k is nonexistent in {}".format(path))
-        maybe_download_and_extract(filename, path, url, extract=True)
-        del_file(os.path.join(path, filename))
-
-    # return images by the given tag.
-    # 1. image path list
-    folder_imgs = os.path.join(path, "mirflickr")
-    path_imgs = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False)
-    path_imgs.sort(key=natural_keys)
-
-    # 2. tag path list
-    folder_tags = os.path.join(path, "mirflickr", "meta", "tags")
-    path_tags = load_file_list(path=folder_tags, regx='\\.txt', printable=False)
-    path_tags.sort(key=natural_keys)
-
-    # 3.
select images - if tag is None: - logging.info("[Flickr25k] reading all images") - else: - logging.info("[Flickr25k] reading images with tag: {}".format(tag)) - images_list = [] - for idx, _v in enumerate(path_tags): - tags = read_file(os.path.join(folder_tags, path_tags[idx])).split('\n') - # logging.info(idx+1, tags) - if tag is None or tag in tags: - images_list.append(path_imgs[idx]) - - images = visualize.read_images(images_list, folder_imgs, n_threads=n_threads, printable=printable) - return images diff --git a/tensorlayer/files/dataset_loaders/imdb_dataset.py b/tensorlayer/files/dataset_loaders/imdb_dataset.py deleted file mode 100644 index 6982bee..0000000 --- a/tensorlayer/files/dataset_loaders/imdb_dataset.py +++ /dev/null @@ -1,115 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import gzip -import os - -import numpy as np - -import six.moves.cPickle as pickle -from tensorlayer.files.utils import maybe_download_and_extract - -__all__ = ['load_imdb_dataset'] - - -def load_imdb_dataset( - path='data', nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, - index_from=3 -): - """Load IMDB dataset. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/imdb/``. - nb_words : int - Number of words to get. - skip_top : int - Top most frequent words to ignore (they will appear as oov_char value in the sequence data). - maxlen : int - Maximum sequence length. Any longer sequence will be truncated. - seed : int - Seed for reproducible data shuffling. - start_char : int - The start of a sequence will be marked with this character. Set to 1 because 0 is usually the padding character. - oov_char : int - Words that were cut out because of the num_words or skip_top limit will be replaced with this character. - index_from : int - Index actual words with this index and higher. - - Examples - -------- - >>> X_train, y_train, X_test, y_test = tl.files.load_imdb_dataset( - ... nb_words=20000, test_split=0.2) - >>> print('X_train.shape', X_train.shape) - (20000,) [[1, 62, 74, ... 1033, 507, 27],[1, 60, 33, ... 13, 1053, 7]..] - >>> print('y_train.shape', y_train.shape) - (20000,) [1 0 0 ..., 1 0 1] - - References - ----------- - - `Modified from keras. `__ - - """ - path = os.path.join(path, 'imdb') - - filename = "imdb.pkl" - url = 'https://s3.amazonaws.com/text-datasets/' - maybe_download_and_extract(filename, path, url) - - if filename.endswith(".gz"): - f = gzip.open(os.path.join(path, filename), 'rb') - else: - f = open(os.path.join(path, filename), 'rb') - - X, labels = pickle.load(f) - f.close() - - np.random.seed(seed) - np.random.shuffle(X) - np.random.seed(seed) - np.random.shuffle(labels) - - if start_char is not None: - X = [[start_char] + [w + index_from for w in x] for x in X] - elif index_from: - X = [[w + index_from for w in x] for x in X] - - if maxlen: - new_X = [] - new_labels = [] - for x, y in zip(X, labels): - if len(x) < maxlen: - new_X.append(x) - new_labels.append(y) - X = new_X - labels = new_labels - if not X: - raise Exception( - 'After filtering for sequences shorter than maxlen=' + str(maxlen) + ', no sequence was kept. ' - 'Increase maxlen.' 
- ) - if not nb_words: - nb_words = max([max(x) for x in X]) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] - else: - nX = [] - for x in X: - nx = [] - for w in x: - if (w >= nb_words or w < skip_top): - nx.append(w) - nX.append(nx) - X = nX - - X_train = np.array(X[:int(len(X) * (1 - test_split))]) - y_train = np.array(labels[:int(len(X) * (1 - test_split))]) - - X_test = np.array(X[int(len(X) * (1 - test_split)):]) - y_test = np.array(labels[int(len(X) * (1 - test_split)):]) - - return X_train, y_train, X_test, y_test diff --git a/tensorlayer/files/dataset_loaders/matt_mahoney_dataset.py b/tensorlayer/files/dataset_loaders/matt_mahoney_dataset.py deleted file mode 100644 index 3d6a6b6..0000000 --- a/tensorlayer/files/dataset_loaders/matt_mahoney_dataset.py +++ /dev/null @@ -1,50 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os - -import zipfile - -from tensorlayer import logging - -from tensorlayer.files.utils import maybe_download_and_extract - -__all__ = ['load_matt_mahoney_text8_dataset'] - - -def load_matt_mahoney_text8_dataset(path='data'): - """Load Matt Mahoney's dataset. - - Download a text file from Matt Mahoney's website - if not present, and make sure it's the right size. - Extract the first file enclosed in a zip file as a list of words. - This dataset can be used for Word Embedding. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/mm_test8/``. - - Returns - -------- - list of str - The raw text data e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...] - - Examples - -------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> print('Data size', len(words)) - - """ - path = os.path.join(path, 'mm_test8') - logging.info("Load or Download matt_mahoney_text8 Dataset> {}".format(path)) - - filename = 'text8.zip' - url = 'http://mattmahoney.net/dc/' - maybe_download_and_extract(filename, path, url, expected_bytes=31344016) - - with zipfile.ZipFile(os.path.join(path, filename)) as f: - word_list = f.read(f.namelist()[0]).split() - for idx, _ in enumerate(word_list): - word_list[idx] = word_list[idx].decode() - return word_list diff --git a/tensorlayer/files/dataset_loaders/mnist_dataset.py b/tensorlayer/files/dataset_loaders/mnist_dataset.py deleted file mode 100644 index 4e1346d..0000000 --- a/tensorlayer/files/dataset_loaders/mnist_dataset.py +++ /dev/null @@ -1,31 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -from tensorlayer.files.utils import _load_mnist_dataset - -__all__ = ['load_mnist_dataset'] - - -def load_mnist_dataset(shape=(-1, 784), path='data'): - """Load the original mnist. - - Automatically download MNIST dataset and return the training, validation and test set with 50000, 10000 and 10000 digit images respectively. - - Parameters - ---------- - shape : tuple - The shape of digit images (the default is (-1, 784), alternatively (-1, 28, 28, 1)). - path : str - The path that the data is downloaded to. - - Returns - ------- - X_train, y_train, X_val, y_val, X_test, y_test: tuple - Return splitted training/validation/test set respectively. 
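-        Images are returned as float32 arrays scaled to the range [0, 1) and
-        labels as int32 (the shared ``_load_mnist_dataset`` helper below divides
-        the raw bytes by 256).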
- - Examples - -------- - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1,784), path='datasets') - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1)) - """ - return _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/') diff --git a/tensorlayer/files/dataset_loaders/mnist_fashion_dataset.py b/tensorlayer/files/dataset_loaders/mnist_fashion_dataset.py deleted file mode 100644 index c7f1bb9..0000000 --- a/tensorlayer/files/dataset_loaders/mnist_fashion_dataset.py +++ /dev/null @@ -1,33 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -from tensorlayer.files.utils import _load_mnist_dataset - -__all__ = ['load_fashion_mnist_dataset'] - - -def load_fashion_mnist_dataset(shape=(-1, 784), path='data'): - """Load the fashion mnist. - - Automatically download fashion-MNIST dataset and return the training, validation and test set with 50000, 10000 and 10000 fashion images respectively, `examples `__. - - Parameters - ---------- - shape : tuple - The shape of digit images (the default is (-1, 784), alternatively (-1, 28, 28, 1)). - path : str - The path that the data is downloaded to. - - Returns - ------- - X_train, y_train, X_val, y_val, X_test, y_test: tuple - Return splitted training/validation/test set respectively. - - Examples - -------- - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1,784), path='datasets') - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1, 28, 28, 1)) - """ - return _load_mnist_dataset( - shape, path, name='fashion_mnist', url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' - ) diff --git a/tensorlayer/files/dataset_loaders/mnist_utils.py b/tensorlayer/files/dataset_loaders/mnist_utils.py deleted file mode 100644 index 8991516..0000000 --- a/tensorlayer/files/dataset_loaders/mnist_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os - -import gzip - -import numpy as np - -from tensorlayer import logging - -from tensorlayer.files.utils import maybe_download_and_extract - -__all__ = ["_load_mnist_dataset"] - - -def _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/'): - """A generic function to load mnist-like dataset. - - Parameters: - ---------- - shape : tuple - The shape of digit images. - path : str - The path that the data is downloaded to. - name : str - The dataset name you want to use(the default is 'mnist'). - url : str - The url of dataset(the default is 'http://yann.lecun.com/exdb/mnist/'). - """ - path = os.path.join(path, name) - - # Define functions for loading mnist-like data's images and labels. - # For convenience, they also download the requested files if needed. - def load_mnist_images(path, filename): - filepath = maybe_download_and_extract(filename, path, url) - - logging.info(filepath) - # Read the inputs in Yann LeCun's binary format. - with gzip.open(filepath, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=16) - # The inputs are vectors now, we reshape them to monochrome 2D images, - # following the shape convention: (examples, channels, rows, columns) - data = data.reshape(shape) - # The inputs come as bytes, we convert them to float32 in range [0,1]. - # (Actually to range [0, 255/256], for compatibility to the version - # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.) 
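-            # For example, shape=(-1, 28, 28, 1) yields NHWC float32 arrays, so
-            # after the validation split below, X_train.shape == (50000, 28, 28, 1).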
-        return data / np.float32(256)
-
-    def load_mnist_labels(path, filename):
-        filepath = maybe_download_and_extract(filename, path, url)
-        # Read the labels in Yann LeCun's binary format.
-        with gzip.open(filepath, 'rb') as f:
-            data = np.frombuffer(f.read(), np.uint8, offset=8)
-        # The labels are vectors of integers now, that's exactly what we want.
-        return data
-
-    # Download and read the training and test set images and labels.
-    logging.info("Load or Download {0} > {1}".format(name.upper(), path))
-    X_train = load_mnist_images(path, 'train-images-idx3-ubyte.gz')
-    y_train = load_mnist_labels(path, 'train-labels-idx1-ubyte.gz')
-    X_test = load_mnist_images(path, 't10k-images-idx3-ubyte.gz')
-    y_test = load_mnist_labels(path, 't10k-labels-idx1-ubyte.gz')
-
-    # We reserve the last 10000 training examples for validation.
-    X_train, X_val = X_train[:-10000], X_train[-10000:]
-    y_train, y_val = y_train[:-10000], y_train[-10000:]
-
-    # We just return all the arrays in order, as expected in main().
-    # (It doesn't matter how we do this as long as we can read them again.)
-    X_train = np.asarray(X_train, dtype=np.float32)
-    y_train = np.asarray(y_train, dtype=np.int32)
-    X_val = np.asarray(X_val, dtype=np.float32)
-    y_val = np.asarray(y_val, dtype=np.int32)
-    X_test = np.asarray(X_test, dtype=np.float32)
-    y_test = np.asarray(y_test, dtype=np.int32)
-    return X_train, y_train, X_val, y_val, X_test, y_test
diff --git a/tensorlayer/files/dataset_loaders/mpii_dataset.py b/tensorlayer/files/dataset_loaders/mpii_dataset.py
deleted file mode 100644
index 69fa228..0000000
--- a/tensorlayer/files/dataset_loaders/mpii_dataset.py
+++ /dev/null
@@ -1,256 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import os
-
-from tensorlayer import logging
-
-from tensorlayer.files.utils import del_file
-from tensorlayer.files.utils import folder_exists
-from tensorlayer.files.utils import load_file_list
-from tensorlayer.files.utils import maybe_download_and_extract
-
-__all__ = ['load_mpii_pose_dataset']
-
-
-def load_mpii_pose_dataset(path='data', is_16_pos_only=False):
-    """Load MPII Human Pose Dataset.
-
-    Parameters
-    -----------
-    path : str
-        The path that the data is downloaded to.
-    is_16_pos_only : boolean
-        If True, only return people with 16 pose keypoints. (Usually used for single-person pose estimation.)
-
-    Returns
-    ----------
-    img_train_list : list of str
-        The image file paths of the training data.
-    ann_train_list : list of dict
-        The annotations of the training data.
-    img_test_list : list of str
-        The image file paths of the testing data.
-    ann_test_list : list of dict
-        The annotations of the testing data.
-
-    Examples
-    --------
-    >>> import pprint
-    >>> import tensorlayer as tl
-    >>> img_train_list, ann_train_list, img_test_list, ann_test_list = tl.files.load_mpii_pose_dataset()
-    >>> image = tl.vis.read_image(img_train_list[0])
-    >>> tl.vis.draw_mpii_pose_to_image(image, ann_train_list[0], 'image.png')
-    >>> pprint.pprint(ann_train_list[0])
-
-    References
-    -----------
-    - `MPII Human Pose Dataset. CVPR 14 `__
-    - `MPII Human Pose Models.
CVPR 16 `__ - - `MPII Human Shape, Poselet Conditioned Pictorial Structures and etc `__ - - `MPII Keyponts and ID `__ - """ - path = os.path.join(path, 'mpii_human_pose') - logging.info("Load or Download MPII Human Pose > {}".format(path)) - - # annotation - url = "http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/" - tar_filename = "mpii_human_pose_v1_u12_2.zip" - extracted_filename = "mpii_human_pose_v1_u12_2" - if folder_exists(os.path.join(path, extracted_filename)) is False: - logging.info("[MPII] (annotation) {} is nonexistent in {}".format(extracted_filename, path)) - maybe_download_and_extract(tar_filename, path, url, extract=True) - del_file(os.path.join(path, tar_filename)) - - # images - url = "http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/" - tar_filename = "mpii_human_pose_v1.tar.gz" - extracted_filename2 = "images" - if folder_exists(os.path.join(path, extracted_filename2)) is False: - logging.info("[MPII] (images) {} is nonexistent in {}".format(extracted_filename, path)) - maybe_download_and_extract(tar_filename, path, url, extract=True) - del_file(os.path.join(path, tar_filename)) - - # parse annotation, format see http://human-pose.mpi-inf.mpg.de/#download - import scipy.io as sio - logging.info("reading annotations from mat file ...") - # mat = sio.loadmat(os.path.join(path, extracted_filename, "mpii_human_pose_v1_u12_1.mat")) - - # def fix_wrong_joints(joint): # https://github.com/mitmul/deeppose/blob/master/datasets/mpii_dataset.py - # if '12' in joint and '13' in joint and '2' in joint and '3' in joint: - # if ((joint['12'][0] < joint['13'][0]) and - # (joint['3'][0] < joint['2'][0])): - # joint['2'], joint['3'] = joint['3'], joint['2'] - # if ((joint['12'][0] > joint['13'][0]) and - # (joint['3'][0] > joint['2'][0])): - # joint['2'], joint['3'] = joint['3'], joint['2'] - # return joint - - ann_train_list = [] - ann_test_list = [] - img_train_list = [] - img_test_list = [] - - def save_joints(): - # joint_data_fn = os.path.join(path, 'data.json') - # fp = open(joint_data_fn, 'w') - mat = sio.loadmat(os.path.join(path, extracted_filename, "mpii_human_pose_v1_u12_1.mat")) - - for _, (anno, train_flag) in enumerate( # all images - zip(mat['RELEASE']['annolist'][0, 0][0], mat['RELEASE']['img_train'][0, 0][0])): - - img_fn = anno['image']['name'][0, 0][0] - train_flag = int(train_flag) - - # print(i, img_fn, train_flag) # DEBUG print all images - - if train_flag: - img_train_list.append(img_fn) - ann_train_list.append([]) - else: - img_test_list.append(img_fn) - ann_test_list.append([]) - - head_rect = [] - if 'x1' in str(anno['annorect'].dtype): - head_rect = zip( - [x1[0, 0] for x1 in anno['annorect']['x1'][0]], [y1[0, 0] for y1 in anno['annorect']['y1'][0]], - [x2[0, 0] for x2 in anno['annorect']['x2'][0]], [y2[0, 0] for y2 in anno['annorect']['y2'][0]] - ) - else: - head_rect = [] # TODO - - if 'annopoints' in str(anno['annorect'].dtype): - annopoints = anno['annorect']['annopoints'][0] - head_x1s = anno['annorect']['x1'][0] - head_y1s = anno['annorect']['y1'][0] - head_x2s = anno['annorect']['x2'][0] - head_y2s = anno['annorect']['y2'][0] - - for annopoint, head_x1, head_y1, head_x2, head_y2 in zip(annopoints, head_x1s, head_y1s, head_x2s, - head_y2s): - # if annopoint != []: - # if len(annopoint) != 0: - if annopoint.size: - head_rect = [ - float(head_x1[0, 0]), - float(head_y1[0, 0]), - float(head_x2[0, 0]), - float(head_y2[0, 0]) - ] - - # joint coordinates - annopoint = annopoint['point'][0, 0] - j_id = [str(j_i[0, 0]) for j_i in annopoint['id'][0]] - x = 
[x[0, 0] for x in annopoint['x'][0]] - y = [y[0, 0] for y in annopoint['y'][0]] - joint_pos = {} - for _j_id, (_x, _y) in zip(j_id, zip(x, y)): - joint_pos[int(_j_id)] = [float(_x), float(_y)] - # joint_pos = fix_wrong_joints(joint_pos) - - # visibility list - if 'is_visible' in str(annopoint.dtype): - vis = [v[0] if v.size > 0 else [0] for v in annopoint['is_visible'][0]] - vis = dict([(k, int(v[0])) if len(v) > 0 else v for k, v in zip(j_id, vis)]) - else: - vis = None - - # if len(joint_pos) == 16: - if ((is_16_pos_only ==True) and (len(joint_pos) == 16)) or (is_16_pos_only == False): - # only use image with 16 key points / or use all - data = { - 'filename': img_fn, - 'train': train_flag, - 'head_rect': head_rect, - 'is_visible': vis, - 'joint_pos': joint_pos - } - # print(json.dumps(data), file=fp) # py3 - if train_flag: - ann_train_list[-1].append(data) - else: - ann_test_list[-1].append(data) - - # def write_line(datum, fp): - # joints = sorted([[int(k), v] for k, v in datum['joint_pos'].items()]) - # joints = np.array([j for i, j in joints]).flatten() - # - # out = [datum['filename']] - # out.extend(joints) - # out = [str(o) for o in out] - # out = ','.join(out) - # - # print(out, file=fp) - - # def split_train_test(): - # # fp_test = open('data/mpii/test_joints.csv', 'w') - # fp_test = open(os.path.join(path, 'test_joints.csv'), 'w') - # # fp_train = open('data/mpii/train_joints.csv', 'w') - # fp_train = open(os.path.join(path, 'train_joints.csv'), 'w') - # # all_data = open('data/mpii/data.json').readlines() - # all_data = open(os.path.join(path, 'data.json')).readlines() - # N = len(all_data) - # N_test = int(N * 0.1) - # N_train = N - N_test - # - # print('N:{}'.format(N)) - # print('N_train:{}'.format(N_train)) - # print('N_test:{}'.format(N_test)) - # - # np.random.seed(1701) - # perm = np.random.permutation(N) - # test_indices = perm[:N_test] - # train_indices = perm[N_test:] - # - # print('train_indices:{}'.format(len(train_indices))) - # print('test_indices:{}'.format(len(test_indices))) - # - # for i in train_indices: - # datum = json.loads(all_data[i].strip()) - # write_line(datum, fp_train) - # - # for i in test_indices: - # datum = json.loads(all_data[i].strip()) - # write_line(datum, fp_test) - - save_joints() - # split_train_test() # - - ## read images dir - logging.info("reading images list ...") - img_dir = os.path.join(path, extracted_filename2) - _img_list = load_file_list(path=os.path.join(path, extracted_filename2), regx='\\.jpg', printable=False) - # ann_list = json.load(open(os.path.join(path, 'data.json'))) - for i, im in enumerate(img_train_list): - if im not in _img_list: - print('missing training image {} in {} (remove from img(ann)_train_list)'.format(im, img_dir)) - # img_train_list.remove(im) - del img_train_list[i] - del ann_train_list[i] - for i, im in enumerate(img_test_list): - if im not in _img_list: - print('missing testing image {} in {} (remove from img(ann)_test_list)'.format(im, img_dir)) - # img_test_list.remove(im) - del img_train_list[i] - del ann_train_list[i] - - ## check annotation and images - n_train_images = len(img_train_list) - n_test_images = len(img_test_list) - n_images = n_train_images + n_test_images - logging.info("n_images: {} n_train_images: {} n_test_images: {}".format(n_images, n_train_images, n_test_images)) - n_train_ann = len(ann_train_list) - n_test_ann = len(ann_test_list) - n_ann = n_train_ann + n_test_ann - logging.info("n_ann: {} n_train_ann: {} n_test_ann: {}".format(n_ann, n_train_ann, n_test_ann)) - 
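-    # sum(list_of_lists, []) flattens one level: each image carries a list of
-    # per-person annotations, so the counts below are people, not images.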
n_train_people = len(sum(ann_train_list, [])) - n_test_people = len(sum(ann_test_list, [])) - n_people = n_train_people + n_test_people - logging.info("n_people: {} n_train_people: {} n_test_people: {}".format(n_people, n_train_people, n_test_people)) - # add path to all image file name - for i, value in enumerate(img_train_list): - img_train_list[i] = os.path.join(img_dir, value) - for i, value in enumerate(img_test_list): - img_test_list[i] = os.path.join(img_dir, value) - return img_train_list, ann_train_list, img_test_list, ann_test_list diff --git a/tensorlayer/files/dataset_loaders/nietzsche_dataset.py b/tensorlayer/files/dataset_loaders/nietzsche_dataset.py deleted file mode 100644 index 8c8dc5c..0000000 --- a/tensorlayer/files/dataset_loaders/nietzsche_dataset.py +++ /dev/null @@ -1,43 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os - -from tensorlayer import logging - -from tensorlayer.files.utils import maybe_download_and_extract - -__all__ = ['load_nietzsche_dataset'] - - -def load_nietzsche_dataset(path='data'): - """Load Nietzsche dataset. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/nietzsche/``. - - Returns - -------- - str - The content. - - Examples - -------- - >>> see tutorial_generate_text.py - >>> words = tl.files.load_nietzsche_dataset() - >>> words = basic_clean_str(words) - >>> words = words.split() - - """ - logging.info("Load or Download nietzsche dataset > {}".format(path)) - path = os.path.join(path, 'nietzsche') - - filename = "nietzsche.txt" - url = 'https://s3.amazonaws.com/text-datasets/' - filepath = maybe_download_and_extract(filename, path, url) - - with open(filepath, "r") as f: - words = f.read() - return words diff --git a/tensorlayer/files/dataset_loaders/ptb_dataset.py b/tensorlayer/files/dataset_loaders/ptb_dataset.py deleted file mode 100644 index 2ae8f03..0000000 --- a/tensorlayer/files/dataset_loaders/ptb_dataset.py +++ /dev/null @@ -1,74 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os - -from tensorlayer import nlp -from tensorlayer import logging - -from tensorlayer.files.utils import maybe_download_and_extract - -__all__ = ['load_ptb_dataset'] - - -def load_ptb_dataset(path='data'): - """Load Penn TreeBank (PTB) dataset. - - It is used in many LANGUAGE MODELING papers, - including "Empirical Evaluation and Combination of Advanced Language - Modeling Techniques", "Recurrent Neural Network Regularization". - It consists of 929k training words, 73k validation words, and 82k test - words. It has 10k words in its vocabulary. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/ptb/``. - - Returns - -------- - train_data, valid_data, test_data : list of int - The training, validating and testing data in integer format. - vocab_size : int - The vocabulary size. - - Examples - -------- - >>> train_data, valid_data, test_data, vocab_size = tl.files.load_ptb_dataset() - - References - --------------- - - ``tensorflow.models.rnn.ptb import reader`` - - `Manual download `__ - - Notes - ------ - - If you want to get the raw data, see the source code. 
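-    - The word ids come from a vocabulary built on the training split only
-      (``nlp.build_vocab`` over ``ptb.train.txt``; see the code below).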
- - """ - path = os.path.join(path, 'ptb') - logging.info("Load or Download Penn TreeBank (PTB) dataset > {}".format(path)) - - #Maybe dowload and uncompress tar, or load exsisting files - filename = 'simple-examples.tgz' - url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/' - maybe_download_and_extract(filename, path, url, extract=True) - - data_path = os.path.join(path, 'simple-examples', 'data') - train_path = os.path.join(data_path, "ptb.train.txt") - valid_path = os.path.join(data_path, "ptb.valid.txt") - test_path = os.path.join(data_path, "ptb.test.txt") - - word_to_id = nlp.build_vocab(nlp.read_words(train_path)) - - train_data = nlp.words_to_word_ids(nlp.read_words(train_path), word_to_id) - valid_data = nlp.words_to_word_ids(nlp.read_words(valid_path), word_to_id) - test_data = nlp.words_to_word_ids(nlp.read_words(test_path), word_to_id) - vocab_size = len(word_to_id) - - # logging.info(nlp.read_words(train_path)) # ... 'according', 'to', 'mr.', '', ''] - # logging.info(train_data) # ... 214, 5, 23, 1, 2] - # logging.info(word_to_id) # ... 'beyond': 1295, 'anti-nuclear': 9599, 'trouble': 1520, '': 2 ... } - # logging.info(vocabulary) # 10000 - # exit() - return train_data, valid_data, test_data, vocab_size diff --git a/tensorlayer/files/dataset_loaders/voc_dataset.py b/tensorlayer/files/dataset_loaders/voc_dataset.py deleted file mode 100644 index 5cfe272..0000000 --- a/tensorlayer/files/dataset_loaders/voc_dataset.py +++ /dev/null @@ -1,338 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os - -from lxml import etree - -import tensorflow as tf - -from tensorlayer import logging - -from tensorlayer.files.utils import del_file -from tensorlayer.files.utils import del_folder -from tensorlayer.files.utils import folder_exists -from tensorlayer.files.utils import load_file_list -from tensorlayer.files.utils import maybe_download_and_extract - -from tensorlayer import utils - -__all__ = ['load_voc_dataset'] - - -def load_voc_dataset(path='data', dataset='2012', contain_classes_in_person=False): - """Pascal VOC 2007/2012 Dataset. - - It has 20 objects: - aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor - and additional 3 classes : head, hand, foot for person. - - Parameters - ----------- - path : str - The path that the data is downloaded to, defaults is ``data/VOC``. - dataset : str - The VOC dataset version, `2012`, `2007`, `2007test` or `2012test`. We usually train model on `2007+2012` and test it on `2007test`. - contain_classes_in_person : boolean - Whether include head, hand and foot annotation, default is False. - - Returns - --------- - imgs_file_list : list of str - Full paths of all images. - imgs_semseg_file_list : list of str - Full paths of all maps for semantic segmentation. Note that not all images have this map! - imgs_insseg_file_list : list of str - Full paths of all maps for instance segmentation. Note that not all images have this map! - imgs_ann_file_list : list of str - Full paths of all annotations for bounding box and object class, all images have this annotations. - classes : list of str - Classes in order. - classes_in_person : list of str - Classes in person. - classes_dict : dictionary - Class label to integer. - n_objs_list : list of int - Number of objects in all images in ``imgs_file_list`` in order. - objs_info_list : list of str - Darknet format for the annotation of all images in ``imgs_file_list`` in order. 
``[class_id x_centre y_centre width height]`` in ratio format. - objs_info_dicts : dictionary - The annotation of all images in ``imgs_file_list``, ``{imgs_file_list : dictionary for annotation}``, - format from `TensorFlow/Models/object-detection `__. - - Examples - ---------- - >>> imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, - >>> classes, classes_in_person, classes_dict, - >>> n_objs_list, objs_info_list, objs_info_dicts = tl.files.load_voc_dataset(dataset="2012", contain_classes_in_person=False) - >>> idx = 26 - >>> print(classes) - ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] - >>> print(classes_dict) - {'sheep': 16, 'horse': 12, 'bicycle': 1, 'bottle': 4, 'cow': 9, 'sofa': 17, 'car': 6, 'dog': 11, 'cat': 7, 'person': 14, 'train': 18, 'diningtable': 10, 'aeroplane': 0, 'bus': 5, 'pottedplant': 15, 'tvmonitor': 19, 'chair': 8, 'bird': 2, 'boat': 3, 'motorbike': 13} - >>> print(imgs_file_list[idx]) - data/VOC/VOC2012/JPEGImages/2007_000423.jpg - >>> print(n_objs_list[idx]) - 2 - >>> print(imgs_ann_file_list[idx]) - data/VOC/VOC2012/Annotations/2007_000423.xml - >>> print(objs_info_list[idx]) - 14 0.173 0.461333333333 0.142 0.496 - 14 0.828 0.542666666667 0.188 0.594666666667 - >>> ann = tl.prepro.parse_darknet_ann_str_to_list(objs_info_list[idx]) - >>> print(ann) - [[14, 0.173, 0.461333333333, 0.142, 0.496], [14, 0.828, 0.542666666667, 0.188, 0.594666666667]] - >>> c, b = tl.prepro.parse_darknet_ann_list_to_cls_box(ann) - >>> print(c, b) - [14, 14] [[0.173, 0.461333333333, 0.142, 0.496], [0.828, 0.542666666667, 0.188, 0.594666666667]] - - References - ------------- - - `Pascal VOC2012 Website `__. - - `Pascal VOC2007 Website `__. - - """ - path = os.path.join(path, 'VOC') - - def _recursive_parse_xml_to_dict(xml): - """Recursively parses XML contents to python dict. - - We assume that `object` tags are the only ones that can appear - multiple times at the same level of a tree. - - Args: - xml: xml tree obtained by parsing XML file contents using lxml.etree - - Returns: - Python dictionary holding XML contents. 
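-        For example, two <object> tags under <annotation> are collected into a
-        list: {'annotation': {'object': [{...}, {...}], ...}}.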
- - """ - if xml is not None: - return {xml.tag: xml.text} - result = {} - for child in xml: - child_result = _recursive_parse_xml_to_dict(child) - if child.tag != 'object': - result[child.tag] = child_result[child.tag] - else: - if child.tag not in result: - result[child.tag] = [] - result[child.tag].append(child_result[child.tag]) - return {xml.tag: result} - - import xml.etree.ElementTree as ET - - if dataset == "2012": - url = "http://pjreddie.com/media/files/" - tar_filename = "VOCtrainval_11-May-2012.tar" - extracted_filename = "VOC2012" #"VOCdevkit/VOC2012" - logging.info(" [============= VOC 2012 =============]") - elif dataset == "2012test": - extracted_filename = "VOC2012test" #"VOCdevkit/VOC2012" - logging.info(" [============= VOC 2012 Test Set =============]") - logging.info( - " \nAuthor: 2012test only have person annotation, so 2007test is highly recommended for testing !\n" - ) - import time - time.sleep(3) - if os.path.isdir(os.path.join(path, extracted_filename)) is False: - logging.info("For VOC 2012 Test data - online registration required") - logging.info( - " Please download VOC2012test.tar from: \n register: http://host.robots.ox.ac.uk:8080 \n voc2012 : http://host.robots.ox.ac.uk:8080/eval/challenges/voc2012/ \ndownload: http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar" - ) - logging.info(" unzip VOC2012test.tar,rename the folder to VOC2012test and put it into %s" % path) - exit() - # # http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar - # url = "http://host.robots.ox.ac.uk:8080/eval/downloads/" - # tar_filename = "VOC2012test.tar" - elif dataset == "2007": - url = "http://pjreddie.com/media/files/" - tar_filename = "VOCtrainval_06-Nov-2007.tar" - extracted_filename = "VOC2007" - logging.info(" [============= VOC 2007 =============]") - elif dataset == "2007test": - # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/index.html#testdata - # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar - url = "http://pjreddie.com/media/files/" - tar_filename = "VOCtest_06-Nov-2007.tar" - extracted_filename = "VOC2007test" - logging.info(" [============= VOC 2007 Test Set =============]") - else: - raise Exception("Please set the dataset aug to 2012, 2012test or 2007.") - - # download dataset - if dataset != "2012test": - from sys import platform as _platform - if folder_exists(os.path.join(path, extracted_filename)) is False: - logging.info("[VOC] {} is nonexistent in {}".format(extracted_filename, path)) - maybe_download_and_extract(tar_filename, path, url, extract=True) - del_file(os.path.join(path, tar_filename)) - if dataset == "2012": - if _platform == "win32": - os.system("move {}\VOCdevkit\VOC2012 {}\VOC2012".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2012 {}/VOC2012".format(path, path)) - elif dataset == "2007": - if _platform == "win32": - os.system("move {}\VOCdevkit\VOC2007 {}\VOC2007".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007".format(path, path)) - elif dataset == "2007test": - if _platform == "win32": - os.system("move {}\VOCdevkit\VOC2007 {}\VOC2007test".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007test".format(path, path)) - del_folder(os.path.join(path, 'VOCdevkit')) - # object classes(labels) NOTE: YOU CAN CUSTOMIZE THIS LIST - classes = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", - "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor" - 
] - if contain_classes_in_person: - classes_in_person = ["head", "hand", "foot"] - else: - classes_in_person = [] - - classes += classes_in_person # use extra 3 classes for person - - classes_dict = utils.list_string_to_dict(classes) - logging.info("[VOC] object classes {}".format(classes_dict)) - - # 1. image path list - # folder_imgs = path+"/"+extracted_filename+"/JPEGImages/" - folder_imgs = os.path.join(path, extracted_filename, "JPEGImages") - imgs_file_list = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False) - logging.info("[VOC] {} images found".format(len(imgs_file_list))) - - imgs_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000027.jpg --> 2007000027 - - imgs_file_list = [os.path.join(folder_imgs, s) for s in imgs_file_list] - # logging.info('IM',imgs_file_list[0::3333], imgs_file_list[-1]) - if dataset != "2012test": - ##======== 2. semantic segmentation maps path list - # folder_semseg = path+"/"+extracted_filename+"/SegmentationClass/" - folder_semseg = os.path.join(path, extracted_filename, "SegmentationClass") - imgs_semseg_file_list = load_file_list(path=folder_semseg, regx='\\.png', printable=False) - logging.info("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list))) - imgs_semseg_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000032.png --> 2007000032 - imgs_semseg_file_list = [os.path.join(folder_semseg, s) for s in imgs_semseg_file_list] - # logging.info('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1]) - ##======== 3. instance segmentation maps path list - # folder_insseg = path+"/"+extracted_filename+"/SegmentationObject/" - folder_insseg = os.path.join(path, extracted_filename, "SegmentationObject") - imgs_insseg_file_list = load_file_list(path=folder_insseg, regx='\\.png', printable=False) - logging.info("[VOC] {} maps for instance segmentation found".format(len(imgs_semseg_file_list))) - imgs_insseg_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000032.png --> 2007000032 - imgs_insseg_file_list = [os.path.join(folder_insseg, s) for s in imgs_insseg_file_list] - # logging.info('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1]) - else: - imgs_semseg_file_list = [] - imgs_insseg_file_list = [] - # 4. annotations for bounding box and object class - # folder_ann = path+"/"+extracted_filename+"/Annotations/" - folder_ann = os.path.join(path, extracted_filename, "Annotations") - imgs_ann_file_list = load_file_list(path=folder_ann, regx='\\.xml', printable=False) - logging.info( - "[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list)) - ) - imgs_ann_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000027.xml --> 2007000027 - imgs_ann_file_list = [os.path.join(folder_ann, s) for s in imgs_ann_file_list] - # logging.info('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1]) - - if dataset == "2012test": # remove unused images in JPEG folder - imgs_file_list_new = [] - for ann in imgs_ann_file_list: - ann = os.path.split(ann)[-1].split('.')[0] - for im in imgs_file_list: - if ann in im: - imgs_file_list_new.append(im) - break - imgs_file_list = imgs_file_list_new - logging.info("[VOC] keep %d images" % len(imgs_file_list_new)) - - # parse XML annotations - def convert(size, box): - dw = 1. / size[0] - dh = 1. 
/ size[1] - x = (box[0] + box[1]) / 2.0 - y = (box[2] + box[3]) / 2.0 - w = box[1] - box[0] - h = box[3] - box[2] - x = x * dw - w = w * dw - y = y * dh - h = h * dh - return x, y, w, h - - def convert_annotation(file_name): - """Given VOC2012 XML Annotations, returns number of objects and info.""" - in_file = open(file_name) - out_file = "" - tree = ET.parse(in_file) - root = tree.getroot() - size = root.find('size') - w = int(size.find('width').text) - h = int(size.find('height').text) - n_objs = 0 - - for obj in root.iter('object'): - if dataset != "2012test": - difficult = obj.find('difficult').text - cls = obj.find('name').text - if cls not in classes or int(difficult) == 1: - continue - else: - cls = obj.find('name').text - if cls not in classes: - continue - cls_id = classes.index(cls) - xmlbox = obj.find('bndbox') - b = ( - float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), - float(xmlbox.find('ymax').text) - ) - bb = convert((w, h), b) - - out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' - n_objs += 1 - if cls in "person": - for part in obj.iter('part'): - cls = part.find('name').text - if cls not in classes_in_person: - continue - cls_id = classes.index(cls) - xmlbox = part.find('bndbox') - b = ( - float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), - float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text) - ) - bb = convert((w, h), b) - # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') - out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' - n_objs += 1 - in_file.close() - return n_objs, out_file - - logging.info("[VOC] Parsing xml annotations files") - n_objs_list = [] - objs_info_list = [] # Darknet Format list of string - objs_info_dicts = {} - for idx, ann_file in enumerate(imgs_ann_file_list): - n_objs, objs_info = convert_annotation(ann_file) - n_objs_list.append(n_objs) - objs_info_list.append(objs_info) - with tf.io.gfile.GFile(ann_file, 'r') as fid: - xml_str = fid.read() - xml = etree.fromstring(xml_str) - data = _recursive_parse_xml_to_dict(xml)['annotation'] - objs_info_dicts.update({imgs_file_list[idx]: data}) - - return imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, classes, classes_in_person, classes_dict, n_objs_list, objs_info_list, objs_info_dicts diff --git a/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py b/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py deleted file mode 100644 index 585f4c0..0000000 --- a/tensorlayer/files/dataset_loaders/wmt_en_fr_dataset.py +++ /dev/null @@ -1,82 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os - -import gzip -import tarfile - -from tensorflow.python.platform import gfile - -from tensorlayer import logging - -from tensorlayer.files.utils import maybe_download_and_extract - -__all__ = ['load_wmt_en_fr_dataset'] - - -def load_wmt_en_fr_dataset(path='data'): - """Load WMT'15 English-to-French translation dataset. - - It will download the data from the WMT'15 Website (10^9-French-English corpus), and the 2013 news test from the same site as development set. - Returns the directories of training data and test data. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/wmt_en_fr/``. - - References - ---------- - - Code modified from /tensorflow/models/rnn/translation/data_utils.py - - Notes - ----- - Usually, it will take a long time to download this dataset. 
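-    The returned ``train_path`` and ``dev_path`` are filename prefixes; append
-    ``.en`` or ``.fr`` to obtain the actual corpus files.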
- - """ - path = os.path.join(path, 'wmt_en_fr') - # URLs for WMT data. - _WMT_ENFR_TRAIN_URL = "http://www.statmt.org/wmt10/" - _WMT_ENFR_DEV_URL = "http://www.statmt.org/wmt15/" - - def gunzip_file(gz_path, new_path): - """Unzips from gz_path into new_path.""" - logging.info("Unpacking %s to %s" % (gz_path, new_path)) - with gzip.open(gz_path, "rb") as gz_file: - with open(new_path, "wb") as new_file: - for line in gz_file: - new_file.write(line) - - def get_wmt_enfr_train_set(path): - """Download the WMT en-fr training corpus to directory unless it's there.""" - filename = "training-giga-fren.tar" - maybe_download_and_extract(filename, path, _WMT_ENFR_TRAIN_URL, extract=True) - train_path = os.path.join(path, "giga-fren.release2.fixed") - gunzip_file(train_path + ".fr.gz", train_path + ".fr") - gunzip_file(train_path + ".en.gz", train_path + ".en") - return train_path - - def get_wmt_enfr_dev_set(path): - """Download the WMT en-fr training corpus to directory unless it's there.""" - filename = "dev-v2.tgz" - dev_file = maybe_download_and_extract(filename, path, _WMT_ENFR_DEV_URL, extract=False) - dev_name = "newstest2013" - dev_path = os.path.join(path, "newstest2013") - if not (gfile.Exists(dev_path + ".fr") and gfile.Exists(dev_path + ".en")): - logging.info("Extracting tgz file %s" % dev_file) - with tarfile.open(dev_file, "r:gz") as dev_tar: - fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr") - en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en") - fr_dev_file.name = dev_name + ".fr" # Extract without "dev/" prefix. - en_dev_file.name = dev_name + ".en" - dev_tar.extract(fr_dev_file, path) - dev_tar.extract(en_dev_file, path) - return dev_path - - logging.info("Load or Download WMT English-to-French translation > {}".format(path)) - - train_path = get_wmt_enfr_train_set(path) - dev_path = get_wmt_enfr_dev_set(path) - - return train_path, dev_path diff --git a/tensorlayer/files/utils.py b/tensorlayer/files/utils.py deleted file mode 100644 index 12152f0..0000000 --- a/tensorlayer/files/utils.py +++ /dev/null @@ -1,2544 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -import os -# import ast -import sys -import gzip -import math -import pickle -import progressbar -import re -import requests -import shutil -import tarfile -import time -import zipfile -import importlib -import h5py -from tqdm import tqdm - -from six.moves import cPickle -# from six.moves import zip - -from lxml import etree -import xml.etree.ElementTree as ET - -if sys.version_info[0] == 2: - from urllib import urlretrieve -else: - from urllib.request import urlretrieve - -# Fix error on OSX, as suggested by: https://stackoverflow.com/a/48374671 -# See: https://docs.python.org/3/library/sys.html#sys.platform -if sys.platform.startswith('darwin'): - import matplotlib - matplotlib.use('TkAgg') - -import matplotlib.pyplot as plt - -import scipy.io as sio -import numpy as np - -import tensorflow as tf -from tensorflow.python.platform import gfile -# import tensorflow.contrib.eager.python.saver as tfes -# TODO: tf2.0 not stable, cannot import tensorflow.contrib.eager.python.saver - -import tensorlayer as tl -from tensorlayer import logging -from tensorlayer import nlp -from tensorlayer import utils -from tensorlayer import visualize - -__all__ = [ - 'assign_weights', - 'del_file', - 'del_folder', - 'download_file_from_google_drive', - 'exists_or_mkdir', - 'file_exists', - 'folder_exists', - 'load_and_assign_npz', - 'load_and_assign_npz_dict', - 'load_ckpt', - 'load_cropped_svhn', - 'load_file_list', - 'load_folder_list', - 'load_npy_to_any', - 'load_npz', - 'maybe_download_and_extract', - 'natural_keys', - 'npz_to_W_pdf', - 'read_file', - 'save_any_to_npy', - 'save_ckpt', - 'save_npz', - 'save_npz_dict', - 'tf_variables_to_numpy', - 'assign_tf_variable', - 'save_weights_to_hdf5', - 'load_hdf5_to_weights_in_order', - 'load_hdf5_to_weights', - #'save_graph', - #'load_graph', - #'save_graph_and_params', - #'load_graph_and_params', -] - - -# Load dataset functions -def load_mnist_dataset(shape=(-1, 784), path='data'): - """Load the original mnist. - - Automatically download MNIST dataset and return the training, validation and test set with 50000, 10000 and 10000 digit images respectively. - - Parameters - ---------- - shape : tuple - The shape of digit images (the default is (-1, 784), alternatively (-1, 28, 28, 1)). - path : str - The path that the data is downloaded to. - - Returns - ------- - X_train, y_train, X_val, y_val, X_test, y_test: tuple - Return splitted training/validation/test set respectively. - - Examples - -------- - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1,784), path='datasets') - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1)) - """ - return _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/') - - -def load_fashion_mnist_dataset(shape=(-1, 784), path='data'): - """Load the fashion mnist. - - Automatically download fashion-MNIST dataset and return the training, validation and test set with 50000, 10000 and 10000 fashion images respectively, `examples `__. - - Parameters - ---------- - shape : tuple - The shape of digit images (the default is (-1, 784), alternatively (-1, 28, 28, 1)). - path : str - The path that the data is downloaded to. - - Returns - ------- - X_train, y_train, X_val, y_val, X_test, y_test: tuple - Return splitted training/validation/test set respectively. 
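Both MNIST-style loaders delegate to the same IDX-format parsing shown in `_load_mnist_dataset` below; as a self-contained sketch of that step (assuming `filepath` points at an already-downloaded `*-images-idx3-ubyte.gz`):

    import gzip
    import numpy as np

    def read_idx_images(filepath, shape=(-1, 28, 28, 1)):
        # IDX image files carry a 16-byte header before the raw uint8 pixels.
        with gzip.open(filepath, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        # Bytes are scaled to floats in [0, 255/256], matching the loader below.
        return (data / np.float32(256)).reshape(shape)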
- - Examples - -------- - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1,784), path='datasets') - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1, 28, 28, 1)) - """ - return _load_mnist_dataset( - shape, path, name='fashion_mnist', url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' - ) - - -def _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/'): - """A generic function to load mnist-like dataset. - - Parameters: - ---------- - shape : tuple - The shape of digit images. - path : str - The path that the data is downloaded to. - name : str - The dataset name you want to use(the default is 'mnist'). - url : str - The url of dataset(the default is 'http://yann.lecun.com/exdb/mnist/'). - """ - path = os.path.join(path, name) - - # Define functions for loading mnist-like data's images and labels. - # For convenience, they also download the requested files if needed. - def load_mnist_images(path, filename): - filepath = maybe_download_and_extract(filename, path, url) - - logging.info(filepath) - # Read the inputs in Yann LeCun's binary format. - with gzip.open(filepath, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=16) - # The inputs are vectors now, we reshape them to monochrome 2D images, - # following the shape convention: (examples, channels, rows, columns) - data = data.reshape(shape) - # The inputs come as bytes, we convert them to float32 in range [0,1]. - # (Actually to range [0, 255/256], for compatibility to the version - # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.) - return data / np.float32(256) - - def load_mnist_labels(path, filename): - filepath = maybe_download_and_extract(filename, path, url) - # Read the labels in Yann LeCun's binary format. - with gzip.open(filepath, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=8) - # The labels are vectors of integers now, that's exactly what we want. - return data - - # Download and read the training and test set images and labels. - logging.info("Load or Download {0} > {1}".format(name.upper(), path)) - X_train = load_mnist_images(path, 'train-images-idx3-ubyte.gz') - y_train = load_mnist_labels(path, 'train-labels-idx1-ubyte.gz') - X_test = load_mnist_images(path, 't10k-images-idx3-ubyte.gz') - y_test = load_mnist_labels(path, 't10k-labels-idx1-ubyte.gz') - - # We reserve the last 10000 training examples for validation. - X_train, X_val = X_train[:-10000], X_train[-10000:] - y_train, y_val = y_train[:-10000], y_train[-10000:] - - # We just return all the arrays in order, as expected in main(). - # (It doesn't matter how we do this as long as we can read them again.) - X_train = np.asarray(X_train, dtype=np.float32) - y_train = np.asarray(y_train, dtype=np.int32) - X_val = np.asarray(X_val, dtype=np.float32) - y_val = np.asarray(y_val, dtype=np.int32) - X_test = np.asarray(X_test, dtype=np.float32) - y_test = np.asarray(y_test, dtype=np.int32) - return X_train, y_train, X_val, y_val, X_test, y_test - - -def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False): - """Load CIFAR-10 dataset. - - It consists of 60000 32x32 colour images in 10 classes, with - 6000 images per class. There are 50000 training images and 10000 test images. - - The dataset is divided into five training batches and one test batch, each with - 10000 images. The test batch contains exactly 1000 randomly-selected images from - each class. 
The training batches contain the remaining images in random order, - but some training batches may contain more images from one class than another. - Between them, the training batches contain exactly 5000 images from each class. - - Parameters - ---------- - shape : tuple - The shape of digit images e.g. (-1, 3, 32, 32) and (-1, 32, 32, 3). - path : str - The path that the data is downloaded to, default is ``data/cifar10/``. - plotable : boolean - Whether to plot some image examples, default is False. - - Examples - -------- - >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3)) - - References - ---------- - - `CIFAR website `__ - - `Data download link `__ - - ``__ - - """ - path = os.path.join(path, 'cifar10') - logging.info("Load or Download cifar10 > {}".format(path)) - - # Helper function to unpickle the data - def unpickle(file): - fp = open(file, 'rb') - if sys.version_info.major == 2: - data = pickle.load(fp) - elif sys.version_info.major == 3: - data = pickle.load(fp, encoding='latin-1') - fp.close() - return data - - filename = 'cifar-10-python.tar.gz' - url = 'https://www.cs.toronto.edu/~kriz/' - # Download and uncompress file - maybe_download_and_extract(filename, path, url, extract=True) - - # Unpickle file and fill in data - X_train = None - y_train = [] - for i in range(1, 6): - data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "data_batch_{}".format(i))) - if i == 1: - X_train = data_dic['data'] - else: - X_train = np.vstack((X_train, data_dic['data'])) - y_train += data_dic['labels'] - - test_data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "test_batch")) - X_test = test_data_dic['data'] - y_test = np.array(test_data_dic['labels']) - - if shape == (-1, 3, 32, 32): - X_test = X_test.reshape(shape) - X_train = X_train.reshape(shape) - elif shape == (-1, 32, 32, 3): - X_test = X_test.reshape(shape, order='F') - X_train = X_train.reshape(shape, order='F') - X_test = np.transpose(X_test, (0, 2, 1, 3)) - X_train = np.transpose(X_train, (0, 2, 1, 3)) - else: - X_test = X_test.reshape(shape) - X_train = X_train.reshape(shape) - - y_train = np.array(y_train) - - if plotable: - logging.info('\nCIFAR-10') - fig = plt.figure(1) - - logging.info('Shape of a training image: X_train[0] %s' % X_train[0].shape) - - plt.ion() # interactive mode - count = 1 - for _ in range(10): # each row - for _ in range(10): # each column - _ = fig.add_subplot(10, 10, count) - if shape == (-1, 3, 32, 32): - # plt.imshow(X_train[count-1], interpolation='nearest') - plt.imshow(np.transpose(X_train[count - 1], (1, 2, 0)), interpolation='nearest') - # plt.imshow(np.transpose(X_train[count-1], (2, 1, 0)), interpolation='nearest') - elif shape == (-1, 32, 32, 3): - plt.imshow(X_train[count - 1], interpolation='nearest') - # plt.imshow(np.transpose(X_train[count-1], (1, 0, 2)), interpolation='nearest') - else: - raise Exception("The given 'shape' is not supported for plotting the image examples") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # hide ticks - plt.gca().yaxis.set_major_locator(plt.NullLocator()) - count = count + 1 - plt.draw() # interactive mode - plt.pause(3) # interactive mode - - logging.info("X_train: %s" % X_train.shape) - logging.info("y_train: %s" % y_train.shape) - logging.info("X_test: %s" % X_test.shape) - logging.info("y_test: %s" % y_test.shape) - - X_train = np.asarray(X_train, dtype=np.float32) - X_test = np.asarray(X_test, dtype=np.float32) - y_train = np.asarray(y_train, dtype=np.int32) - y_test = 
np.asarray(y_test, dtype=np.int32) - - return X_train, y_train, X_test, y_test - - -def load_cropped_svhn(path='data', include_extra=True): - """Load Cropped SVHN. - - The Cropped Street View House Numbers (SVHN) Dataset contains 32x32x3 RGB images. - Digit '1' has label 1, '9' has label 9 and '0' has label 0 (the original dataset uses 10 to represent '0'), see `ufldl website `__. - - Parameters - ---------- - path : str - The path that the data is downloaded to. - include_extra : boolean - If True (default), add extra images to the training set. - - Returns - ------- - X_train, y_train, X_test, y_test: tuple - Return splitted training/test set respectively. - - Examples - --------- - >>> X_train, y_train, X_test, y_test = tl.files.load_cropped_svhn(include_extra=False) - >>> tl.vis.save_images(X_train[0:100], [10, 10], 'svhn.png') - - """ - start_time = time.time() - - path = os.path.join(path, 'cropped_svhn') - logging.info("Load or Download Cropped SVHN > {} | include extra images: {}".format(path, include_extra)) - url = "http://ufldl.stanford.edu/housenumbers/" - - np_file = os.path.join(path, "train_32x32.npz") - if file_exists(np_file) is False: - filename = "train_32x32.mat" - filepath = maybe_download_and_extract(filename, path, url) - mat = sio.loadmat(filepath) - X_train = mat['X'] / 255.0 # to [0, 1] - X_train = np.transpose(X_train, (3, 0, 1, 2)) - y_train = np.squeeze(mat['y'], axis=1) - y_train[y_train == 10] = 0 # replace 10 to 0 - np.savez(np_file, X=X_train, y=y_train) - del_file(filepath) - else: - v = np.load(np_file) - X_train = v['X'] - y_train = v['y'] - logging.info(" n_train: {}".format(len(y_train))) - - np_file = os.path.join(path, "test_32x32.npz") - if file_exists(np_file) is False: - filename = "test_32x32.mat" - filepath = maybe_download_and_extract(filename, path, url) - mat = sio.loadmat(filepath) - X_test = mat['X'] / 255.0 - X_test = np.transpose(X_test, (3, 0, 1, 2)) - y_test = np.squeeze(mat['y'], axis=1) - y_test[y_test == 10] = 0 - np.savez(np_file, X=X_test, y=y_test) - del_file(filepath) - else: - v = np.load(np_file) - X_test = v['X'] - y_test = v['y'] - logging.info(" n_test: {}".format(len(y_test))) - - if include_extra: - logging.info(" getting extra 531131 images, please wait ...") - np_file = os.path.join(path, "extra_32x32.npz") - if file_exists(np_file) is False: - logging.info(" the first time to load extra images will take long time to convert the file format ...") - filename = "extra_32x32.mat" - filepath = maybe_download_and_extract(filename, path, url) - mat = sio.loadmat(filepath) - X_extra = mat['X'] / 255.0 - X_extra = np.transpose(X_extra, (3, 0, 1, 2)) - y_extra = np.squeeze(mat['y'], axis=1) - y_extra[y_extra == 10] = 0 - np.savez(np_file, X=X_extra, y=y_extra) - del_file(filepath) - else: - v = np.load(np_file) - X_extra = v['X'] - y_extra = v['y'] - # print(X_train.shape, X_extra.shape) - logging.info(" adding n_extra {} to n_train {}".format(len(y_extra), len(y_train))) - t = time.time() - X_train = np.concatenate((X_train, X_extra), 0) - y_train = np.concatenate((y_train, y_extra), 0) - # X_train = np.append(X_train, X_extra, axis=0) - # y_train = np.append(y_train, y_extra, axis=0) - logging.info(" added n_extra {} to n_train {} took {}s".format(len(y_extra), len(y_train), time.time() - t)) - else: - logging.info(" no extra images are included") - logging.info(" image size: %s n_train: %d n_test: %d" % (str(X_train.shape[1:4]), len(y_train), len(y_test))) - logging.info(" took: {}s".format(int(time.time() - start_time))) - 
return X_train, y_train, X_test, y_test - - -def load_ptb_dataset(path='data'): - """Load Penn TreeBank (PTB) dataset. - - It is used in many LANGUAGE MODELING papers, - including "Empirical Evaluation and Combination of Advanced Language - Modeling Techniques", "Recurrent Neural Network Regularization". - It consists of 929k training words, 73k validation words, and 82k test - words. It has 10k words in its vocabulary. - - Parameters - ---------- - path : str - The path that the data is downloaded to, default is ``data/ptb/``. - - Returns - -------- - train_data, valid_data, test_data : list of int - The training, validating and testing data in integer format. - vocab_size : int - The vocabulary size. - - Examples - -------- - >>> train_data, valid_data, test_data, vocab_size = tl.files.load_ptb_dataset() - - References - --------------- - - ``tensorflow.models.rnn.ptb import reader`` - - `Manual download `__ - - Notes - ------ - - If you want to get the raw data, see the source code. - - """ - path = os.path.join(path, 'ptb') - logging.info("Load or Download Penn TreeBank (PTB) dataset > {}".format(path)) - - # Maybe download and uncompress tar, or load existing files - filename = 'simple-examples.tgz' - url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/' - maybe_download_and_extract(filename, path, url, extract=True) - - data_path = os.path.join(path, 'simple-examples', 'data') - train_path = os.path.join(data_path, "ptb.train.txt") - valid_path = os.path.join(data_path, "ptb.valid.txt") - test_path = os.path.join(data_path, "ptb.test.txt") - - word_to_id = nlp.build_vocab(nlp.read_words(train_path)) - - train_data = nlp.words_to_word_ids(nlp.read_words(train_path), word_to_id) - valid_data = nlp.words_to_word_ids(nlp.read_words(valid_path), word_to_id) - test_data = nlp.words_to_word_ids(nlp.read_words(test_path), word_to_id) - vocab_size = len(word_to_id) - - # logging.info(nlp.read_words(train_path)) # ... 'according', 'to', 'mr.', '', ''] - # logging.info(train_data) # ... 214, 5, 23, 1, 2] - # logging.info(word_to_id) # ... 'beyond': 1295, 'anti-nuclear': 9599, 'trouble': 1520, '': 2 ... } - # logging.info(vocabulary) # 10000 - # exit() - return train_data, valid_data, test_data, vocab_size - - -def load_matt_mahoney_text8_dataset(path='data'): - """Load Matt Mahoney's dataset. - - Download a text file from Matt Mahoney's website - if not present, and make sure it's the right size. - Extract the first file enclosed in a zip file as a list of words. - This dataset can be used for Word Embedding. - - Parameters - ---------- - path : str - The path that the data is downloaded to, default is ``data/mm_test8/``. - - Returns - -------- - list of str - The raw text data e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...] 
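The zip handling behind this is only a few lines; a standalone sketch of the same extraction step (path and filename assumed to match the defaults used below):

    import os
    import zipfile

    def read_text8_words(path='data/mm_test8', filename='text8.zip'):
        # The archive contains a single member: one long line of space-separated words.
        with zipfile.ZipFile(os.path.join(path, filename)) as f:
            word_list = f.read(f.namelist()[0]).split()
        return [w.decode() for w in word_list]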
- - Examples - -------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> print('Data size', len(words)) - - """ - path = os.path.join(path, 'mm_test8') - logging.info("Load or Download matt_mahoney_text8 Dataset> {}".format(path)) - - filename = 'text8.zip' - url = 'http://mattmahoney.net/dc/' - maybe_download_and_extract(filename, path, url, expected_bytes=31344016) - - with zipfile.ZipFile(os.path.join(path, filename)) as f: - word_list = f.read(f.namelist()[0]).split() - for idx, _ in enumerate(word_list): - word_list[idx] = word_list[idx].decode() - return word_list - - -def load_imdb_dataset( - path='data', nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, - index_from=3 -): - """Load IMDB dataset. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/imdb/``. - nb_words : int - Number of words to get. - skip_top : int - Top most frequent words to ignore (they will appear as oov_char value in the sequence data). - maxlen : int - Maximum sequence length. Any longer sequence will be truncated. - seed : int - Seed for reproducible data shuffling. - start_char : int - The start of a sequence will be marked with this character. Set to 1 because 0 is usually the padding character. - oov_char : int - Words that were cut out because of the num_words or skip_top limit will be replaced with this character. - index_from : int - Index actual words with this index and higher. - - Examples - -------- - >>> X_train, y_train, X_test, y_test = tl.files.load_imdb_dataset( - ... nb_words=20000, test_split=0.2) - >>> print('X_train.shape', X_train.shape) - (20000,) [[1, 62, 74, ... 1033, 507, 27],[1, 60, 33, ... 13, 1053, 7]..] - >>> print('y_train.shape', y_train.shape) - (20000,) [1 0 0 ..., 1 0 1] - - References - ----------- - - `Modified from keras. `__ - - """ - path = os.path.join(path, 'imdb') - - filename = "imdb.pkl" - url = 'https://s3.amazonaws.com/text-datasets/' - maybe_download_and_extract(filename, path, url) - - if filename.endswith(".gz"): - f = gzip.open(os.path.join(path, filename), 'rb') - else: - f = open(os.path.join(path, filename), 'rb') - - X, labels = cPickle.load(f) - f.close() - - np.random.seed(seed) - np.random.shuffle(X) - np.random.seed(seed) - np.random.shuffle(labels) - - if start_char is not None: - X = [[start_char] + [w + index_from for w in x] for x in X] - elif index_from: - X = [[w + index_from for w in x] for x in X] - - if maxlen: - new_X = [] - new_labels = [] - for x, y in zip(X, labels): - if len(x) < maxlen: - new_X.append(x) - new_labels.append(y) - X = new_X - labels = new_labels - if not X: - raise Exception( - 'After filtering for sequences shorter than maxlen=' + str(maxlen) + ', no sequence was kept. ' - 'Increase maxlen.' 
- ) - if not nb_words: - nb_words = max([max(x) for x in X]) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] - else: - nX = [] - for x in X: - nx = [] - for w in x: - if (w >= nb_words or w < skip_top): - nx.append(w) - nX.append(nx) - X = nX - - X_train = np.array(X[:int(len(X) * (1 - test_split))]) - y_train = np.array(labels[:int(len(X) * (1 - test_split))]) - - X_test = np.array(X[int(len(X) * (1 - test_split)):]) - y_test = np.array(labels[int(len(X) * (1 - test_split)):]) - - return X_train, y_train, X_test, y_test - - -def load_nietzsche_dataset(path='data'): - """Load Nietzsche dataset. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/nietzsche/``. - - Returns - -------- - str - The content. - - Examples - -------- - >>> see tutorial_generate_text.py - >>> words = tl.files.load_nietzsche_dataset() - >>> words = basic_clean_str(words) - >>> words = words.split() - - """ - logging.info("Load or Download nietzsche dataset > {}".format(path)) - path = os.path.join(path, 'nietzsche') - - filename = "nietzsche.txt" - url = 'https://s3.amazonaws.com/text-datasets/' - filepath = maybe_download_and_extract(filename, path, url) - - with open(filepath, "r") as f: - words = f.read() - return words - - -def load_wmt_en_fr_dataset(path='data'): - """Load WMT'15 English-to-French translation dataset. - - It will download the data from the WMT'15 Website (10^9-French-English corpus), and the 2013 news test from the same site as development set. - Returns the directories of training data and test data. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/wmt_en_fr/``. - - References - ---------- - - Code modified from /tensorflow/models/rnn/translation/data_utils.py - - Notes - ----- - Usually, it will take a long time to download this dataset. - - """ - path = os.path.join(path, 'wmt_en_fr') - # URLs for WMT data. 
- _WMT_ENFR_TRAIN_URL = "http://www.statmt.org/wmt10/" - _WMT_ENFR_DEV_URL = "http://www.statmt.org/wmt15/" - - def gunzip_file(gz_path, new_path): - """Unzips from gz_path into new_path.""" - logging.info("Unpacking %s to %s" % (gz_path, new_path)) - with gzip.open(gz_path, "rb") as gz_file: - with open(new_path, "wb") as new_file: - for line in gz_file: - new_file.write(line) - - def get_wmt_enfr_train_set(path): - """Download the WMT en-fr training corpus to directory unless it's there.""" - filename = "training-giga-fren.tar" - maybe_download_and_extract(filename, path, _WMT_ENFR_TRAIN_URL, extract=True) - train_path = os.path.join(path, "giga-fren.release2.fixed") - gunzip_file(train_path + ".fr.gz", train_path + ".fr") - gunzip_file(train_path + ".en.gz", train_path + ".en") - return train_path - - def get_wmt_enfr_dev_set(path): - """Download the WMT en-fr training corpus to directory unless it's there.""" - filename = "dev-v2.tgz" - dev_file = maybe_download_and_extract(filename, path, _WMT_ENFR_DEV_URL, extract=False) - dev_name = "newstest2013" - dev_path = os.path.join(path, "newstest2013") - if not (gfile.Exists(dev_path + ".fr") and gfile.Exists(dev_path + ".en")): - logging.info("Extracting tgz file %s" % dev_file) - with tarfile.open(dev_file, "r:gz") as dev_tar: - fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr") - en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en") - fr_dev_file.name = dev_name + ".fr" # Extract without "dev/" prefix. - en_dev_file.name = dev_name + ".en" - dev_tar.extract(fr_dev_file, path) - dev_tar.extract(en_dev_file, path) - return dev_path - - logging.info("Load or Download WMT English-to-French translation > {}".format(path)) - - train_path = get_wmt_enfr_train_set(path) - dev_path = get_wmt_enfr_dev_set(path) - - return train_path, dev_path - - -def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False): - """Load Flickr25K dataset. - - Returns a list of images by a given tag from Flick25k dataset, - it will download Flickr25k from `the official website `__ - at the first time you use it. - - Parameters - ------------ - tag : str or None - What images to return. - - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. - - If you want to get all images, set to ``None``. - - path : str - The path that the data is downloaded to, defaults is ``data/flickr25k/``. - n_threads : int - The number of thread to read image. - printable : boolean - Whether to print infomation when reading images, default is ``False``. - - Examples - ----------- - Get images with tag of sky - - >>> images = tl.files.load_flickr25k_dataset(tag='sky') - - Get all images - - >>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True) - - """ - path = os.path.join(path, 'flickr25k') - - filename = 'mirflickr25k.zip' - url = 'http://press.liacs.nl/mirflickr/mirflickr25k/' - - # download dataset - if folder_exists(os.path.join(path, "mirflickr")) is False: - logging.info("[*] Flickr25k is nonexistent in {}".format(path)) - maybe_download_and_extract(filename, path, url, extract=True) - del_file(os.path.join(path, filename)) - - # return images by the given tag. - # 1. image path list - folder_imgs = os.path.join(path, "mirflickr") - path_imgs = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False) - path_imgs.sort(key=natural_keys) - - # 2. 
tag path list - folder_tags = os.path.join(path, "mirflickr", "meta", "tags") - path_tags = load_file_list(path=folder_tags, regx='\\.txt', printable=False) - path_tags.sort(key=natural_keys) - - # 3. select images - if tag is None: - logging.info("[Flickr25k] reading all images") - else: - logging.info("[Flickr25k] reading images with tag: {}".format(tag)) - images_list = [] - for idx, _v in enumerate(path_tags): - tags = read_file(os.path.join(folder_tags, path_tags[idx])).split('\n') - # logging.info(idx+1, tags) - if tag is None or tag in tags: - images_list.append(path_imgs[idx]) - - images = visualize.read_images(images_list, folder_imgs, n_threads=n_threads, printable=printable) - return images - - -def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printable=False): - """Load Flick1M dataset. - - Returns a list of images by a given tag from Flickr1M dataset, - it will download Flickr1M from `the official website `__ - at the first time you use it. - - Parameters - ------------ - tag : str or None - What images to return. - - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. - - If you want to get all images, set to ``None``. - - size : int - integer between 1 to 10. 1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10. - path : str - The path that the data is downloaded to, defaults is ``data/flickr25k/``. - n_threads : int - The number of thread to read image. - printable : boolean - Whether to print infomation when reading images, default is ``False``. - - Examples - ---------- - Use 200k images - - >>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=2) - - Use 1 Million images - - >>> images = tl.files.load_flickr1M_dataset(tag='zebra') - - """ - path = os.path.join(path, 'flickr1M') - logging.info("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000)) - images_zip = [ - 'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip', - 'images7.zip', 'images8.zip', 'images9.zip' - ] - tag_zip = 'tags.zip' - url = 'http://press.liacs.nl/mirflickr/mirflickr1m/' - - # download dataset - for image_zip in images_zip[0:size]: - image_folder = image_zip.split(".")[0] - # logging.info(path+"/"+image_folder) - if folder_exists(os.path.join(path, image_folder)) is False: - # logging.info(image_zip) - logging.info("[Flickr1M] {} is missing in {}".format(image_folder, path)) - maybe_download_and_extract(image_zip, path, url, extract=True) - del_file(os.path.join(path, image_zip)) - # os.system("mv {} {}".format(os.path.join(path, 'images'), os.path.join(path, image_folder))) - shutil.move(os.path.join(path, 'images'), os.path.join(path, image_folder)) - else: - logging.info("[Flickr1M] {} exists in {}".format(image_folder, path)) - - # download tag - if folder_exists(os.path.join(path, "tags")) is False: - logging.info("[Flickr1M] tag files is nonexistent in {}".format(path)) - maybe_download_and_extract(tag_zip, path, url, extract=True) - del_file(os.path.join(path, tag_zip)) - else: - logging.info("[Flickr1M] tags exists in {}".format(path)) - - # 1. 
image path list - images_list = [] - images_folder_list = [] - for i in range(0, size): - images_folder_list += load_folder_list(path=os.path.join(path, 'images%d' % i)) - images_folder_list.sort(key=lambda s: int(s.split('/')[-1])) # folder/images/ddd - - for folder in images_folder_list[0:size * 10]: - tmp = load_file_list(path=folder, regx='\\.jpg', printable=False) - tmp.sort(key=lambda s: int(s.split('.')[-2])) # ddd.jpg - images_list.extend([os.path.join(folder, x) for x in tmp]) - - # 2. tag path list - tag_list = [] - tag_folder_list = load_folder_list(os.path.join(path, "tags")) - - # tag_folder_list.sort(key=lambda s: int(s.split("/")[-1])) # folder/images/ddd - tag_folder_list.sort(key=lambda s: int(os.path.basename(s))) - - for folder in tag_folder_list[0:size * 10]: - tmp = load_file_list(path=folder, regx='\\.txt', printable=False) - tmp.sort(key=lambda s: int(s.split('.')[-2])) # ddd.txt - tmp = [os.path.join(folder, s) for s in tmp] - tag_list += tmp - - # 3. select images - logging.info("[Flickr1M] searching tag: {}".format(tag)) - select_images_list = [] - for idx, _val in enumerate(tag_list): - tags = read_file(tag_list[idx]).split('\n') - if tag in tags: - select_images_list.append(images_list[idx]) - - logging.info("[Flickr1M] reading images with tag: {}".format(tag)) - images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable) - return images - - -def load_cyclegan_dataset(filename='summer2winter_yosemite', path='data'): - """Load images from CycleGAN's database, see `this link `__. - - Parameters - ------------ - filename : str - The dataset you want, see `this link `__. - path : str - The path that the data is downloaded to, defaults is `data/cyclegan` - - Examples - --------- - >>> im_train_A, im_train_B, im_test_A, im_test_B = load_cyclegan_dataset(filename='summer2winter_yosemite') - - """ - path = os.path.join(path, 'cyclegan') - url = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/' - - if folder_exists(os.path.join(path, filename)) is False: - logging.info("[*] {} is nonexistent in {}".format(filename, path)) - maybe_download_and_extract(filename + '.zip', path, url, extract=True) - del_file(os.path.join(path, filename + '.zip')) - - def load_image_from_folder(path): - path_imgs = load_file_list(path=path, regx='\\.jpg', printable=False) - return visualize.read_images(path_imgs, path=path, n_threads=10, printable=False) - - im_train_A = load_image_from_folder(os.path.join(path, filename, "trainA")) - im_train_B = load_image_from_folder(os.path.join(path, filename, "trainB")) - im_test_A = load_image_from_folder(os.path.join(path, filename, "testA")) - im_test_B = load_image_from_folder(os.path.join(path, filename, "testB")) - - def if_2d_to_3d(images): # [h, w] --> [h, w, 3] - for i, _v in enumerate(images): - if len(images[i].shape) == 2: - images[i] = images[i][:, :, np.newaxis] - images[i] = np.tile(images[i], (1, 1, 3)) - return images - - im_train_A = if_2d_to_3d(im_train_A) - im_train_B = if_2d_to_3d(im_train_B) - im_test_A = if_2d_to_3d(im_test_A) - im_test_B = if_2d_to_3d(im_test_B) - - return im_train_A, im_train_B, im_test_A, im_test_B - - -def download_file_from_google_drive(ID, destination): - """Download file from Google Drive. - - See ``tl.files.load_celebA_dataset`` for example. - - Parameters - -------------- - ID : str - The driver ID. - destination : str - The destination for save file. 
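For large files, Google Drive first answers with a virus-scan warning page instead of the payload. The handshake below is a standalone sketch of how the function copes with that, retrying with the `download_warning` cookie value as a `confirm` token:

    import requests

    def fetch_drive_file(file_id, destination, chunk_size=32 * 1024):
        # Sketch of the Google Drive download handshake used below.
        url = 'https://docs.google.com/uc?export=download'
        session = requests.Session()
        response = session.get(url, params={'id': file_id}, stream=True)
        # Large files: the first response only sets a one-time confirm cookie.
        token = next((v for k, v in response.cookies.items()
                      if k.startswith('download_warning')), None)
        if token:
            response = session.get(url, params={'id': file_id, 'confirm': token}, stream=True)
        with open(destination, 'wb') as f:
            for chunk in response.iter_content(chunk_size):
                if chunk:  # filter out keep-alive chunks
                    f.write(chunk)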
- - """ - - def save_response_content(response, destination, chunk_size=32 * 1024): - total_size = int(response.headers.get('content-length', 0)) - with open(destination, "wb") as f: - for chunk in tqdm(response.iter_content(chunk_size), total=total_size, unit='B', unit_scale=True, - desc=destination): - if chunk: # filter out keep-alive new chunks - f.write(chunk) - - def get_confirm_token(response): - for key, value in response.cookies.items(): - if key.startswith('download_warning'): - return value - return None - - URL = "https://docs.google.com/uc?export=download" - session = requests.Session() - - response = session.get(URL, params={'id': ID}, stream=True) - token = get_confirm_token(response) - - if token: - params = {'id': ID, 'confirm': token} - response = session.get(URL, params=params, stream=True) - save_response_content(response, destination) - - -def load_celebA_dataset(path='data'): - """Load CelebA dataset - - Return a list of image path. - - Parameters - ----------- - path : str - The path that the data is downloaded to, defaults is ``data/celebA/``. - - """ - data_dir = 'celebA' - filename, drive_id = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM" - save_path = os.path.join(path, filename) - image_path = os.path.join(path, data_dir) - if os.path.exists(image_path): - logging.info('[*] {} already exists'.format(save_path)) - else: - exists_or_mkdir(path) - download_file_from_google_drive(drive_id, save_path) - zip_dir = '' - with zipfile.ZipFile(save_path) as zf: - zip_dir = zf.namelist()[0] - zf.extractall(path) - os.remove(save_path) - os.rename(os.path.join(path, zip_dir), image_path) - - data_files = load_file_list(path=image_path, regx='\\.jpg', printable=False) - for i, _v in enumerate(data_files): - data_files[i] = os.path.join(image_path, data_files[i]) - return data_files - - -def load_voc_dataset(path='data', dataset='2012', contain_classes_in_person=False): - """Pascal VOC 2007/2012 Dataset. - - It has 20 objects: - aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor - and additional 3 classes : head, hand, foot for person. - - Parameters - ----------- - path : str - The path that the data is downloaded to, defaults is ``data/VOC``. - dataset : str - The VOC dataset version, `2012`, `2007`, `2007test` or `2012test`. We usually train model on `2007+2012` and test it on `2007test`. - contain_classes_in_person : boolean - Whether include head, hand and foot annotation, default is False. - - Returns - --------- - imgs_file_list : list of str - Full paths of all images. - imgs_semseg_file_list : list of str - Full paths of all maps for semantic segmentation. Note that not all images have this map! - imgs_insseg_file_list : list of str - Full paths of all maps for instance segmentation. Note that not all images have this map! - imgs_ann_file_list : list of str - Full paths of all annotations for bounding box and object class, all images have this annotations. - classes : list of str - Classes in order. - classes_in_person : list of str - Classes in person. - classes_dict : dictionary - Class label to integer. - n_objs_list : list of int - Number of objects in all images in ``imgs_file_list`` in order. - objs_info_list : list of str - Darknet format for the annotation of all images in ``imgs_file_list`` in order. ``[class_id x_centre y_centre width height]`` in ratio format. 
- objs_info_dicts : dictionary - The annotation of all images in ``imgs_file_list``, ``{imgs_file_list : dictionary for annotation}``, - format from `TensorFlow/Models/object-detection `__. - - Examples - ---------- - >>> imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, - >>> classes, classes_in_person, classes_dict, - >>> n_objs_list, objs_info_list, objs_info_dicts = tl.files.load_voc_dataset(dataset="2012", contain_classes_in_person=False) - >>> idx = 26 - >>> print(classes) - ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] - >>> print(classes_dict) - {'sheep': 16, 'horse': 12, 'bicycle': 1, 'bottle': 4, 'cow': 9, 'sofa': 17, 'car': 6, 'dog': 11, 'cat': 7, 'person': 14, 'train': 18, 'diningtable': 10, 'aeroplane': 0, 'bus': 5, 'pottedplant': 15, 'tvmonitor': 19, 'chair': 8, 'bird': 2, 'boat': 3, 'motorbike': 13} - >>> print(imgs_file_list[idx]) - data/VOC/VOC2012/JPEGImages/2007_000423.jpg - >>> print(n_objs_list[idx]) - 2 - >>> print(imgs_ann_file_list[idx]) - data/VOC/VOC2012/Annotations/2007_000423.xml - >>> print(objs_info_list[idx]) - 14 0.173 0.461333333333 0.142 0.496 - 14 0.828 0.542666666667 0.188 0.594666666667 - >>> ann = tl.prepro.parse_darknet_ann_str_to_list(objs_info_list[idx]) - >>> print(ann) - [[14, 0.173, 0.461333333333, 0.142, 0.496], [14, 0.828, 0.542666666667, 0.188, 0.594666666667]] - >>> c, b = tl.prepro.parse_darknet_ann_list_to_cls_box(ann) - >>> print(c, b) - [14, 14] [[0.173, 0.461333333333, 0.142, 0.496], [0.828, 0.542666666667, 0.188, 0.594666666667]] - - References - ------------- - - `Pascal VOC2012 Website `__. - - `Pascal VOC2007 Website `__. - - """ - path = os.path.join(path, 'VOC') - - def _recursive_parse_xml_to_dict(xml): - """Recursively parses XML contents to python dict. - - We assume that `object` tags are the only ones that can appear - multiple times at the same level of a tree. - - Args: - xml: xml tree obtained by parsing XML file contents using lxml.etree - - Returns: - Python dictionary holding XML contents. 
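A toy example of what that recursion yields; repeated `object` tags are collected into a list while every other tag maps one-to-one (illustrative XML, not a real VOC annotation):

    from lxml import etree

    xml = etree.fromstring(
        '<annotation><filename>x.jpg</filename>'
        '<object><name>dog</name></object>'
        '<object><name>cat</name></object></annotation>'
    )
    # _recursive_parse_xml_to_dict(xml) returns:
    # {'annotation': {'filename': 'x.jpg',
    #                 'object': [{'name': 'dog'}, {'name': 'cat'}]}}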
- - """ - if not xml: - # if xml is not None: - return {xml.tag: xml.text} - result = {} - for child in xml: - child_result = _recursive_parse_xml_to_dict(child) - if child.tag != 'object': - result[child.tag] = child_result[child.tag] - else: - if child.tag not in result: - result[child.tag] = [] - result[child.tag].append(child_result[child.tag]) - return {xml.tag: result} - - if dataset == "2012": - url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/" - tar_filename = "VOCtrainval_11-May-2012.tar" - extracted_filename = "VOC2012" # "VOCdevkit/VOC2012" - logging.info(" [============= VOC 2012 =============]") - elif dataset == "2012test": - extracted_filename = "VOC2012test" # "VOCdevkit/VOC2012" - logging.info(" [============= VOC 2012 Test Set =============]") - logging.info( - " \nAuthor: 2012test only have person annotation, so 2007test is highly recommended for testing !\n" - ) - time.sleep(3) - if os.path.isdir(os.path.join(path, extracted_filename)) is False: - logging.info("For VOC 2012 Test data - online registration required") - logging.info( - " Please download VOC2012test.tar from: \n register: http://host.robots.ox.ac.uk:8080 \n voc2012 : http://host.robots.ox.ac.uk:8080/eval/challenges/voc2012/ \ndownload: http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar" - ) - logging.info(" unzip VOC2012test.tar,rename the folder to VOC2012test and put it into %s" % path) - exit() - # # http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar - # url = "http://host.robots.ox.ac.uk:8080/eval/downloads/" - # tar_filename = "VOC2012test.tar" - elif dataset == "2007": - url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/" - tar_filename = "VOCtrainval_06-Nov-2007.tar" - extracted_filename = "VOC2007" - logging.info(" [============= VOC 2007 =============]") - elif dataset == "2007test": - # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/index.html#testdata - # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar - url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/" - tar_filename = "VOCtest_06-Nov-2007.tar" - extracted_filename = "VOC2007test" - logging.info(" [============= VOC 2007 Test Set =============]") - else: - raise Exception("Please set the dataset aug to 2012, 2012test or 2007.") - - # download dataset - if dataset != "2012test": - _platform = sys.platform - if folder_exists(os.path.join(path, extracted_filename)) is False: - logging.info("[VOC] {} is nonexistent in {}".format(extracted_filename, path)) - maybe_download_and_extract(tar_filename, path, url, extract=True) - del_file(os.path.join(path, tar_filename)) - if dataset == "2012": - if _platform == "win32": - os.system("mv {}\VOCdevkit\VOC2012 {}\VOC2012".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2012 {}/VOC2012".format(path, path)) - elif dataset == "2007": - if _platform == "win32": - os.system("mv {}\VOCdevkit\VOC2007 {}\VOC2007".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007".format(path, path)) - elif dataset == "2007test": - if _platform == "win32": - os.system("mv {}\VOCdevkit\VOC2007 {}\VOC2007test".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007test".format(path, path)) - del_folder(os.path.join(path, 'VOCdevkit')) - # object classes(labels) NOTE: YOU CAN CUSTOMIZE THIS LIST - classes = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", - "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor" - ] - if 
contain_classes_in_person: - classes_in_person = ["head", "hand", "foot"] - else: - classes_in_person = [] - - classes += classes_in_person # use extra 3 classes for person - - classes_dict = utils.list_string_to_dict(classes) - logging.info("[VOC] object classes {}".format(classes_dict)) - - # 1. image path list - # folder_imgs = path+"/"+extracted_filename+"/JPEGImages/" - folder_imgs = os.path.join(path, extracted_filename, "JPEGImages") - imgs_file_list = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False) - logging.info("[VOC] {} images found".format(len(imgs_file_list))) - - imgs_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000027.jpg --> 2007000027 - - imgs_file_list = [os.path.join(folder_imgs, s) for s in imgs_file_list] - # logging.info('IM',imgs_file_list[0::3333], imgs_file_list[-1]) - if dataset != "2012test": - # ======== 2. semantic segmentation maps path list - # folder_semseg = path+"/"+extracted_filename+"/SegmentationClass/" - folder_semseg = os.path.join(path, extracted_filename, "SegmentationClass") - imgs_semseg_file_list = load_file_list(path=folder_semseg, regx='\\.png', printable=False) - logging.info("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list))) - imgs_semseg_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000032.png --> 2007000032 - imgs_semseg_file_list = [os.path.join(folder_semseg, s) for s in imgs_semseg_file_list] - # logging.info('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1]) - # ======== 3. instance segmentation maps path list - # folder_insseg = path+"/"+extracted_filename+"/SegmentationObject/" - folder_insseg = os.path.join(path, extracted_filename, "SegmentationObject") - imgs_insseg_file_list = load_file_list(path=folder_insseg, regx='\\.png', printable=False) - logging.info("[VOC] {} maps for instance segmentation found".format(len(imgs_semseg_file_list))) - imgs_insseg_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000032.png --> 2007000032 - imgs_insseg_file_list = [os.path.join(folder_insseg, s) for s in imgs_insseg_file_list] - # logging.info('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1]) - else: - imgs_semseg_file_list = [] - imgs_insseg_file_list = [] - # 4. annotations for bounding box and object class - # folder_ann = path+"/"+extracted_filename+"/Annotations/" - folder_ann = os.path.join(path, extracted_filename, "Annotations") - imgs_ann_file_list = load_file_list(path=folder_ann, regx='\\.xml', printable=False) - logging.info( - "[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list)) - ) - imgs_ann_file_list.sort( - key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]) - ) # 2007_000027.xml --> 2007000027 - imgs_ann_file_list = [os.path.join(folder_ann, s) for s in imgs_ann_file_list] - # logging.info('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1]) - - if dataset == "2012test": # remove unused images in JPEG folder - imgs_file_list_new = [] - for ann in imgs_ann_file_list: - ann = os.path.split(ann)[-1].split('.')[0] - for im in imgs_file_list: - if ann in im: - imgs_file_list_new.append(im) - break - imgs_file_list = imgs_file_list_new - logging.info("[VOC] keep %d images" % len(imgs_file_list_new)) - - # parse XML annotations - def convert(size, box): - dw = 1. / size[0] - dh = 1. 
/ size[1] - x = (box[0] + box[1]) / 2.0 - y = (box[2] + box[3]) / 2.0 - w = box[1] - box[0] - h = box[3] - box[2] - x = x * dw - w = w * dw - y = y * dh - h = h * dh - return x, y, w, h - - def convert_annotation(file_name): - """Given VOC2012 XML Annotations, returns number of objects and info.""" - in_file = open(file_name) - out_file = "" - tree = ET.parse(in_file) - root = tree.getroot() - size = root.find('size') - w = int(size.find('width').text) - h = int(size.find('height').text) - n_objs = 0 - - for obj in root.iter('object'): - if dataset != "2012test": - difficult = obj.find('difficult').text - cls = obj.find('name').text - if cls not in classes or int(difficult) == 1: - continue - else: - cls = obj.find('name').text - if cls not in classes: - continue - cls_id = classes.index(cls) - xmlbox = obj.find('bndbox') - b = ( - float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), - float(xmlbox.find('ymax').text) - ) - bb = convert((w, h), b) - - out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' - n_objs += 1 - if cls in "person": - for part in obj.iter('part'): - cls = part.find('name').text - if cls not in classes_in_person: - continue - cls_id = classes.index(cls) - xmlbox = part.find('bndbox') - b = ( - float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), - float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text) - ) - bb = convert((w, h), b) - # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') - out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' - n_objs += 1 - in_file.close() - return n_objs, out_file - - logging.info("[VOC] Parsing xml annotations files") - n_objs_list = [] - objs_info_list = [] # Darknet Format list of string - objs_info_dicts = {} - for idx, ann_file in enumerate(imgs_ann_file_list): - n_objs, objs_info = convert_annotation(ann_file) - n_objs_list.append(n_objs) - objs_info_list.append(objs_info) - with tf.io.gfile.GFile(ann_file, 'r') as fid: - xml_str = fid.read() - xml = etree.fromstring(xml_str) - data = _recursive_parse_xml_to_dict(xml)['annotation'] - objs_info_dicts.update({imgs_file_list[idx]: data}) - - return imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, classes, classes_in_person, classes_dict, n_objs_list, objs_info_list, objs_info_dicts - - -def load_mpii_pose_dataset(path='data', is_16_pos_only=False): - """Load MPII Human Pose Dataset. - - Parameters - ----------- - path : str - The path that the data is downloaded to. - is_16_pos_only : boolean - If True, only return the peoples contain 16 pose keypoints. (Usually be used for single person pose estimation) - - Returns - ---------- - img_train_list : list of str - The image directories of training data. - ann_train_list : list of dict - The annotations of training data. - img_test_list : list of str - The image directories of testing data. - ann_test_list : list of dict - The annotations of testing data. - - Examples - -------- - >>> import pprint - >>> import tensorlayer as tl - >>> img_train_list, ann_train_list, img_test_list, ann_test_list = tl.files.load_mpii_pose_dataset() - >>> image = tl.vis.read_image(img_train_list[0]) - >>> tl.vis.draw_mpii_pose_to_image(image, ann_train_list[0], 'image.png') - >>> pprint.pprint(ann_train_list[0]) - - References - ----------- - - `MPII Human Pose Dataset. CVPR 14 `__ - - `MPII Human Pose Models. 
CVPR 16 `__ - - `MPII Human Shape, Poselet Conditioned Pictorial Structures and etc `__ - - `MPII Keyponts and ID `__ - """ - path = os.path.join(path, 'mpii_human_pose') - logging.info("Load or Download MPII Human Pose > {}".format(path)) - - # annotation - url = "http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/" - tar_filename = "mpii_human_pose_v1_u12_2.zip" - extracted_filename = "mpii_human_pose_v1_u12_2" - if folder_exists(os.path.join(path, extracted_filename)) is False: - logging.info("[MPII] (annotation) {} is nonexistent in {}".format(extracted_filename, path)) - maybe_download_and_extract(tar_filename, path, url, extract=True) - del_file(os.path.join(path, tar_filename)) - - # images - url = "http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/" - tar_filename = "mpii_human_pose_v1.tar.gz" - extracted_filename2 = "images" - if folder_exists(os.path.join(path, extracted_filename2)) is False: - logging.info("[MPII] (images) {} is nonexistent in {}".format(extracted_filename, path)) - maybe_download_and_extract(tar_filename, path, url, extract=True) - del_file(os.path.join(path, tar_filename)) - - # parse annotation, format see http://human-pose.mpi-inf.mpg.de/#download - logging.info("reading annotations from mat file ...") - # mat = sio.loadmat(os.path.join(path, extracted_filename, "mpii_human_pose_v1_u12_1.mat")) - - # def fix_wrong_joints(joint): # https://github.com/mitmul/deeppose/blob/master/datasets/mpii_dataset.py - # if '12' in joint and '13' in joint and '2' in joint and '3' in joint: - # if ((joint['12'][0] < joint['13'][0]) and - # (joint['3'][0] < joint['2'][0])): - # joint['2'], joint['3'] = joint['3'], joint['2'] - # if ((joint['12'][0] > joint['13'][0]) and - # (joint['3'][0] > joint['2'][0])): - # joint['2'], joint['3'] = joint['3'], joint['2'] - # return joint - - ann_train_list = [] - ann_test_list = [] - img_train_list = [] - img_test_list = [] - - def save_joints(): - # joint_data_fn = os.path.join(path, 'data.json') - # fp = open(joint_data_fn, 'w') - mat = sio.loadmat(os.path.join(path, extracted_filename, "mpii_human_pose_v1_u12_1.mat")) - - for _, (anno, train_flag) in enumerate( # all images - zip(mat['RELEASE']['annolist'][0, 0][0], mat['RELEASE']['img_train'][0, 0][0])): - - img_fn = anno['image']['name'][0, 0][0] - train_flag = int(train_flag) - - # print(i, img_fn, train_flag) # DEBUG print all images - - if train_flag: - img_train_list.append(img_fn) - ann_train_list.append([]) - else: - img_test_list.append(img_fn) - ann_test_list.append([]) - - head_rect = [] - if 'x1' in str(anno['annorect'].dtype): - head_rect = zip( - [x1[0, 0] for x1 in anno['annorect']['x1'][0]], [y1[0, 0] for y1 in anno['annorect']['y1'][0]], - [x2[0, 0] for x2 in anno['annorect']['x2'][0]], [y2[0, 0] for y2 in anno['annorect']['y2'][0]] - ) - else: - head_rect = [] # TODO - - if 'annopoints' in str(anno['annorect'].dtype): - annopoints = anno['annorect']['annopoints'][0] - head_x1s = anno['annorect']['x1'][0] - head_y1s = anno['annorect']['y1'][0] - head_x2s = anno['annorect']['x2'][0] - head_y2s = anno['annorect']['y2'][0] - - for annopoint, head_x1, head_y1, head_x2, head_y2 in zip(annopoints, head_x1s, head_y1s, head_x2s, - head_y2s): - # if annopoint != []: - # if len(annopoint) != 0: - if annopoint.size: - head_rect = [ - float(head_x1[0, 0]), - float(head_y1[0, 0]), - float(head_x2[0, 0]), - float(head_y2[0, 0]) - ] - - # joint coordinates - annopoint = annopoint['point'][0, 0] - j_id = [str(j_i[0, 0]) for j_i in annopoint['id'][0]] - x = [x[0, 0] for x in 
annopoint['x'][0]] - y = [y[0, 0] for y in annopoint['y'][0]] - joint_pos = {} - for _j_id, (_x, _y) in zip(j_id, zip(x, y)): - joint_pos[int(_j_id)] = [float(_x), float(_y)] - # joint_pos = fix_wrong_joints(joint_pos) - - # visibility list - if 'is_visible' in str(annopoint.dtype): - vis = [v[0] if v.size > 0 else [0] for v in annopoint['is_visible'][0]] - vis = dict([(k, int(v[0])) if len(v) > 0 else v for k, v in zip(j_id, vis)]) - else: - vis = None - - # if len(joint_pos) == 16: - if (not is_16_pos_only) or (len(joint_pos) == 16): - # only use image with 16 key points / or use all - data = { - 'filename': img_fn, - 'train': train_flag, - 'head_rect': head_rect, - 'is_visible': vis, - 'joint_pos': joint_pos - } - # print(json.dumps(data), file=fp) # py3 - if train_flag: - ann_train_list[-1].append(data) - else: - ann_test_list[-1].append(data) - - # def write_line(datum, fp): - # joints = sorted([[int(k), v] for k, v in datum['joint_pos'].items()]) - # joints = np.array([j for i, j in joints]).flatten() - # - # out = [datum['filename']] - # out.extend(joints) - # out = [str(o) for o in out] - # out = ','.join(out) - # - # print(out, file=fp) - - # def split_train_test(): - # # fp_test = open('data/mpii/test_joints.csv', 'w') - # fp_test = open(os.path.join(path, 'test_joints.csv'), 'w') - # # fp_train = open('data/mpii/train_joints.csv', 'w') - # fp_train = open(os.path.join(path, 'train_joints.csv'), 'w') - # # all_data = open('data/mpii/data.json').readlines() - # all_data = open(os.path.join(path, 'data.json')).readlines() - # N = len(all_data) - # N_test = int(N * 0.1) - # N_train = N - N_test - # - # print('N:{}'.format(N)) - # print('N_train:{}'.format(N_train)) - # print('N_test:{}'.format(N_test)) - # - # np.random.seed(1701) - # perm = np.random.permutation(N) - # test_indices = perm[:N_test] - # train_indices = perm[N_test:] - # - # print('train_indices:{}'.format(len(train_indices))) - # print('test_indices:{}'.format(len(test_indices))) - # - # for i in train_indices: - # datum = json.loads(all_data[i].strip()) - # write_line(datum, fp_train) - # - # for i in test_indices: - # datum = json.loads(all_data[i].strip()) - # write_line(datum, fp_test) - - save_joints() - # split_train_test() # - - # read images dir - logging.info("reading images list ...") - img_dir = os.path.join(path, extracted_filename2) - _img_list = load_file_list(path=os.path.join(path, extracted_filename2), regx='\\.jpg', printable=False) - # ann_list = json.load(open(os.path.join(path, 'data.json'))) - for i, im in enumerate(img_train_list): - if im not in _img_list: - print('missing training image {} in {} (remove from img(ann)_train_list)'.format(im, img_dir)) - # img_train_list.remove(im) - del img_train_list[i] - del ann_train_list[i] - for i, im in enumerate(img_test_list): - if im not in _img_list: - print('missing testing image {} in {} (remove from img(ann)_test_list)'.format(im, img_dir)) - # img_test_list.remove(im) - del img_test_list[i] - del ann_test_list[i] - - # check annotation and images - n_train_images = len(img_train_list) - n_test_images = len(img_test_list) - n_images = n_train_images + n_test_images - logging.info("n_images: {} n_train_images: {} n_test_images: {}".format(n_images, n_train_images, n_test_images)) - n_train_ann = len(ann_train_list) - n_test_ann = len(ann_test_list) - n_ann = n_train_ann + n_test_ann - logging.info("n_ann: {} n_train_ann: {} n_test_ann: {}".format(n_ann, n_train_ann, n_test_ann)) - n_train_people = 
len(sum(ann_train_list, [])) - n_test_people = len(sum(ann_test_list, [])) - n_people = n_train_people + n_test_people - logging.info("n_people: {} n_train_people: {} n_test_people: {}".format(n_people, n_train_people, n_test_people)) - # add path to all image file name - for i, value in enumerate(img_train_list): - img_train_list[i] = os.path.join(img_dir, value) - for i, value in enumerate(img_test_list): - img_test_list[i] = os.path.join(img_dir, value) - return img_train_list, ann_train_list, img_test_list, ann_test_list - - -def save_npz(save_list=None, name='model.npz', sess=None): - # TODO: Documentation needs updating - """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore. - - Parameters - ---------- - save_list : list of tensor - A list of parameters (tensor) to be saved. - name : str - The name of the `.npz` file. - sess : None or Session - Session may be required in some case. - - Examples - -------- - Save model to npz - - >>> tl.files.save_npz(network.all_params, name='model.npz', sess=sess) - - Load model from npz (Method 1) - - >>> load_params = tl.files.load_npz(name='model.npz') - >>> tl.files.assign_weights(sess, load_params, network) - - Load model from npz (Method 2) - - >>> tl.files.load_and_assign_npz(sess=sess, name='model.npz', network=network) - - Notes - ----- - If you got session issues, you can change the value.eval() to value.eval(session=sess) - - References - ---------- - `Saving dictionary using numpy `__ - - """ - logging.info("[*] Saving TL weights into %s" % name) - if save_list is None: - save_list = [] - - save_list_var = tf_variables_to_numpy(save_list, sess) - # save_list_var = [] - # if sess: - # save_list_var = sess.run(save_list) - # else: - # try: - # save_list_var.extend([v.eval() for v in save_list]) - # except Exception: - # logging.info( - # " Fail to save model, Hint: pass the session into this function, tl.files.save_npz(network.all_params, name='model.npz', sess=sess)" - # ) - np.savez(name, params=save_list_var) - save_list_var = None - del save_list_var - logging.info("[*] Saved") - - -def load_npz(path='', name='model.npz'): - """Load the parameters of a Model saved by tl.files.save_npz(). - - Parameters - ---------- - path : str - Folder path to `.npz` file. - name : str - The name of the `.npz` file. - - Returns - -------- - list of array - A list of parameters in order. - - Examples - -------- - - See ``tl.files.save_npz`` - - References - ---------- - - `Saving dictionary using numpy `__ - - """ - d = np.load(os.path.join(path, name)) - return d['params'] - - -def assign_params(**kwargs): - raise Exception("please change assign_params --> assign_weights") - - -def assign_weights(sess, weights, network): - """Assign the given parameters to the TensorLayer network. - - Parameters - ---------- - sess : Session - TensorFlow Session. In eager mode, it should be none; In graph mode, it should be specified. - weights : list of array - A list of model weights (array) in order. - network : :class:`Layer` - The network to be assigned. - - Returns - -------- - 1) list of operations if in graph mode - A list of tf ops in order that assign weights. Support sess.run(ops) manually. - 2) list of tf variables if in eager mode - A list of tf variables (assigned weights) in order. 
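# --- Editorial sketch (not part of the original source): hedged usage of
# ``assign_weights`` above; ``net`` is a hypothetical TensorLayer model built
# elsewhere, and 'model.npz' a file produced by ``save_npz``.
import tensorlayer as tl

weights = tl.files.load_npz(name='model.npz')
tl.files.assign_weights(None, weights, net)          # eager mode (TF2): assigns in place, sess=None
# ops = tl.files.assign_weights(sess, weights, net)  # graph mode (TF1): the assign ops are run via sess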
-
-    Examples
-    --------
-
-    References
-    ----------
-    - `Assign value to a TensorFlow variable `__
-
-    """
-    ops = []
-    for idx, param in enumerate(weights):
-        ops.append(network.weights[idx].assign(param))
-    if sess is not None:
-        sess.run(ops)
-    return ops
-
-
-def load_and_assign_npz(sess=None, name=None, network=None):
-    # TODO: Documentation pending
-    """Load model from npz and assign to a network.
-
-    Parameters
-    -------------
-    sess : Session
-        TensorFlow Session.
-    name : str
-        The name of the `.npz` file.
-    network : :class:`Layer`
-        The network to be assigned.
-
-    Returns
-    --------
-    False or network
-        Returns False if the model does not exist.
-
-    Examples
-    --------
-    - See ``tl.files.save_npz``
-
-    """
-    if network is None:
-        raise ValueError("network is None.")
-
-    if not os.path.exists(name):
-        logging.error("file {} doesn't exist.".format(name))
-        return False
-    else:
-        weights = load_npz(name=name)
-        assign_weights(sess, weights, network)
-        logging.info("[*] Load {} SUCCESS!".format(name))
-
-
-def save_npz_dict(save_list=None, name='model.npz', sess=None):
-    """Input parameters and the file name, save parameters as a dictionary into .npz file.
-
-    Use ``tl.files.load_and_assign_npz_dict()`` to restore.
-
-    Parameters
-    ----------
-    save_list : list of parameters
-        A list of parameters (tensor) to be saved.
-    name : str
-        The name of the `.npz` file.
-    sess : Session
-        TensorFlow Session.
-
-    """
-    # if sess is None:
-    #     raise ValueError("session is None.")
-    if save_list is None:
-        save_list = []
-
-    save_list_names = [tensor.name for tensor in save_list]
-    # save_list_var = sess.run(save_list)
-    save_list_var = tf_variables_to_numpy(save_list, sess)
-    save_var_dict = {save_list_names[idx]: val for idx, val in enumerate(save_list_var)}
-    np.savez(name, **save_var_dict)
-    save_list_var = None
-    save_var_dict = None
-    del save_list_var
-    del save_var_dict
-    logging.info("[*] Model saved in npz_dict %s" % name)
-
-
-def load_and_assign_npz_dict(sess=None, name='model.npz', network=None, skip=False):
-    # TODO: Documentation pending
-    """Restore the parameters saved by ``tl.files.save_npz_dict()``.
-
-    Parameters
-    ----------
-    name : str
-        The name of the `.npz` file.
-    sess : Session
-        TensorFlow Session.
-
-    """
-    # if sess is None:
-    #     raise ValueError("session is None.")
-
-    if not os.path.exists(name):
-        logging.error("file {} doesn't exist.".format(name))
-        return
-
-    weights = np.load(name)
-    if len(weights.keys()) != len(set(weights.keys())):
-        raise Exception("Duplication in model npz_dict %s" % name)
-
-    net_weights_name = [w.name for w in network.weights]
-
-    for key in weights.keys():
-        if key not in net_weights_name:
-            if skip:
-                logging.warning("Weights named '%s' not found in network. Skip it." % key)
-            else:
-                raise RuntimeError("Weights named '%s' not found in network. Hint: set argument skip=True "
-                                   "if you want to skip redundant or mismatched weights." % key)
-        else:
-            assign_tf_variable(network.weights[net_weights_name.index(key)], weights[key], sess)
-
-    # try:
-    #     assign_tf_variable(network.weights[net_weights_name.index(key)], weights[key], sess)
-    # except KeyError:
-    #     logging.info("[!] Warning: Tensor named %s not found in network."
% key) - # ops = list() - # for key in weights.keys(): - # try: - # # tensor = tf.get_default_graph().get_tensor_by_name(key) - # # varlist = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=key) - # varlist = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=key) - # if len(varlist) > 1: - # raise Exception("[!] Multiple candidate variables to be assigned for name %s" % key) - # elif len(varlist) == 0: - # raise KeyError - # else: - # ops.append(varlist[0].assign(weights[key])) - # logging.info("[*] weights restored: %s" % key) - # except KeyError: - # logging.info("[!] Warning: Tensor named %s not found in network." % key) - # - # sess.run(ops) - logging.info("[*] Model restored from npz_dict %s" % name) - - -def save_ckpt( - sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None, global_step=None, printable=False -): - """Save parameters into `ckpt` file. - - Parameters - ------------ - sess : Session - TensorFlow Session. - mode_name : str - The name of the model, default is ``model.ckpt``. - save_dir : str - The path / file directory to the `ckpt`, default is ``checkpoint``. - var_list : list of tensor - The parameters / variables (tensor) to be saved. If empty, save all global variables (default). - global_step : int or None - Step number. - printable : boolean - Whether to print all parameters information. - - See Also - -------- - load_ckpt - - """ - # if sess is None: - # raise ValueError("session is None.") - if var_list is None: - if sess is None: - # FIXME: not sure whether global variables can be accessed in eager mode - raise ValueError("If var_list is None, sess must be specified. " - "In eager mode, can not access global variables easily. ") - var_list = [] - - ckpt_file = os.path.join(save_dir, mode_name) - if var_list == []: - var_list = tf.global_variables() - - logging.info("[*] save %s n_weights: %d" % (ckpt_file, len(var_list))) - - if printable: - for idx, v in enumerate(var_list): - logging.info(" param {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) - - if sess: - # graph mode - saver = tf.train.Saver(var_list) - saver.save(sess, ckpt_file, global_step=global_step) - else: - # eager mode - # saver = tfes.Saver(var_list) - # saver.save(ckpt_file, global_step=global_step) - # TODO: tf2.0 not stable, cannot import tensorflow.contrib.eager.python.saver - pass - - -def load_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None, is_latest=True, printable=False): - """Load parameters from `ckpt` file. - - Parameters - ------------ - sess : Session - TensorFlow Session. - mode_name : str - The name of the model, default is ``model.ckpt``. - save_dir : str - The path / file directory to the `ckpt`, default is ``checkpoint``. - var_list : list of tensor - The parameters / variables (tensor) to be saved. If empty, save all global variables (default). - is_latest : boolean - Whether to load the latest `ckpt`, if False, load the `ckpt` with the name of ```mode_name``. - printable : boolean - Whether to print all parameters information. - - Examples - ---------- - - Save all global parameters. - - >>> tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', save_dir='model', printable=True) - - - Save specific parameters. - - >>> tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', var_list=net.all_params, save_dir='model', printable=True) - - - Load latest ckpt. - - >>> tl.files.load_ckpt(sess=sess, var_list=net.all_params, save_dir='model', printable=True) - - - Load specific ckpt. 
- - >>> tl.files.load_ckpt(sess=sess, mode_name='model.ckpt', var_list=net.all_params, save_dir='model', is_latest=False, printable=True) - - """ - # if sess is None: - # raise ValueError("session is None.") - if var_list is None: - if sess is None: - # FIXME: not sure whether global variables can be accessed in eager mode - raise ValueError("If var_list is None, sess must be specified. " - "In eager mode, can not access global variables easily. ") - var_list = [] - - if is_latest: - ckpt_file = tf.train.latest_checkpoint(save_dir) - else: - ckpt_file = os.path.join(save_dir, mode_name) - - if not var_list: - var_list = tf.global_variables() - - logging.info("[*] load %s n_weights: %d" % (ckpt_file, len(var_list))) - - if printable: - for idx, v in enumerate(var_list): - logging.info(" weights {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) - - try: - if sess: - # graph mode - saver = tf.train.Saver(var_list) - saver.restore(sess, ckpt_file) - else: - # eager mode - # saver = tfes.Saver(var_list) - # saver.restore(ckpt_file) - # TODO: tf2.0 not stable, cannot import tensorflow.contrib.eager.python.saver - pass - - except Exception as e: - logging.info(e) - logging.info("[*] load ckpt fail ...") - - -''' -def save_graph(network=None, name='graph.pkl'): - """Save the architecture of TL model into a pickle file. No parameters be saved. - - Parameters - ----------- - network : TensorLayer layer - The network to save. - name : str - The name of graph file. - - Examples - -------- - Save the architecture - >>> tl.files.save_graph(net_test, 'graph.pkl') - - Load the architecture in another script (no parameters restore) - >>> net = tl.files.load_graph('graph.pkl') - """ - logging.info("[*] Saving TL graph into {}".format(name)) - graphs = network.all_graphs - with open(name, 'wb') as file: - # pickle.dumps(graphs, protocol=pickle.HIGHEST_PROTOCOL) - pickle.dump(graphs, file, protocol=pickle.HIGHEST_PROTOCOL) - logging.info("[*] Saved graph") - - -def _graph2net(graphs): - """Inputs graphs, returns network.""" - input_list = list() - layer_dict = dict() - # loop every layers - for graph in graphs: - # get current layer class - name, layer_kwargs = graph - layer_kwargs = dict( - layer_kwargs - ) # when InputLayer is used for twice, if we "pop" elements, the second time to use it will have error. - - layer_class = layer_kwargs.pop('class') # class of current layer - prev_layer = layer_kwargs.pop( - 'prev_layer' - ) # name of previous layer : str =one layer list of str = multiple layers - - # convert function dictionary into real function - for key in layer_kwargs: # set input placeholder into the lastest layer - fn_dict = layer_kwargs[key] - if key in ['act']: - module_path = fn_dict['module_path'] - func_name = fn_dict['func_name'] - lib = importlib.import_module(module_path) - fn = getattr(lib, func_name) - layer_kwargs[key] = fn - # print(key, layer_kwargs[key]) - # print(name, prev_layer, layer_class, layer_kwargs) - - if layer_class == 'placeholder': # create placeholder - if name not in input_list: # if placeholder is not exist - dtype = layer_kwargs.pop('dtype') - shape = layer_kwargs.pop('shape') - _placeholder = tf.placeholder(eval('tf.' + dtype), shape, - name=name.split(':')[0]) # globals()['tf.'+dtype] - # _placeholder = tf.placeholder(ast.literal_eval('tf.' + dtype), shape, name=name.split(':')[0]) - # input_dict.update({name: _placeholder}) - input_list.append((name, _placeholder)) - else: # create network - if isinstance(prev_layer, list): # e.g. 
ConcatLayer, ElementwiseLayer have multiply previous layers - raise NotImplementedError("TL graph does not support this layer at the moment: %s" % (layer_class)) - else: # normal layers e.g. Conv2d - try: # if previous layer is layer - net = layer_dict[prev_layer] - layer_kwargs.update({'prev_layer': net}) - except Exception: # if previous layer is input placeholder - for n, t in input_list: - if n == prev_layer: - _placeholder = t - layer_kwargs.update({'inputs': _placeholder}) - layer_kwargs.update({'name': name}) - net = eval('tl.layers.' + layer_class)(**layer_kwargs) - layer_dict.update({name: net}) - - # rename placeholder e.g. x:0 --> x - for i, (n, t) in enumerate(input_list): - n_new = n.replace(':', '') - if n_new[-1] == '0': - n_new = n_new[:-1] - input_list[i] = (n_new, t) - # print(n_new, t) - - # put placeholder into network attributes - for n, t in input_list: - # print(name, n, t) - layer_dict[name].__dict__.update({n: t}) - logging.info("[*] attributes: {} {} {}".format(n, t.get_shape().as_list(), t.dtype.name)) - # for key in input_dict: # set input placeholder into the lastest layer - # layer_dict[name].globals()[key] = input_dict[key] - # logging.info(" attributes: {:3} {:15} {:15}".format(n, input_dict[key].get_shape().as_list(), input_dict[key].dtype.name)) - logging.info("[*] Load graph finished") - # return the lastest layer as network - return layer_dict[name] - - -def load_graph(name='model.pkl'): - """Restore TL model archtecture from a a pickle file. No parameters be restored. - - Parameters - ----------- - name : str - The name of graph file. - - Returns - -------- - network : TensorLayer layer - The input placeholder will become the attributes of the returned TL layer object. - - Examples - -------- - - see ``tl.files.save_graph`` - """ - logging.info("[*] Loading TL graph from {}".format(name)) - with open(name, 'rb') as file: - graphs = pickle.load(file) - return _graph2net(graphs) - - -def save_graph_and_params(network=None, name='model', sess=None): - """Save TL model architecture and parameters (i.e. whole model) into graph file and npz file, respectively. - - Parameters - ----------- - network : TensorLayer layer - The network to save. - name : str - The folder name to save the graph and parameters. - sess : Session - TensorFlow Session. - - Examples - --------- - Save architecture and parameters - - >>> tl.files.save_graph_and_params(net, 'model', sess) - - Load archtecture and parameters - - >>> net = tl.files.load_graph_and_params('model', sess) - """ - exists_or_mkdir(name, False) - save_graph(network, os.path.join(name, 'graph.pkl')) - save_npz(save_list=network.all_params, name=os.path.join(name, 'params.npz'), sess=sess) - - -def load_graph_and_params(name='model', sess=None): - """Load TL model architecture and parameters from graph file and npz file, respectively. - - Parameters - ----------- - name : str - The folder name to load the graph and parameters. - sess : Session - TensorFlow Session. - """ - network = load_graph(name=os.path.join(name, 'graph.pkl')) - load_and_assign_npz(sess=sess, name=os.path.join(name, 'params.npz'), network=network) - return network -''' - - -def save_any_to_npy(save_dict=None, name='file.npy'): - """Save variables to `.npy` file. - - Parameters - ------------ - save_dict : directory - The variables to be saved. - name : str - File name. 
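# --- Editorial note (assumption, not in the original source): ``np.save`` stores
# a Python dict as a 0-d object array, which is why ``load_npy_to_any`` below
# first tries ``.item()`` to unwrap it. A minimal round trip (recent NumPy may
# also require allow_pickle=True when loading):
import numpy as np

np.save('test.npy', {'data': ['a', 'b']})
d = np.load('test.npy', allow_pickle=True).item()  # -> {'data': ['a', 'b']}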
- - Examples - --------- - >>> tl.files.save_any_to_npy(save_dict={'data': ['a','b']}, name='test.npy') - >>> data = tl.files.load_npy_to_any(name='test.npy') - >>> print(data) - {'data': ['a','b']} - - """ - if save_dict is None: - save_dict = {} - np.save(name, save_dict) - - -def load_npy_to_any(path='', name='file.npy'): - """Load `.npy` file. - - Parameters - ------------ - path : str - Path to the file (optional). - name : str - File name. - - Examples - --------- - - see tl.files.save_any_to_npy() - - """ - file_path = os.path.join(path, name) - try: - return np.load(file_path).item() - except Exception: - return np.load(file_path) - raise Exception("[!] Fail to load %s" % file_path) - - -def file_exists(filepath): - """Check whether a file exists by given file path.""" - return os.path.isfile(filepath) - - -def folder_exists(folderpath): - """Check whether a folder exists by given folder path.""" - return os.path.isdir(folderpath) - - -def del_file(filepath): - """Delete a file by given file path.""" - os.remove(filepath) - - -def del_folder(folderpath): - """Delete a folder by given folder path.""" - shutil.rmtree(folderpath) - - -def read_file(filepath): - """Read a file and return a string. - - Examples - --------- - >>> data = tl.files.read_file('data.txt') - - """ - with open(filepath, 'r') as afile: - return afile.read() - - -def load_file_list(path=None, regx='\.jpg', printable=True, keep_prefix=False): - r"""Return a file list in a folder by given a path and regular expression. - - Parameters - ---------- - path : str or None - A folder path, if `None`, use the current directory. - regx : str - The regx of file name. - printable : boolean - Whether to print the files infomation. - keep_prefix : boolean - Whether to keep path in the file name. - - Examples - ---------- - >>> file_list = tl.files.load_file_list(path=None, regx='w1pre_[0-9]+\.(npz)') - - """ - if path is None: - path = os.getcwd() - file_list = os.listdir(path) - return_list = [] - for _, f in enumerate(file_list): - if re.search(regx, f): - return_list.append(f) - # return_list.sort() - if keep_prefix: - for i, f in enumerate(return_list): - return_list[i] = os.path.join(path, f) - - if printable: - logging.info('Match file list = %s' % return_list) - logging.info('Number of files = %d' % len(return_list)) - return return_list - - -def load_folder_list(path=""): - """Return a folder list in a folder by given a folder path. - - Parameters - ---------- - path : str - A folder path. - - """ - return [os.path.join(path, o) for o in os.listdir(path) if os.path.isdir(os.path.join(path, o))] - - -def exists_or_mkdir(path, verbose=True): - """Check a folder by given name, if not exist, create the folder and return False, - if directory exists, return True. - - Parameters - ---------- - path : str - A folder path. - verbose : boolean - If True (default), prints results. - - Returns - -------- - boolean - True if folder already exist, otherwise, returns False and create the folder. - - Examples - -------- - >>> tl.files.exists_or_mkdir("checkpoints/train") - - """ - if not os.path.exists(path): - if verbose: - logging.info("[*] creates %s ..." % path) - os.makedirs(path) - return False - else: - if verbose: - logging.info("[!] %s exists ..." 
% path)
-        return True
-
-
-def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None):
-    """Checks if the file exists in working_directory, otherwise tries to download it,
-    and optionally also tries to extract the file if its format is ".zip" or ".tar".
-
-    Parameters
-    -----------
-    filename : str
-        The name of the (to be) downloaded file.
-    working_directory : str
-        A folder path to search for the file in and download the file to.
-    url_source : str
-        The URL to download the file from.
-    extract : boolean
-        If True, tries to uncompress the downloaded file if it is a ".tar.gz/.tar.bz2" or ".zip" file, default is False.
-    expected_bytes : int or None
-        If set, tries to verify that the downloaded file is of the specified size, otherwise raises an Exception; the default is None, which corresponds to no check being performed.
-
-    Returns
-    ----------
-    str
-        File path of the downloaded (uncompressed) file.
-
-    Examples
-    --------
-    >>> down_file = tl.files.maybe_download_and_extract(filename='train-images-idx3-ubyte.gz',
-    ...                                                 working_directory='data/',
-    ...                                                 url_source='http://yann.lecun.com/exdb/mnist/')
-    >>> tl.files.maybe_download_and_extract(filename='ADEChallengeData2016.zip',
-    ...                                     working_directory='data/',
-    ...                                     url_source='http://sceneparsing.csail.mit.edu/data/',
-    ...                                     extract=True)
-
-    """
-
-    # We first define a download function, supporting both Python 2 and 3.
-    def _download(filename, working_directory, url_source):
-
-        progress_bar = progressbar.ProgressBar()
-
-        def _dlProgress(count, blockSize, totalSize, pbar=progress_bar):
-            if (totalSize != 0):
-
-                if not pbar.max_value:
-                    totalBlocks = math.ceil(float(totalSize) / float(blockSize))
-                    pbar.max_value = int(totalBlocks)
-
-                pbar.update(count, force=True)
-
-        filepath = os.path.join(working_directory, filename)
-
-        logging.info('Downloading %s...\n' % filename)
-
-        urlretrieve(url_source + filename, filepath, reporthook=_dlProgress)
-
-    exists_or_mkdir(working_directory, verbose=False)
-    filepath = os.path.join(working_directory, filename)
-
-    if not os.path.exists(filepath):
-
-        _download(filename, working_directory, url_source)
-        statinfo = os.stat(filepath)
-        logging.info('Successfully downloaded %s %s bytes.' % (filename, statinfo.st_size))
-        if (not (expected_bytes is None) and (expected_bytes != statinfo.st_size)):
-            raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?')
-        if (extract):
-            if tarfile.is_tarfile(filepath):
-                logging.info('Trying to extract tar file')
-                tarfile.open(filepath, 'r').extractall(working_directory)
-                logging.info('... Success!')
-            elif zipfile.is_zipfile(filepath):
-                logging.info('Trying to extract zip file')
-                with zipfile.ZipFile(filepath) as zf:
-                    zf.extractall(working_directory)
-                logging.info('... Success!')
-            else:
-                logging.info("Unknown compression format; only .tar.gz/.tar.bz2/.tar and .zip are supported")
-    return filepath
-
-
-def natural_keys(text):
-    """Sort a list of strings that contain numbers in human ("natural") order.
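# --- Editorial illustration (not in the original source): how the key function
# below turns a filename into a mixed str/int list, so digit runs compare
# numerically instead of lexicographically.
import re

def _atoi(text):  # local copy of the nested helper, for illustration only
    return int(text) if text.isdigit() else text

[_atoi(c) for c in re.split('(\d+)', 'im21.jpg')]  # -> ['im', 21, '.jpg']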
- - Examples - ---------- - >>> l = ['im1.jpg', 'im31.jpg', 'im11.jpg', 'im21.jpg', 'im03.jpg', 'im05.jpg'] - >>> l.sort(key=tl.files.natural_keys) - ['im1.jpg', 'im03.jpg', 'im05', 'im11.jpg', 'im21.jpg', 'im31.jpg'] - >>> l.sort() # that is what we dont want - ['im03.jpg', 'im05', 'im1.jpg', 'im11.jpg', 'im21.jpg', 'im31.jpg'] - - References - ---------- - - `link `__ - - """ - - # - alist.sort(key=natural_keys) sorts in human order - # http://nedbatchelder.com/blog/200712/human_sorting.html - # (See Toothy's implementation in the comments) - def atoi(text): - return int(text) if text.isdigit() else text - - return [atoi(c) for c in re.split('(\d+)', text)] - - -# Visualizing npz files -def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'): - r"""Convert the first weight matrix of `.npz` file to `.pdf` by using `tl.visualize.W()`. - - Parameters - ---------- - path : str - A folder path to `npz` files. - regx : str - Regx for the file name. - - Examples - --------- - Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf. - - >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx='w1pre_[0-9]+\.(npz)') - - """ - file_list = load_file_list(path=path, regx=regx) - for f in file_list: - W = load_npz(path, f)[0] - logging.info("%s --> %s" % (f, f.split('.')[0] + '.pdf')) - visualize.draw_weights(W, second=10, saveable=True, name=f.split('.')[0], fig_idx=2012) - - -def tf_variables_to_numpy(variables, sess=None): - # TODO : Documentation pending - """""" - if not isinstance(variables, list): - var_list = [variables] - else: - var_list = variables - - results = [] - if sess: - # graph mode - results = sess.run(var_list) - else: - try: - # eager mode - results.extend([v.numpy() for v in var_list]) - except Exception: - logging.error( - "Fail to convert tf variables to numpy array. Hint: pass sess as argument if in graph mode." - ) - return results - - -def assign_tf_variable(variable, value, sess=None): - # TODO : Documentation pending - """""" - if sess: - # graph mode - assign_op = variable.assign(value) - sess.run(assign_op) - else: - # eager mode - try: - variable.assign(value) - except Exception: - logging.error( - "Fail to assign the tensorflow variable {}" - " Hint: pass sess as argument if in graph mode.".format(variable) - ) - - -def save_weights_to_hdf5(filepath, weights, sess=None): - # TODO : Documentation pending - """""" - logging.info("[*] Saving TL weights into %s" % filepath) - - f = h5py.File(filepath, 'w') - - weights_names = [w.name for w in weights] - f.attrs['weights_names'] = weights_names # 'layer_name/weight_name' - - save_val_list = tf_variables_to_numpy(weights, sess) - - for name, val in zip(weights_names, save_val_list): - # each layer as a group - val_dataset = f.create_dataset(name, val.shape, dtype=val.dtype) - - if not val.shape: - # scalar - val_dataset[()] = val - else: - val_dataset[:] = val - - f.flush() - f.close() - - logging.info("[*] Saved") - - -def load_hdf5_to_weights_in_order(filepath, weights, sess=None): - # TODO : Documentation pending - """""" - f = h5py.File(filepath, 'r') - try: - weights_names = list(f.attrs['weights_names']) - except Exception: - raise NameError("The loaded hdf5 file needs to have 'weights_names' as attributes. " - "Please check whether this hdf5 file is saved from TL.") - - if len(weights) != len(weights_names): - logging.warning("Number of weights mismatch." 
- "Trying to load a weight file with " + str(len(weights)) + - " weights into a model with " + str(len(weights_names)) + - " weights.") - - for idx, name in enumerate(weights_names): - weights_val = np.asarray(f[name]) - assign_tf_variable(weights[idx], weights_val, sess) # FIXME: whether assign in a list way will be faster - - f.close() - logging.info("[*] Load %s SUCCESS!" % filepath) - - -def load_hdf5_to_weights(filepath, weights, sess=None, skip=False): - # TODO : Documentation pending - """""" - f = h5py.File(filepath, 'r') - try: - weights_names = list(f.attrs['weights_names']) - except Exception: - raise NameError("The loaded hdf5 file needs to have 'weights_names' as attributes. " - "Please check whether this hdf5 file is saved from TL.") - - if len(weights) != len(weights_names): - logging.warning("Number of weights mismatch. Trying to load a hdf5 file with {} weights elements" - " into a model with {} weights elements.".format(len(weights_names), len(weights))) - - net_weights_name = [w.name for w in weights] - - # check mismatch form network weights to hdf5 - for name in net_weights_name: - if name not in weights_names: - logging.warning("Network weights named '%s' not found in loaded hdf5 file. It will be skipped." % name) - - # load weights from hdf5 to network - for name in weights_names: - if name not in net_weights_name: - if skip: - logging.warning("Weights named '%s' not found in network. Skip it." % name) - else: - raise RuntimeError("Weights named '%s' not found in network. Hint: set argument skip=Ture " - "if you want to skip redundant or mismatch weights." % name) - else: - weights_val = np.asarray(f[name]) - assign_tf_variable(weights[net_weights_name.index(name)], weights_val, sess) - - f.close() - logging.info("[*] Load %s SUCCESS!" % filepath) diff --git a/tensorlayer/initializers.py b/tensorlayer/initializers.py deleted file mode 100644 index e9df433..0000000 --- a/tensorlayer/initializers.py +++ /dev/null @@ -1,275 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -import tensorflow as tf - -# from tensorlayer.layers.core import LayersConfig - -__all__ = [ - 'Initializer', - 'Zeros', - 'Ones', - 'Constant', - 'RandomUniform', - 'RandomNormal', - 'TruncatedNormal', - 'deconv2d_bilinear_upsampling_initializer' -] - - -class Initializer(object): - """Initializer base class: all initializers inherit from this class. - """ - - def __call__(self, shape, dtype=None): - """Returns a tensor object initialized as specified by the initializer. - - Parameters - ---------- - shape : tuple of int. - The shape of the tensor. - dtype : Optional dtype of the tensor. If not provided will return tensor - of `tf.float32`. - - Returns - ------- - - """ - raise NotImplementedError - - def get_config(self): - """Returns the configuration of the initializer as a JSON-serializable dict. - - Returns - ------- - A JSON-serializable Python dict. - """ - return {} - - @classmethod - def from_config(cls, config): - """Instantiates an initializer from a configuration dictionary. - - Parameters - ---------- - config : A python dictionary. - It will typically be the output of `get_config`. - - Returns - ------- - An Initializer instance. - """ - if 'dtype' in config: - config.pop('dtype') - return cls(**config) - - -class Zeros(Initializer): - """Initializer that generates tensors initialized to 0. 
- """ - - def __call__(self, shape, dtype=tf.float32): - return tf.zeros(shape, dtype=dtype) - - -class Ones(Initializer): - """Initializer that generates tensors initialized to 1. - """ - - def __call__(self, shape, dtype=tf.float32): - return tf.ones(shape, dtype=dtype) - - -class Constant(Initializer): - """Initializer that generates tensors initialized to a constant value. - - Parameters - ---------- - value : A python scalar, the value of the generated tensor. - """ - def __init__(self, value=0): - self.value = value - - def __call__(self, shape, dtype=None): - return tf.constant(self.value, shape=shape, dtype=dtype) - - def get_config(self): - return {"value": self.value} - - -class RandomUniform(Initializer): - """Initializer that generates tensors with a uniform distribution. - - Parameters - ---------- - minval : A python scalar or a scalar tensor. Lower bound of the range - of random values to generate. - maxval : A python scalar or a scalar tensor. Upper bound of the range - of random values to generate. - seed : A Python integer. Used to seed the random generator. - - """ - - def __init__(self, minval=-0.05, maxval=0.05, seed=None): - self.minval = minval - self.maxval = maxval - self.seed = seed - - def __call__(self, shape, dtype=tf.float32): - return tf.random.uniform(shape, self.minval, self.maxval, - dtype=dtype, seed=self.seed) - - def get_config(self): - return { - "minval": self.minval, - "maxval": self.maxval, - "seed": self.seed - } - - -class RandomNormal(Initializer): - """Initializer that generates tensors with a normal distribution. - - Parameters - ---------- - mean : a python scalar or a scalar tensor. Mean of the random values - to generate. - stddev : a python scalar or a scalar tensor. Standard deviation of the - random values to generate. - seed : A Python integer. Used to seed the random generator. - """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - - def __call__(self, shape, dtype=tf.float32): - return tf.random.normal(shape, self.mean, self.stddev, - dtype=dtype, seed=self.seed) - - def get_config(self): - return { - "mean": self.mean, - "stddev": self.stddev, - "seed": self.seed - } - - -class TruncatedNormal(Initializer): - """Initializer that generates a truncated normal distribution. - - These values are similar to values from a `RandomNormal` - except that values more than two standard deviations from the mean - are discarded and re-drawn. This is the recommended initializer for - neural network weights and filters. - - - Parameters - ---------- - mean : a python scalar or a scalar tensor. Mean of the random values - to generate. - stddev : a python scalar or a scalar tensor. Standard deviation of the - random values to generate. - seed : A Python integer. Used to seed the random generator. - """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - - def __call__(self, shape, dtype=tf.float32): - return tf.random.truncated_normal(shape, self.mean, self.stddev, - dtype=dtype, seed=self.seed) - - def get_config(self): - return { - "mean": self.mean, - "stddev": self.stddev, - "seed": self.seed - } - - - -def deconv2d_bilinear_upsampling_initializer(shape): - """Returns the initializer that can be passed to DeConv2dLayer for initializing the - weights in correspondence to channel-wise bilinear up-sampling. 
- Used in segmentation approaches such as [FCN](https://arxiv.org/abs/1605.06211) - - Parameters - ---------- - shape : tuple of int - The shape of the filters, [height, width, output_channels, in_channels]. - It must match the shape passed to DeConv2dLayer. - - Returns - ------- - ``tf.constant_initializer`` - A constant initializer with weights set to correspond to per channel bilinear upsampling - when passed as W_int in DeConv2dLayer - - Examples - -------- - - Upsampling by a factor of 2, ie e.g 100->200 - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> rescale_factor = 2 - >>> imsize = 128 - >>> num_channels = 3 - >>> filter_shape = (5, 5) - >>> filter_size = (2 * rescale_factor - rescale_factor % 2) #Corresponding bilinear filter size - >>> num_in_channels = 3 - >>> num_out_channels = 3 - >>> deconv_filter_shape = (filter_size, filter_size, num_out_channels, num_in_channels) - >>> x = tf.placeholder(tf.float32, (1, imsize, imsize, num_channels)) - >>> net = tl.layers.InputLayer(x, name='input_layer') - >>> bilinear_init = deconv2d_bilinear_upsampling_initializer(shape=filter_shape) - >>> net = tl.layers.DeConv2dLayer(net, - ... shape=filter_shape, - ... output_shape=(1, imsize*rescale_factor, imsize*rescale_factor, num_out_channels), - ... strides=(1, rescale_factor, rescale_factor, 1), - ... W_init=bilinear_init, - ... padding='SAME', - ... act=None, name='g/h1/decon2d') - - """ - if shape[0] != shape[1]: - raise Exception('deconv2d_bilinear_upsampling_initializer only supports symmetrical filter sizes') - - if shape[3] < shape[2]: - raise Exception( - 'deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels ' - ) - - filter_size = shape[0] - num_out_channels = shape[2] - num_in_channels = shape[3] - - # Create bilinear filter kernel as numpy array - bilinear_kernel = np.zeros([filter_size, filter_size], dtype=np.float32) - scale_factor = (filter_size + 1) // 2 - if filter_size % 2 == 1: - center = scale_factor - 1 - else: - center = scale_factor - 0.5 - for x in range(filter_size): - for y in range(filter_size): - bilinear_kernel[x, y] = (1 - abs(x - center) / scale_factor) * (1 - abs(y - center) / scale_factor) - weights = np.zeros((filter_size, filter_size, num_out_channels, num_in_channels)) - for i in range(num_out_channels): - weights[:, :, i, i] = bilinear_kernel - - # assign numpy array to constant_initalizer and pass to get_variable - # FIXME : How to deal with this? Will LayersConfig be removed? - return tf.constant_initializer(value=weights, dtype=LayersConfig.tf_dtype) - - -# Alias -zeros = Zeros -ones = Ones -constant = Constant -random_uniform = RandomUniform -random_normal = RandomNormal -truncated_normal = TruncatedNormal diff --git a/tensorlayer/iterate.py b/tensorlayer/iterate.py deleted file mode 100644 index 6355fc7..0000000 --- a/tensorlayer/iterate.py +++ /dev/null @@ -1,283 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -from six.moves import xrange - -__all__ = [ - 'minibatches', - 'seq_minibatches', - 'seq_minibatches2', - 'ptb_iterator', -] - - -def minibatches(inputs=None, targets=None, batch_size=None, allow_dynamic_batch_size=False, shuffle=False): - """Generate a generator that input a group of example in numpy.array and - their labels, return the examples and labels by the given batch size. - - Parameters - ---------- - inputs : numpy.array - The input features, every row is a example. - targets : numpy.array - The labels of inputs, every row is a example. 
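# --- Editorial example (assumption about intended use, not in the original
# source): the trailing-batch behaviour of ``minibatches`` above.
import numpy as np

X, y = np.arange(6), np.arange(6)
for bx, by in minibatches(X, y, batch_size=4, allow_dynamic_batch_size=True):
    print(len(bx))  # prints 4, then 2; with the default False the last 2 examples are dropped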
- batch_size : int - The batch size. - allow_dynamic_batch_size: boolean - Allow the use of the last data batch in case the number of examples is not a multiple of batch_size, this may result in unexpected behaviour if other functions expect a fixed-sized batch-size. - shuffle : boolean - Indicating whether to use a shuffling queue, shuffle the dataset before return. - - Examples - -------- - >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) - >>> y = np.asarray([0,1,2,3,4,5]) - >>> for batch in tl.iterate.minibatches(inputs=X, targets=y, batch_size=2, shuffle=False): - >>> print(batch) - (array([['a', 'a'], ['b', 'b']], dtype=' len(inputs): - if allow_dynamic_batch_size: - end_idx = len(inputs) - else: - break - if shuffle: - excerpt = indices[start_idx:end_idx] - else: - excerpt = slice(start_idx, end_idx) - if (isinstance(inputs, list) or isinstance(targets, list)) and (shuffle ==True): - # zsdonghao: for list indexing when shuffle==True - yield [inputs[i] for i in excerpt], [targets[i] for i in excerpt] - else: - yield inputs[excerpt], targets[excerpt] - - -def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): - """Generate a generator that return a batch of sequence inputs and targets. - If `batch_size=100` and `seq_length=5`, one return will have 500 rows (examples). - - Parameters - ---------- - inputs : numpy.array - The input features, every row is a example. - targets : numpy.array - The labels of inputs, every element is a example. - batch_size : int - The batch size. - seq_length : int - The sequence length. - stride : int - The stride step, default is 1. - - Examples - -------- - Synced sequence input and output. - - >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) - >>> y = np.asarray([0, 1, 2, 3, 4, 5]) - >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1): - >>> print(batch) - (array([['a', 'a'], ['b', 'b'], ['b', 'b'], ['c', 'c']], dtype='>> return_last = True - >>> num_steps = 2 - >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) - >>> Y = np.asarray([0,1,2,3,4,5]) - >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=Y, batch_size=2, seq_length=num_steps, stride=1): - >>> x, y = batch - >>> if return_last: - >>> tmp_y = y.reshape((-1, num_steps) + y.shape[1:]) - >>> y = tmp_y[:, -1] - >>> print(x, y) - [['a' 'a'] - ['b' 'b'] - ['b' 'b'] - ['c' 'c']] [1 2] - [['c' 'c'] - ['d' 'd'] - ['d' 'd'] - ['e' 'e']] [3 4] - - """ - if len(inputs) != len(targets): - raise AssertionError("The length of inputs and targets should be equal") - - n_loads = (batch_size * stride) + (seq_length - stride) - - for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)): - seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:], dtype=inputs.dtype) - seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:], dtype=targets.dtype) - for b_idx in xrange(batch_size): - start_seq_idx = start_idx + (b_idx * stride) - end_seq_idx = start_seq_idx + seq_length - seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx] - seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx] - flatten_inputs = seq_inputs.reshape((-1, ) + inputs.shape[1:]) - flatten_targets = seq_targets.reshape((-1, ) + targets.shape[1:]) - yield flatten_inputs, flatten_targets - - -def seq_minibatches2(inputs, targets, batch_size, num_steps): - """Generate a generator that iterates on two list of words. 
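# --- Editorial arithmetic check (not in the original source): each yield of
# ``seq_minibatches`` above flattens a (batch_size, seq_length) block, so
# batch_size=100, seq_length=5 gives 100 * 5 = 500 rows per yield, and the
# window start advances by batch_size * stride positions between yields.
batch_size, seq_length, stride = 2, 2, 1
n_loads = (batch_size * stride) + (seq_length - stride)        # 3 inputs consumed per window block
starts = list(range(0, 6 - n_loads + 1, batch_size * stride))  # [0, 2] for 6 examples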
Yields (Returns) the source contexts and - the target context by the given batch_size and num_steps (sequence_length). - In TensorFlow's tutorial, this generates the `batch_size` pointers into the raw PTB data, and allows minibatch iteration along these pointers. - - Parameters - ---------- - inputs : list of data - The context in list format; note that context usually be represented by splitting by space, and then convert to unique word IDs. - targets : list of data - The context in list format; note that context usually be represented by splitting by space, and then convert to unique word IDs. - batch_size : int - The batch size. - num_steps : int - The number of unrolls. i.e. sequence length - - Yields - ------ - Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. - - Raises - ------ - ValueError : if batch_size or num_steps are too high. - - Examples - -------- - >>> X = [i for i in range(20)] - >>> Y = [i for i in range(20,40)] - >>> for batch in tl.iterate.seq_minibatches2(X, Y, batch_size=2, num_steps=3): - ... x, y = batch - ... print(x, y) - - [[ 0. 1. 2.] - [ 10. 11. 12.]] - [[ 20. 21. 22.] - [ 30. 31. 32.]] - - [[ 3. 4. 5.] - [ 13. 14. 15.]] - [[ 23. 24. 25.] - [ 33. 34. 35.]] - - [[ 6. 7. 8.] - [ 16. 17. 18.]] - [[ 26. 27. 28.] - [ 36. 37. 38.]] - - Notes - ----- - - Hint, if the input data are images, you can modify the source code `data = np.zeros([batch_size, batch_len)` to `data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]])`. - """ - if len(inputs) != len(targets): - raise AssertionError("The length of inputs and targets should be equal") - - data_len = len(inputs) - batch_len = data_len // batch_size - # data = np.zeros([batch_size, batch_len]) - data = np.zeros((batch_size, batch_len) + inputs.shape[1:], dtype=inputs.dtype) - data2 = np.zeros([batch_size, batch_len]) - - for i in range(batch_size): - data[i] = inputs[batch_len * i:batch_len * (i + 1)] - data2[i] = targets[batch_len * i:batch_len * (i + 1)] - - epoch_size = (batch_len - 1) // num_steps - - if epoch_size == 0: - raise ValueError("epoch_size == 0, decrease batch_size or num_steps") - - for i in range(epoch_size): - x = data[:, i * num_steps:(i + 1) * num_steps] - x2 = data2[:, i * num_steps:(i + 1) * num_steps] - yield (x, x2) - - -def ptb_iterator(raw_data, batch_size, num_steps): - """Generate a generator that iterates on a list of words, see `PTB example `__. - Yields the source contexts and the target context by the given batch_size and num_steps (sequence_length). - - In TensorFlow's tutorial, this generates `batch_size` pointers into the raw - PTB data, and allows minibatch iteration along these pointers. - - Parameters - ---------- - raw_data : a list - the context in list format; note that context usually be - represented by splitting by space, and then convert to unique - word IDs. - batch_size : int - the batch size. - num_steps : int - the number of unrolls. i.e. sequence_length - - Yields - ------ - Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. - The second element of the tuple is the same data time-shifted to the - right by one. - - Raises - ------ - ValueError : if batch_size or num_steps are too high. 
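# --- Editorial arithmetic check (not in the original source): the sizes
# ``ptb_iterator`` below computes for the Examples that follow.
data_len, batch_size, num_steps = 20, 2, 3
batch_len = data_len // batch_size         # 20 // 2 = 10
epoch_size = (batch_len - 1) // num_steps  # (10 - 1) // 3 = 3, i.e. three (x, y) pairs per epoch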
- - Examples - -------- - >>> train_data = [i for i in range(20)] - >>> for batch in tl.iterate.ptb_iterator(train_data, batch_size=2, num_steps=3): - >>> x, y = batch - >>> print(x, y) - [[ 0 1 2] <---x 1st subset/ iteration - [10 11 12]] - [[ 1 2 3] <---y - [11 12 13]] - - [[ 3 4 5] <--- 1st batch input 2nd subset/ iteration - [13 14 15]] <--- 2nd batch input - [[ 4 5 6] <--- 1st batch target - [14 15 16]] <--- 2nd batch target - - [[ 6 7 8] 3rd subset/ iteration - [16 17 18]] - [[ 7 8 9] - [17 18 19]] - """ - raw_data = np.array(raw_data, dtype=np.int32) - - data_len = len(raw_data) - batch_len = data_len // batch_size - data = np.zeros([batch_size, batch_len], dtype=np.int32) - for i in range(batch_size): - data[i] = raw_data[batch_len * i:batch_len * (i + 1)] - - epoch_size = (batch_len - 1) // num_steps - - if epoch_size == 0: - raise ValueError("epoch_size == 0, decrease batch_size or num_steps") - - for i in range(epoch_size): - x = data[:, i * num_steps:(i + 1) * num_steps] - y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1] - yield (x, y) diff --git a/tensorlayer/layers/__init__.py b/tensorlayer/layers/__init__.py deleted file mode 100644 index 8c48b4f..0000000 --- a/tensorlayer/layers/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -TensorLayer provides rich layer implementations trailed for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow user to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API `__. -""" - -from .activation import * -from .convolution import * -from .core import * -from .dense import * -from .dropout import * -from .deprecated import * -from .embedding import * -from .extend import * -# from .flow_control import * # remove for TF 2.0 -from .image_resampling import * -from .importer import * -from .inputs import * -from .lambda_layers import * -from .merge import * -from .noise import * -from .normalization import * -from .object_detection import * -from .padding import * -from .pooling import * -from .quantize import * -# from .reconstruction import * # remove for TF 2.0 -from .recurrent import * -from .scale import * -from .shape import * -from .spatial_transformer import * -from .stack import * -# from .time_distribution import * # remove for TF 2.0 -from .utils import * diff --git a/tensorlayer/layers/activation.py b/tensorlayer/layers/activation.py deleted file mode 100644 index a2fda04..0000000 --- a/tensorlayer/layers/activation.py +++ /dev/null @@ -1,240 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.activation import leaky_relu6 -from tensorlayer.activation import leaky_twice_relu6 - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'PRelu', - 'PRelu6', - 'PTRelu6', -] - - -class PRelu(Layer): - """ - The :class:`PRelu` class is Parametric Rectified Linear layer. - - Parameters - ---------- - channel_shared : boolean - If True, single weight is shared by all channels. - a_init : initializer - The initializer for initializing the alpha(s). - a_init_args : dictionary - The arguments for initializing the alpha(s). - name : None or str - A unique layer name. 
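# --- Editorial sketch (assumption): the alpha weight built by PRelu below is
# squashed with a sigmoid before use, so the effective activation is
# f(x) = max(0, x) + sigmoid(alpha_var) * min(0, x), per channel (or one
# shared scalar when channel_shared=True). A minimal numpy reference:
import numpy as np

def prelu_ref(x, alpha):  # hypothetical reference function, not the layer itself
    return np.maximum(0., x) + alpha * np.minimum(0., x)

prelu_ref(np.array([-2., 3.]), alpha=0.25)  # -> array([-0.5,  3. ])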
-
-    References
-    -----------
-    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `__
-    - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__
-
-    """
-
-    def __init__(
-            self,
-            channel_shared=False,
-            a_init=tf.compat.v1.initializers.truncated_normal(mean=0.0, stddev=0.1),
-            a_init_args=None,
-            name=None  # "prelu"
-    ):
-
-        # super(PRelu, self).__init__(prev_layer=prev_layer, act=tf.nn.leaky_relu, a_init_args=a_init_args, name=name)
-        super().__init__(name)
-        self.channel_shared = channel_shared
-        self.a_init = a_init
-        self.a_init_args = a_init_args
-
-        logging.info("PRelu %s: channel_shared: %s" % (self.name, self.channel_shared))
-
-    def build(self, inputs_shape):
-        if self.channel_shared:
-            w_shape = (1, )
-        else:
-            w_shape = inputs_shape[-1]
-        self.alpha_var = self._get_weights("alpha", shape=w_shape, init=self.a_init, init_args=self.a_init_args)
-        # self.alpha_var = tf.compat.v1.get_variable(
-        #     name=self.name + '/alpha', shape=w_shape, initializer=self.a_init, dtype=LayersConfig.tf_dtype,
-        #     **self.a_init_args
-        # )
-        self.alpha_var_constrained = tf.nn.sigmoid(self.alpha_var, name="constraining_alpha_var_in_0_1")
-        # self.add_weights(self.alpha_var)
-
-    def forward(self, inputs):
-        outputs = self._apply_activation(inputs, **{'alpha': self.alpha_var_constrained, 'name': "prelu_activation"})
-        return outputs
-
-
-class PRelu6(Layer):
-    """
-    The :class:`PRelu6` class is a Parametric Rectified Linear layer integrating ReLU6 behaviour.
-
-    This layer is a modified version of :class:`PRelu`.
-
-    This activation layer uses a modified version of :func:`tl.act.leaky_relu` introduced by the following paper:
-    `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__
-
-    This activation function also uses a modified version of the activation function :func:`tf.nn.relu6` introduced by the following paper:
-    `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__
-
-    This activation layer pushes the logic further by adding `leaky` behaviour both below zero and above six.
-
-    The function returns the following results:
-      - When x < 0: ``f(x) = alpha_low * x``.
-      - When x in [0, 6]: ``f(x) = x``.
-      - When x > 6: ``f(x) = 6``.
-
-    Parameters
-    ----------
-    channel_shared : boolean
-        If True, single weight is shared by all channels.
-    a_init : initializer
-        The initializer for initializing the alpha(s).
-    a_init_args : dictionary
-        The arguments for initializing the alpha(s).
-    name : None or str
-        A unique layer name.
-
-    References
-    -----------
-    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `__
-    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__
-    - `Convolutional Deep Belief Networks on CIFAR-10 [A.
Krizhevsky, 2010] `__ - - """ - - def __init__( - self, - channel_shared=False, - a_init=tf.compat.v1.initializers.truncated_normal(mean=0.0, stddev=0.1), - a_init_args=None, - name=None # "prelu6" - ): - - # super(PRelu6, self).__init__(prev_layer=prev_layer, act=leaky_relu6, a_init_args=a_init_args, name=name) - super().__init__(name) - self.channel_shared = channel_shared - self.a_init = a_init - self.a_init_args = a_init_args - - logging.info("PRelu6 %s: channel_shared: %s" % (self.name, self.channel_shared)) - - def build(self, inputs_shape): - if self.channel_shared: - w_shape = (1, ) - else: - w_shape = inputs_shape[-1] - self.alpha_var = self._get_weights("alpha", shape=w_shape, init=self.a_init, init_args=self.a_init_args) - # self.alpha_var = tf.compat.v1.get_variable( - # name=self.name + '/alpha', shape=w_shape, initializer=self.a_init, dtype=LayersConfig.tf_dtype, - # **self.a_init_args - # ) - - self.alpha_var_constrained = tf.nn.sigmoid(self.alpha_var, name="constraining_alpha_var_in_0_1") - # self.add_weights(self.alpha_var) - - def forward(self, inputs): - outputs = self._apply_activation(inputs, **{'alpha': self.alpha_var_constrained, 'name': "prelu6_activation"}) - return outputs - - -class PTRelu6(Layer): - """ - The :class:`PTRelu6` class is Parametric Rectified Linear layer integrating ReLU6 behaviour. - - This Layer is a modified version of the :class:`PRelu`. - - This activation layer use a modified version :func:`tl.act.leaky_relu` introduced by the following paper: - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] `__ - - This activation function also use a modified version of the activation function :func:`tf.nn.relu6` introduced by the following paper: - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__ - - This activation layer push further the logic by adding `leaky` behaviour both below zero and above six. - - The function return the following results: - - When x < 0: ``f(x) = alpha_low * x``. - - When x in [0, 6]: ``f(x) = x``. - - When x > 6: ``f(x) = 6 + (alpha_high * (x-6))``. - - This version goes one step beyond :class:`PRelu6` by introducing leaky behaviour on the positive side when x > 6. - - Parameters - ---------- - channel_shared : boolean - If True, single weight is shared by all channels. - a_init : initializer - The initializer for initializing the alpha(s). - a_init_args : dictionary - The arguments for initializing the alpha(s). - name : None or str - A unique layer name. - - References - ----------- - - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `__ - - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] `__ - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. 
Maas et al., 2013] `__ - - """ - - def __init__( - self, - channel_shared=False, - a_init=tf.compat.v1.initializers.truncated_normal(mean=0.0, stddev=0.1), - a_init_args=None, - name=None # "ptreLU6" - ): - - # super(PTRelu6, self).__init__(prev_layer=prev_layer, act=leaky_twice_relu6, a_init_args=a_init_args, name=name) - super().__init__(name) - self.channel_shared = channel_shared - self.a_init = a_init - self.a_init_args = a_init_args - - logging.info("PTRelu6 %s: channel_shared: %s" % (self.name, self.channel_shared)) - - def build(self, inputs_shape): - if self.channel_shared: - w_shape = (1, ) - else: - w_shape = inputs_shape[-1] - - # Alpha for outputs lower than zeros - self.alpha_low = self._get_weights("alpha_low", shape=w_shape, init=self.a_init, init_args=self.a_init_args) - # self.alpha_low = tf.compat.v1.get_variable( - # name=self.name + '/alpha_low', shape=w_shape, initializer=self.a_init, dtype=LayersConfig.tf_dtype, - # **self.a_init_args - # ) - self.alpha_low_constrained = tf.nn.sigmoid(self.alpha_low, name="constraining_alpha_low_in_0_1") - - # Alpha for outputs higher than 6 - self.alpha_high = self._get_weights("alpha_high", shape=w_shape, init=self.a_init, init_args=self.a_init_args) - # self.alpha_high = tf.compat.v1.get_variable( - # name=self.name + '/alpha_high', shape=w_shape, initializer=self.a_init, dtype=LayersConfig.tf_dtype, - # **self.a_init_args - # ) - - self.alpha_high_constrained = tf.nn.sigmoid(self.alpha_high, name="constraining_alpha_high_in_0_1") - - # self.add_weights([self.alpha_low, self.alpha_high]) - - def forward(self, inputs): - outputs = self._apply_activation( - inputs, **{ - 'alpha_low': self.alpha_low_constrained, - 'alpha_high': self.alpha_high_constrained, - 'name': "ptrelu6_activation" - } - ) - return outputs diff --git a/tensorlayer/layers/convolution/__init__.py b/tensorlayer/layers/convolution/__init__.py deleted file mode 100644 index a71c484..0000000 --- a/tensorlayer/layers/convolution/__init__.py +++ /dev/null @@ -1,82 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -TensorLayer provides rich layer implementations trailed for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow user to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API `__. 
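# --- Editorial sketch (assumption: TL2-style functional API; layer names taken
# from __all__ below): chaining a few of the simplified convolution layers.
import tensorflow as tf
import tensorlayer as tl

ni = tl.layers.Input([None, 32, 32, 3])
nn = tl.layers.Conv2d(64, (3, 3), (1, 1), act=tf.nn.relu)(ni)   # conv, stride 1
nn = tl.layers.DeConv2d(32, (3, 3), strides=(2, 2))(nn)         # transposed conv, 2x upsampling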
-""" - -# from .atrous_conv import * # remove for TF 2.0 -from .binary_conv import * -from .deformable_conv import * -from .depthwise_conv import * -from .dorefa_conv import * -from .expert_conv import * -from .expert_deconv import * -from .group_conv import * -from .separable_conv import * -from .simplified_conv import * -from .simplified_deconv import * -from .super_resolution import * -from .ternary_conv import * -from .quan_conv import * -from .quan_conv_bn import * - -__all__ = [ - - # simplified conv - 'Conv1d', - 'Conv2d', - 'Conv3d', - - # simplified deconv - 'DeConv2d', - 'DeConv3d', - - # expert conv - 'Conv1dLayer', - 'Conv2dLayer', - 'Conv3dLayer', - - # expert conv - 'DeConv2dLayer', - 'DeConv3dLayer', - - # atrous - # 'AtrousConv1dLayer', - # 'AtrousConv2dLayer', - # 'AtrousDeConv2d', - - # binary - 'BinaryConv2d', - - # deformable - 'DeformableConv2d', - - # depthwise - 'DepthwiseConv2d', - - # dorefa - 'DorefaConv2d', - - # group - 'GroupConv2d', - - # separable - 'SeparableConv1d', - 'SeparableConv2d', - - # subpixel - 'SubpixelConv1d', - 'SubpixelConv2d', - - # ternary - 'TernaryConv2d', - - #quan_conv - 'QuanConv2d', - 'QuanConv2dWithBN', -] diff --git a/tensorlayer/layers/convolution/binary_conv.py b/tensorlayer/layers/convolution/binary_conv.py deleted file mode 100644 index f707be7..0000000 --- a/tensorlayer/layers/convolution/binary_conv.py +++ /dev/null @@ -1,159 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import quantize - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = ['BinaryConv2d'] - - -class BinaryConv2d(Layer): - """ - The :class:`BinaryConv2d` class is a 2D binary CNN layer, which weights are either -1 or 1 while inference. - - Note that, the bias vector would not be binarized. - - Parameters - ---------- - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inference. (TODO). - W_init : initializer - The initializer for the the weight matrix. - b_init : initializer or None - The initializer for the the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - use_cudnn_on_gpu : bool - Default is False. - data_format : str - "NHWC" or "NCHW", default is "NHWC". - name : None or str - A unique layer name. - - Examples - --------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, [None, 256, 256, 3]) - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.BinaryConv2d(net, 32, (5, 5), (1, 1), padding='SAME', name='bcnn1') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool1') - >>> net = tl.layers.BatchNorm(net, act=tl.act.htanh, is_train=True, name='bn1') - ... 
- >>> net = tl.layers.Sign(net) - >>> net = tl.layers.BinaryConv2d(net, 64, (5, 5), (1, 1), padding='SAME', name='bcnn2') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool2') - >>> net = tl.layers.BatchNorm(net, act=tl.act.htanh, is_train=True, name='bn2') - - """ - - def __init__( - self, - n_filter=32, - filter_size=(3, 3), - strides=(1, 1), - act=None, - padding='SAME', - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - use_cudnn_on_gpu=None, - data_format=None, - name='binary_cnn2d', - ): - # super(BinaryConv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.act = act - self.padding = padding - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - self.use_cudnn_on_gpu = use_cudnn_on_gpu - self.data_format = data_format - logging.info( - "BinaryConv2d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - if use_gemm: - raise Exception("TODO. The current version use tf.matmul for inferencing.") - - if len(strides) != 2: - raise ValueError("len(strides) should be 2.") - - def build(self, inputs_shape): - if inputs_shape[-1] is None: - logging.warning("unknown input channels, set to 1") - pre_channel = 1 - - self.shape = (self.filter_size[0], self.filter_size[1], pre_channel, self.n_filter) - self.strides = (1, strides[0], strides[1], 1) - - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W_conv2d', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights( - scope_name=self.name, var_name="filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args - ) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.shape[-1]), init=self.b_init, init_args=self.b_init_args) - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b_conv2d', shape=(self.shape[-1]), initializer=self.b_init, - # dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - - # if self.b_init: - # self.add_weights([self.W, self.b]) - # else: - # self.add_weights(self.W) - - def forward(self, inputs): - """ - prev_layer : :class:`Layer` - Previous layer. - """ - self.W = quantize(self.W) - - outputs = tf.nn.conv2d( - inputs, self.W, strides=self.strides, padding=self.padding, use_cudnn_on_gpu=self.use_cudnn_on_gpu, - data_format=self.data_format - ) - - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/convolution/deformable_conv.py b/tensorlayer/layers/convolution/deformable_conv.py deleted file mode 100644 index 43ca5b7..0000000 --- a/tensorlayer/layers/convolution/deformable_conv.py +++ /dev/null @@ -1,297 +0,0 @@ -#! 
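The `forward` pass of the `BinaryConv2d` layer removed above quantizes its kernel with `tensorlayer.layers.utils.quantize`. A common realization of such weight binarization, and a reasonable guess at what that utility does, is the sign function with a straight-through gradient estimator:

```python
import tensorflow as tf

@tf.custom_gradient
def quantize_ste(w):
    """Binarize to {-1, +1} in the forward pass; pass gradients straight through."""
    def grad(dy):
        # straight-through estimator: treat sign() as the identity for gradients
        return dy
    # note: tf.sign(0.) is 0; real implementations often map zeros to +1,
    # which this sketch ignores
    return tf.sign(w), grad

w = tf.Variable([[0.3, -0.7], [-0.1, 0.9]])
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(quantize_ste(w))
print(quantize_ste(w).numpy())          # [[ 1. -1.] [-1.  1.]]
print(tape.gradient(loss, w).numpy())   # all ones: gradients flow through the sign
```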
/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import tensorflow as tf
-
-from tensorlayer.layers.core import Layer
-# from tensorlayer.layers.core import LayersConfig
-
-from tensorlayer import logging
-
-from tensorlayer.decorators import deprecated_alias
-from tensorlayer.decorators import private_method
-
-__all__ = [
-    'DeformableConv2d',
-]
-
-
-class DeformableConv2d(Layer):
-    """The :class:`DeformableConv2d` class is a 2D
-    `Deformable Convolutional Network `__.
-
-    Parameters
-    ----------
-    prev_layer : :class:`Layer`
-        Previous layer.
-    offset_layer : :class:`Layer`
-        To predict the offsets of the convolution operations.
-        The output shape is (batchsize, input height, input width, 2*(number of elements in the convolution kernel)),
-        e.g. for a 3x3 kernel the last dimension should be 18 (2x3x3).
-    n_filter : int
-        The number of filters.
-    filter_size : tuple of int
-        The filter size (height, width).
-    act : activation function
-        The activation function of this layer.
-    W_init : initializer
-        The initializer for the weight matrix.
-    b_init : initializer or None
-        The initializer for the bias vector. If None, skip biases.
-    W_init_args : dictionary
-        The arguments for the weight matrix initializer.
-    b_init_args : dictionary
-        The arguments for the bias vector initializer.
-    name : str
-        A unique layer name.
-
-    Examples
-    --------
-    >>> net = tl.layers.InputLayer(x, name='input_layer')
-    >>> offset1 = tl.layers.Conv2d(net, 18, (3, 3), (1, 1), act=act, padding='SAME', name='offset1')
-    >>> net = tl.layers.DeformableConv2d(net, offset1, 32, (3, 3), act=act, name='deformable1')
-    >>> offset2 = tl.layers.Conv2d(net, 18, (3, 3), (1, 1), act=act, padding='SAME', name='offset2')
-    >>> net = tl.layers.DeformableConv2d(net, offset2, 64, (3, 3), act=act, name='deformable2')
-
-    References
-    ----------
-    - The deformation operation was adapted from the implementation `here `__
-
-    Notes
-    -----
-    - The padding is fixed to 'SAME'.
-    - The current implementation is not optimized for memory usage. Please use it carefully.
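Before the implementation below: the `_tf_batch_map_*` helpers of `DeformableConv2d` ultimately perform bilinear sampling of the feature map at fractional, offset-shifted coordinates. This standalone sketch (simplified to a single-channel map; names are ours) shows that core step:

```python
import tensorflow as tf

def bilinear_sample(feat, coords):
    """Sample a single-channel map `feat` (h, w) at fractional `coords` (n, 2) as (y, x).

    Mirrors the lt/rb/lb/rt corner interpolation used by DeformableConv2d.
    """
    h, w = tf.shape(feat)[0], tf.shape(feat)[1]
    y, x = coords[:, 0], coords[:, 1]
    # clip to the valid range, as the layer does before sampling
    y = tf.clip_by_value(y, 0.0, tf.cast(h - 1, tf.float32))
    x = tf.clip_by_value(x, 0.0, tf.cast(w - 1, tf.float32))
    y0, x0 = tf.floor(y), tf.floor(x)
    y1 = tf.minimum(y0 + 1, tf.cast(h - 1, tf.float32))
    x1 = tf.minimum(x0 + 1, tf.cast(w - 1, tf.float32))

    def at(yy, xx):
        idx = tf.stack([tf.cast(yy, tf.int32), tf.cast(xx, tf.int32)], axis=-1)
        return tf.gather_nd(feat, idx)

    wy, wx = y - y0, x - x0
    top = at(y0, x0) + (at(y0, x1) - at(y0, x0)) * wx
    bot = at(y1, x0) + (at(y1, x1) - at(y1, x0)) * wx
    return top + (bot - top) * wy

feat = tf.reshape(tf.range(16, dtype=tf.float32), (4, 4))
print(bilinear_sample(feat, tf.constant([[1.5, 2.5]])))  # midpoint of 6, 7, 10, 11 -> 8.5
```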
- - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - offset_layer=None, - # shape=(3, 3, 1, 100), - n_filter=32, - filter_size=(3, 3), - act=None, - name='deformable_conv_2d', - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None - ): - - super(DeformableConv2d, self - ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - - logging.info( - "DeformableConv2d %s: n_filter: %d, filter_size: %s act: %s" % - (self.name, n_filter, str(filter_size), self.act.__name__ if self.act is not None else 'No Activation') - ) - - self.offset_layer = offset_layer - - try: - pre_channel = int(prev_layer.outputs.get_shape()[-1]) - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - pre_channel = 1 - logging.info("[warnings] unknow input channels, set to 1") - shape = (filter_size[0], filter_size[1], pre_channel, n_filter) - - with tf.compat.v1.variable_scope(name): - offset = self.offset_layer.outputs - - if offset.get_shape()[-1] != 2 * shape[0] * shape[1]: - raise AssertionError("offset.get_shape()[-1] is not equal to: %d" % 2 * shape[0] * shape[1]) - - # Grid initialisation - input_h = int(self.inputs.get_shape()[1]) - input_w = int(self.inputs.get_shape()[2]) - kernel_n = shape[0] * shape[1] - initial_offsets = tf.stack( - tf.meshgrid(tf.range(shape[0]), tf.range(shape[1]), indexing='ij') - ) # initial_offsets --> (kh, kw, 2) - initial_offsets = tf.reshape(initial_offsets, (-1, 2)) # initial_offsets --> (n, 2) - initial_offsets = tf.expand_dims(initial_offsets, 0) # initial_offsets --> (1, n, 2) - initial_offsets = tf.expand_dims(initial_offsets, 0) # initial_offsets --> (1, 1, n, 2) - initial_offsets = tf.tile(initial_offsets, [input_h, input_w, 1, 1]) # initial_offsets --> (h, w, n, 2) - initial_offsets = tf.cast(initial_offsets, 'float32') - grid = tf.meshgrid( - tf.range(-int((shape[0] - 1) / 2.0), int(input_h - int((shape[0] - 1) / 2.0)), 1), - tf.range(-int((shape[1] - 1) / 2.0), int(input_w - int((shape[1] - 1) / 2.0)), 1), indexing='ij' - ) - - grid = tf.stack(grid, axis=-1) - grid = tf.cast(grid, 'float32') # grid --> (h, w, 2) - grid = tf.expand_dims(grid, 2) # grid --> (h, w, 1, 2) - grid = tf.tile(grid, [1, 1, kernel_n, 1]) # grid --> (h, w, n, 2) - grid_offset = grid + initial_offsets # grid_offset --> (h, w, n, 2) - - input_deform = self._tf_batch_map_offsets(self.inputs, offset, grid_offset) - - W = tf.compat.v1.get_variable( - name='W_deformableconv2d', shape=[1, 1, shape[0] * shape[1], shape[-2], shape[-1]], initializer=W_init, - dtype=LayersConfig.tf_dtype, **self.W_init_args - ) - - _tensor = tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None) - - if b_init: - b = tf.compat.v1.get_variable( - name='b_deformableconv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, - **self.b_init_args - ) - - _tensor = tf.nn.bias_add(_tensor, b, name='bias_add') - - self.outputs = tf.reshape( - tensor=self._apply_activation(_tensor), - shape=[tf.shape(input=self.inputs)[0], input_h, input_w, shape[-1]] - ) - - self._add_layers(self.outputs) - - if b_init: - self._add_params([W, b]) - else: - self._add_params(W) - - @private_method - def _to_bc_h_w(self, x, x_shape): - """(b, h, w, c) -> (b*c, h, w)""" - x = tf.transpose(a=x, perm=[0, 3, 1, 2]) - x = 
tf.reshape(x, (-1, x_shape[1], x_shape[2])) - return x - - @private_method - def _to_b_h_w_n_c(self, x, x_shape): - """(b*c, h, w, n) -> (b, h, w, n, c)""" - x = tf.reshape(x, (-1, x_shape[4], x_shape[1], x_shape[2], x_shape[3])) - x = tf.transpose(a=x, perm=[0, 2, 3, 4, 1]) - return x - - @private_method - def tf_flatten(self, a): - """Flatten tensor""" - return tf.reshape(a, [-1]) - - @private_method - def _get_vals_by_coords(self, inputs, coords, idx, out_shape): - indices = tf.stack( - [idx, self.tf_flatten(coords[:, :, :, :, 0]), - self.tf_flatten(coords[:, :, :, :, 1])], axis=-1 - ) - vals = tf.gather_nd(inputs, indices) - vals = tf.reshape(vals, out_shape) - return vals - - @private_method - def _tf_repeat(self, a, repeats): - """Tensorflow version of np.repeat for 1D""" - # https://github.com/tensorflow/tensorflow/issues/8521 - - if len(a.get_shape()) != 1: - raise AssertionError("This is not a 1D Tensor") - - a = tf.expand_dims(a, -1) - a = tf.tile(a, [1, repeats]) - a = self.tf_flatten(a) - return a - - @private_method - def _tf_batch_map_coordinates(self, inputs, coords): - """Batch version of tf_map_coordinates - - Only supports 2D feature maps - - Parameters - ---------- - inputs : ``tf.Tensor`` - shape = (b*c, h, w) - coords : ``tf.Tensor`` - shape = (b*c, h, w, n, 2) - - Returns - ------- - ``tf.Tensor`` - A Tensor with the shape as (b*c, h, w, n) - - """ - inputs_shape = inputs.get_shape() - coords_shape = coords.get_shape() - batch_channel = tf.shape(input=inputs)[0] - input_h = int(inputs_shape[1]) - input_w = int(inputs_shape[2]) - kernel_n = int(coords_shape[3]) - n_coords = input_h * input_w * kernel_n - - coords_lt = tf.cast(tf.floor(coords), 'int32') - coords_rb = tf.cast(tf.math.ceil(coords), 'int32') - coords_lb = tf.stack([coords_lt[:, :, :, :, 0], coords_rb[:, :, :, :, 1]], axis=-1) - coords_rt = tf.stack([coords_rb[:, :, :, :, 0], coords_lt[:, :, :, :, 1]], axis=-1) - - idx = self._tf_repeat(tf.range(batch_channel), n_coords) - - vals_lt = self._get_vals_by_coords(inputs, coords_lt, idx, (batch_channel, input_h, input_w, kernel_n)) - vals_rb = self._get_vals_by_coords(inputs, coords_rb, idx, (batch_channel, input_h, input_w, kernel_n)) - vals_lb = self._get_vals_by_coords(inputs, coords_lb, idx, (batch_channel, input_h, input_w, kernel_n)) - vals_rt = self._get_vals_by_coords(inputs, coords_rt, idx, (batch_channel, input_h, input_w, kernel_n)) - - coords_offset_lt = coords - tf.cast(coords_lt, 'float32') - - vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[:, :, :, :, 0] - vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[:, :, :, :, 0] - mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[:, :, :, :, 1] - - return mapped_vals - - @private_method - def _tf_batch_map_offsets(self, inputs, offsets, grid_offset): - """Batch map offsets into input - - Parameters - ------------ - inputs : ``tf.Tensor`` - shape = (b, h, w, c) - offsets: ``tf.Tensor`` - shape = (b, h, w, 2*n) - grid_offset: `tf.Tensor`` - Offset grids shape = (h, w, n, 2) - - Returns - ------- - ``tf.Tensor`` - A Tensor with the shape as (b, h, w, c) - - """ - inputs_shape = inputs.get_shape() - batch_size = tf.shape(input=inputs)[0] - kernel_n = int(int(offsets.get_shape()[3]) / 2) - input_h = inputs_shape[1] - input_w = inputs_shape[2] - channel = inputs_shape[3] - - # inputs (b, h, w, c) --> (b*c, h, w) - inputs = self._to_bc_h_w(inputs, inputs_shape) - - # offsets (b, h, w, 2*n) --> (b, h, w, n, 2) - offsets = tf.reshape(offsets, (batch_size, input_h, input_w, kernel_n, 2)) 
- # offsets (b, h, w, n, 2) --> (b*c, h, w, n, 2) - # offsets = tf.tile(offsets, [channel, 1, 1, 1, 1]) - - coords = tf.expand_dims(grid_offset, 0) # grid_offset --> (1, h, w, n, 2) - coords = tf.tile(coords, [batch_size, 1, 1, 1, 1]) + offsets # grid_offset --> (b, h, w, n, 2) - - # clip out of bound - coords = tf.stack( - [ - tf.clip_by_value(coords[:, :, :, :, 0], 0.0, tf.cast(input_h - 1, 'float32')), - tf.clip_by_value(coords[:, :, :, :, 1], 0.0, tf.cast(input_w - 1, 'float32')) - ], axis=-1 - ) - coords = tf.tile(coords, [channel, 1, 1, 1, 1]) - - mapped_vals = self._tf_batch_map_coordinates(inputs, coords) - # (b*c, h, w, n) --> (b, h, w, n, c) - mapped_vals = self._to_b_h_w_n_c(mapped_vals, [batch_size, input_h, input_w, kernel_n, channel]) - - return mapped_vals diff --git a/tensorlayer/layers/convolution/depthwise_conv.py b/tensorlayer/layers/convolution/depthwise_conv.py deleted file mode 100644 index 6a9180d..0000000 --- a/tensorlayer/layers/convolution/depthwise_conv.py +++ /dev/null @@ -1,148 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'DepthwiseConv2d', -] - - -class DepthwiseConv2d(Layer): - """Separable/Depthwise Convolutional 2D layer, see `tf.nn.depthwise_conv2d `__. - - Input: - 4-D Tensor (batch, height, width, in_channels). - Output: - 4-D Tensor (batch, new height, new width, in_channels * depth_multiplier). - - Parameters - ------------ - filter_size : tuple of int - The filter size (height, width). - stride : tuple of int - The stride step (height, width). - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - dilation_rate: tuple of 2 int - The dilation rate in which we sample input values across the height and width dimensions in atrous convolution. If it is greater than 1, then all values of strides must be 1. - depth_multiplier : int - The number of channels to expand to. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip bias. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Examples - --------- - >>> net = InputLayer(x, name='input') - >>> net = Conv2d(net, 32, (3, 3), (2, 2), b_init=None, name='cin') - >>> net = BatchNormLayer(net, act=tf.nn.relu, is_train=is_train, name='bnin') - ... - >>> net = DepthwiseConv2d(net, (3, 3), (1, 1), b_init=None, name='cdw1') - >>> net = BatchNormLayer(net, act=tf.nn.relu, is_train=is_train, name='bn11') - >>> net = Conv2d(net, 64, (1, 1), (1, 1), b_init=None, name='c1') - >>> net = BatchNormLayer(net, act=tf.nn.relu, is_train=is_train, name='bn12') - ... 
- >>> net = DepthwiseConv2d(net, (3, 3), (2, 2), b_init=None, name='cdw2')
- >>> net = BatchNormLayer(net, act=tf.nn.relu, is_train=is_train, name='bn21')
- >>> net = Conv2d(net, 128, (1, 1), (1, 1), b_init=None, name='c2')
- >>> net = BatchNormLayer(net, act=tf.nn.relu, is_train=is_train, name='bn22')
-
-    References
-    -----------
-    - tflearn's `grouped_conv_2d `__
-    - keras's `separableconv2d `__
-
-    """
-
-    # https://zhuanlan.zhihu.com/p/31551004 https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/MobileNet.py
-    def __init__(
-            self,
-            filter_size=(3, 3),
-            strides=(1, 1),
-            act=None,
-            padding='SAME',
-            dilation_rate=(1, 1),
-            depth_multiplier=1,
-            W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02),
-            b_init=tf.compat.v1.initializers.constant(value=0.0),
-            W_init_args=None,
-            b_init_args=None,
-            name=None,  # 'depthwise_conv2d',
-    ):
-        super().__init__(name)
-        self.filter_size = filter_size
-        self.strides = strides
-        self.act = act
-        self.padding = padding
-        self.dilation_rate = dilation_rate
-        self.depth_multiplier = depth_multiplier
-        self.W_init = W_init
-        self.b_init = b_init
-        self.W_init_args = W_init_args
-        self.b_init_args = b_init_args
-
-        logging.info(
-            "DepthwiseConv2d %s: filter_size: %s strides: %s pad: %s act: %s" % (
-                self.name, str(filter_size), str(strides), padding,
-                self.act.__name__ if self.act is not None else 'No Activation'
-            )
-        )
-
-    def build(self, inputs_shape):
-        self.pre_channel = inputs_shape[-1]
-        if self.pre_channel is None:  # unknown input channels, e.g. with a Spatial Transformer Net
-            self.pre_channel = 1
-            logging.warning("unknown input channels, set to 1")
-
-        # kernel shape: [filter_height, filter_width, in_channels, depth_multiplier]
-        self.filter_size = [self.filter_size[0], self.filter_size[1], self.pre_channel, self.depth_multiplier]
-
-        if len(self.strides) == 2:
-            self.strides = [1, self.strides[0], self.strides[1], 1]
-
-        if len(self.strides) != 4:
-            raise AssertionError("len(strides) should be 4.")
-
-        self.W = self._get_weights("filters", shape=self.filter_size, init=self.W_init, init_args=self.W_init_args)
-        if self.b_init:
-            self.b = self._get_weights(
-                "biases", shape=(self.pre_channel * self.depth_multiplier), init=self.b_init, init_args=self.b_init_args
-            )
-
-    def forward(self, inputs):
-        outputs = tf.nn.depthwise_conv2d(
-            input=inputs, filter=self.W, strides=self.strides, padding=self.padding, dilations=self.dilation_rate
-        )
-        if self.b_init:
-            outputs = tf.nn.bias_add(outputs, self.b, name='bias_add')
-        if self.act:
-            outputs = self.act(outputs)
-        return outputs
diff --git a/tensorlayer/layers/convolution/dorefa_conv.py b/tensorlayer/layers/convolution/dorefa_conv.py
deleted file mode 100644
index bb8d077..0000000
--- a/tensorlayer/layers/convolution/dorefa_conv.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#!
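For the `DepthwiseConv2d` layer removed above, the underlying op is `tf.nn.depthwise_conv2d`: each of the `in_channels` input channels is convolved with its own `depth_multiplier` spatial filters, so the output has `in_channels * depth_multiplier` channels. A minimal shape check:

```python
import tensorflow as tf

x = tf.random.normal([1, 32, 32, 8])   # NHWC, 8 input channels
# kernel shape: (filter_h, filter_w, in_channels, depth_multiplier)
kernel = tf.random.normal([3, 3, 8, 2])
y = tf.nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME')
print(y.shape)  # (1, 32, 32, 16): every input channel expands to 2 output channels
```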
/usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import cabs -from tensorlayer.layers.utils import quantize_active -from tensorlayer.layers.utils import quantize_weight - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = ['DorefaConv2d'] - - -class DorefaConv2d(Layer): - """The :class:`DorefaConv2d` class is a 2D quantized convolutional layer, which weights are 'bitW' bits and the output of the previous layer - are 'bitA' bits while inferencing. - - Note that, the bias vector would not be binarized. - - Parameters - ---------- - bitW : int - The bits of this layer's parameter - bitA : int - The bits of the output of previous layer - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inferencing. (TODO). - W_init : initializer - The initializer for the the weight matrix. - b_init : initializer or None - The initializer for the the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - use_cudnn_on_gpu : bool - Default is False. - data_format : str - "NHWC" or "NCHW", default is "NHWC". - name : None or str - A unique layer name. - - """ - - def __init__( - self, - bitW=1, - bitA=3, - n_filter=32, - filter_size=(3, 3), - strides=(1, 1), - act=None, - padding='SAME', - data_format=None, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - use_cudnn_on_gpu=None, - name=None, #'dorefa_cnn2d', - ): - # super(DorefaConv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.bitW = bitW - self.bitA = bitA - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.act = act - self.padding = padding - self.data_format = data_format - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - self.use_cudnn_on_gpu = use_cudnn_on_gpu - logging.info( - "DorefaConv2d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs_shape): - - if self.use_gemm: - raise Exception("TODO. 
The current version use tf.matmul for inferencing.") - - if len(self.strides) != 2: - raise ValueError("len(strides) should be 2.") - - try: - self.pre_channel = inputs_shape[-1] - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - self.pre_channel = 1 - logging.warning("[warnings] unknow input channels, set to 1") - - self.shape = (self.filter_size[0], self.filter_size[1], self.pre_channel, self.n_filter) - self.strides = (1, self.strides[0], self.strides[1], 1) - - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\kernel', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights("filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.n_filter), init=self.b_init, init_args=self.b_init_args) - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\bias', shape=(self.shape[-1]), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # self.add_weights([self.W, self.b]) - # else: - # self.add_weights(self.W) - - def forward(self, inputs): - - inputs = quantize_active(cabs(inputs), self.bitA) # Do not remove - - W_ = quantize_weight(self.W, self.bitW) - - outputs = tf.nn.conv2d( - inputs, W_, strides=self.strides, padding=self.padding, use_cudnn_on_gpu=self.use_cudnn_on_gpu, - data_format=self.data_format - ) - - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - - return outputs diff --git a/tensorlayer/layers/convolution/expert_conv.py b/tensorlayer/layers/convolution/expert_conv.py deleted file mode 100644 index 0199056..0000000 --- a/tensorlayer/layers/convolution/expert_conv.py +++ /dev/null @@ -1,378 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Conv1dLayer', - 'Conv2dLayer', - 'Conv3dLayer', -] - - -class Conv1dLayer(Layer): - """ - The :class:`Conv1dLayer` class is a 1D CNN layer, see `tf.nn.convolution `__. - - Parameters - ---------- - act : activation function - The activation function of this layer. - shape : tuple of int - The shape of the filters: (filter_length, in_channels, out_channels). - stride : int - The number of entries by which the filter is moved right at a step. - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - Default is 'NWC' as it is a 1D CNN. - dilation_rate : int - Filter up-sampling/input down-sampling rate. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. 
- name : None or str - A unique layer name - - """ - - def __init__( - self, - act=None, - shape=(5, 1, 5), - stride=1, - padding='SAME', - data_format='NWC', - dilation_rate=1, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'cnn1d', - ): - # super(Conv1dLayer, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.act = act, - self.shape = shape - self.stride = stride - self.dilation_rate = dilation_rate - self.padding = padding - self.data_format = data_format - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - - logging.info( - "Conv1dLayer %s: shape: %s stride: %s pad: %s act: %s" % ( - self.name, str(shape), str(stride), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs_shape): - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W_conv1d', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights("filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args) - if self.b_init: - self.b = self._get_weights( - "biases", - shape=(self.n_filter), #self.shape[-1]), - init=self.b_init, - init_args=self.b_init_args - ) - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b_conv1d', shape=(self.shape[-1]), initializer=self.b_init, - # dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - # self.add_weights([self.W, self.b]) - # else: - # self.add_weights(self.W) - - def forward(self, inputs): - - outputs = tf.nn.convolution( - input=inputs, - filters=self.W, - strides=(self.stride, ), - padding=self.padding, - dilations=(self.dilation_rate, ), - data_format=self.data_format, - name=self.name, - ) - - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - - outputs = self.act(outputs) - return outputs - - -class Conv2dLayer(Layer): - """ - The :class:`Conv2dLayer` class is a 2D CNN layer, see `tf.nn.conv2d `__. - - Parameters - ---------- - act : activation function - The activation function of this layer. - shape : tuple of int - The shape of the filters: (filter_height, filter_width, in_channels, out_channels). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - "NHWC" or "NCHW", default is "NHWC". - dilation_rate : int - Filter up-sampling/input down-sampling rate. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - use_cudnn_on_gpu : bool - Default is False. - name : None or str - A unique layer name. - - Notes - ----- - - shape = [h, w, the number of output channel of previous layer, the number of output channels] - - the number of output channel of a layer is its last dimension. - - Examples - -------- - With TensorLayer - - >>> x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) - >>> net = tl.layers.Input(x, name='input_layer') - >>> net = tl.layers.Conv2dLayer(net, - ... 
act = tf.nn.relu,
-    ...             shape = (5, 5, 1, 32),  # 32 features for each 5x5 patch
-    ...             strides = (1, 1, 1, 1),
-    ...             padding='SAME',
-    ...             W_init=tf.truncated_normal_initializer(stddev=5e-2),
-    ...             b_init = tf.constant_initializer(value=0.0),
-    ...             name ='cnn_layer1')     # output: (?, 28, 28, 32)
-    >>> net = tl.layers.Pool(net,
-    ...             ksize=(1, 2, 2, 1),
-    ...             strides=(1, 2, 2, 1),
-    ...             padding='SAME',
-    ...             pool = tf.nn.max_pool,
-    ...             name ='pool_layer1',)   # output: (?, 14, 14, 32)
-
-    Without TensorLayer, you can implement 2D convolution as follows.
-
-    >>> W = tf.Variable(W_init(shape=[5, 5, 1, 32], ), name='W_conv')
-    >>> b = tf.Variable(b_init(shape=[32], ), name='b_conv')
-    >>> outputs = tf.nn.relu( tf.nn.conv2d(inputs, W,
-    ...                       strides=[1, 1, 1, 1],
-    ...                       padding='SAME') + b )
-
-    """
-
-    def __init__(
-            self,
-            act=None,
-            shape=(5, 5, 1, 100),
-            strides=(1, 1, 1, 1),
-            padding='SAME',
-            data_format=None,
-            dilations=[1, 1, 1, 1],
-            W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02),
-            b_init=tf.compat.v1.initializers.constant(value=0.0),
-            W_init_args=None,
-            b_init_args=None,
-            use_cudnn_on_gpu=None,
-            name=None,  # 'cnn_layer',
-    ):
-        super().__init__(name)
-        self.act = act
-        self.shape = shape
-        self.strides = strides
-        self.padding = padding
-        self.data_format = data_format
-        self.dilations = dilations
-        self.use_cudnn_on_gpu = use_cudnn_on_gpu
-        self.W_init = W_init
-        self.b_init = b_init
-        self.W_init_args = W_init_args
-        self.b_init_args = b_init_args
-
-        logging.info(
-            "Conv2dLayer %s: shape: %s strides: %s pad: %s act: %s" % (
-                self.name, str(shape), str(strides), padding,
-                self.act.__name__ if self.act is not None else 'No Activation'
-            )
-        )
-
-    def build(self, inputs):
-        self.W = self._get_weights("filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args)
-        if self.b_init:
-            self.b = self._get_weights("biases", shape=(self.shape[-1]), init=self.b_init, init_args=self.b_init_args)
-
-    def forward(self, inputs):
-        outputs = tf.nn.conv2d(
-            inputs,
-            self.W,
-            strides=self.strides,
-            padding=self.padding,
-            use_cudnn_on_gpu=self.use_cudnn_on_gpu,
-            data_format=self.data_format,
-            dilations=self.dilations,
-            name=self.name,
-        )
-
-        if self.b_init:
-            outputs = tf.nn.bias_add(outputs, self.b, name='bias_add')
-
-        if self.act:
-            outputs = self.act(outputs)
-        return outputs
-
-
-class Conv3dLayer(Layer):
-    """
-    The :class:`Conv3dLayer` class is a 3D CNN layer, see `tf.nn.conv3d `__.
-
-    Parameters
-    ----------
-    act : activation function
-        The activation function of this layer.
-    shape : tuple of int
-        Shape of the filters: (filter_depth, filter_height, filter_width, in_channels, out_channels).
-    strides : tuple of int
-        The sliding window strides for corresponding input dimensions.
-        Must be in the same order as the shape dimension.
-    padding : str
-        The padding algorithm type: "SAME" or "VALID".
-    data_format : str
-        "NDHWC" or "NCDHW", default is "NDHWC".
- dilation_rate : int - Filter up-sampling/input down-sampling rate. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : None or str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder(tf.float32, (None, 100, 100, 100, 3)) - >>> n = tl.layers.Input(x, name='in3') - >>> n = tl.layers.Conv3dLayer(n, shape=(2, 2, 2, 3, 32), strides=(1, 2, 2, 2, 1)) - [None, 50, 50, 50, 32] - """ - - def __init__( - self, - act=None, - shape=(2, 2, 2, 3, 32), - strides=(1, 2, 2, 2, 1), - padding='SAME', - data_format='NDHWC', - dilations=[1, 1, 1, 1, 1], - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'cnn3d_layer', - ): - # super(Conv3dLayer, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.act = act, - self.shape = shape - self.stride = stride - self.padding = padding - self.data_format = data_format - self.dilation_rate = dilation_rate - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - - logging.info( - "Conv3dLayer %s: shape: %s strides: %s pad: %s act: %s" % ( - self.name, str(shape), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs): - - self.W = self._get_weights("filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.n_filter), init=self.b_init, init_args=self.b_init_args) - - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W_conv3d', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - # - # if self.b_init: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b_conv3d', shape=(self.shape[-1]), initializer=self.b_init, - # dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - # self.add_weights([self.W, self.b]) - # else: - # self.add_weights(self.W) - - def forward(self, inputs): - outputs = tf.nn.conv3d( - input=inputs, - filter=self.W, - strides=self.strides, - padding=self.padding, - # use_cudnn_on_gpu=self.use_cudnn_on_gpu, #True, - data_format=self.data_format, #'NDHWC', - dilations=self.dilation_rate, #[1, 1, 1, 1, 1], - name=self.name, - ) - - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/convolution/expert_deconv.py b/tensorlayer/layers/convolution/expert_deconv.py deleted file mode 100644 index 078abde..0000000 --- a/tensorlayer/layers/convolution/expert_deconv.py +++ /dev/null @@ -1,237 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'DeConv2dLayer', - 'DeConv3dLayer', -] - - -class DeConv2dLayer(Layer): - """A de-convolution 2D layer. - - See `tf.nn.conv2d_transpose `__. 
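The expert-mode `Conv1dLayer`/`Conv2dLayer`/`Conv3dLayer` classes removed above are thin wrappers around the raw `tf.nn` ops. Mirroring the "without TensorLayer" snippet in the 2D docstring, the 3D docstring example reduces to:

```python
import tensorflow as tf

x = tf.random.normal([1, 100, 100, 100, 3])   # NDHWC
W = tf.Variable(tf.random.truncated_normal([2, 2, 2, 3, 32], stddev=0.02))
b = tf.Variable(tf.zeros([32]))

# stride 2 in each spatial dimension with 'SAME' padding halves d, h, and w
y = tf.nn.conv3d(x, W, strides=[1, 2, 2, 2, 1], padding='SAME')
y = tf.nn.relu(tf.nn.bias_add(y, b))
print(y.shape)  # (1, 50, 50, 50, 32), matching the Conv3dLayer docstring example
```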
- - Parameters - ---------- - act : activation function or None - The activation function of this layer. - shape : tuple of int - Shape of the filters: (height, width, output_channels, in_channels). - The filter's ``in_channels`` dimension must match that of value. - outputs_shape : tuple of int - Output shape of the deconvolution, - strides : tuple of int - The sliding window strides for corresponding input dimensions. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for initializing the weight matrix. - b_init_args : dictionary - The arguments for initializing the bias vector. - name : None or str - A unique layer name. - - Notes - ----- - - We recommend to use `DeConv2d` with TensorFlow version higher than 1.3. - - shape = [h, w, the number of output channels of this layer, the number of output channel of the previous layer]. - - outputs_shape = [batch_size, any, any, the number of output channels of this layer]. - - the number of output channel of a layer is its last dimension. - - Examples - -------- - A part of the generator in DCGAN example - - >>> batch_size = 64 - >>> inputs = tf.placeholder(tf.float32, [batch_size, 100], name='z_noise') - >>> net_in = tl.layers.InputLayer(inputs, name='g/in') - >>> net_h0 = tl.layers.DenseLayer(net_in, n_units = 8192, - ... W_init = tf.random_normal_initializer(stddev=0.02), - ... act = None, name='g/h0/lin') - >>> print(net_h0.outputs._shape) - (64, 8192) - >>> net_h0 = tl.layers.ReshapeLayer(net_h0, shape=(-1, 4, 4, 512), name='g/h0/reshape') - >>> net_h0 = tl.layers.BatchNormLayer(net_h0, act=tf.nn.relu, is_train=is_train, name='g/h0/batch_norm') - >>> print(net_h0.outputs._shape) - (64, 4, 4, 512) - >>> net_h1 = tl.layers.DeConv2dLayer(net_h0, - ... shape=(5, 5, 256, 512), - ... outputs_shape=(batch_size, 8, 8, 256), - ... strides=(1, 2, 2, 1), - ... act=None, name='g/h1/decon2d') - >>> net_h1 = tl.layers.BatchNormLayer(net_h1, act=tf.nn.relu, is_train=is_train, name='g/h1/batch_norm') - >>> print(net_h1.outputs._shape) - (64, 8, 8, 256) - - U-Net - - >>> .... - >>> conv10 = tl.layers.Conv2dLayer(conv9, act=tf.nn.relu, - ... shape=(3,3,1024,1024), strides=(1,1,1,1), padding='SAME', - ... W_init=w_init, b_init=b_init, name='conv10') - >>> print(conv10.outputs) - (batch_size, 32, 32, 1024) - >>> deconv1 = tl.layers.DeConv2dLayer(conv10, act=tf.nn.relu, - ... shape=(3,3,512,1024), strides=(1,2,2,1), outputs_shape=(batch_size,64,64,512), - ... 
padding='SAME', W_init=w_init, b_init=b_init, name='devcon1_1') - - """ - - def __init__( - self, - act=None, - shape=(3, 3, 128, 256), - outputs_shape=(1, 256, 256, 128), - strides=(1, 2, 2, 1), - padding='SAME', - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'decnn2d_layer', - ): - # super(DeConv2dLayer, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.act = act - self.shape = shape - self.outputs_shape = outputs_shape - self.strides = strides - self.padding = padding - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - logging.info( - "DeConv2dLayer %s: shape: %s out_shape: %s strides: %s pad: %s act: %s" % ( - self.name, str(shape), str(outputs_shape), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs): - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\kernel', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights("filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.shape[-2]), init=self.b_init, init_args=self.b_init_args) - # if self.b_init: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\bias', shape=(self.shape[-2]), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # self.add_weights([self.W, self.b]) - # else: - # self.add_weights(self.W) - - def forward(self, inputs): - outputs = tf.nn.conv2d_transpose( - inputs, self.W, outputs_shape=self.outputs_shape, strides=self.strides, padding=self.padding - ) - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs - - -class DeConv3dLayer(Layer): - """The :class:`DeConv3dLayer` class is deconvolutional 3D layer, see `tf.nn.conv3d_transpose `__. - - Parameters - ---------- - act : activation function or None - The activation function of this layer. - shape : tuple of int - The shape of the filters: (depth, height, width, output_channels, in_channels). - The filter's in_channels dimension must match that of value. - outputs_shape : tuple of int - The output shape of the deconvolution. - strides : tuple of int - The sliding window strides for corresponding input dimensions. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : None or str - A unique layer name. 
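Because `tf.nn.conv2d_transpose` needs an explicit output size, the `DeConv2dLayer` removed above takes `outputs_shape` as an argument. With 'SAME' padding the spatial rule is simply `out = in * stride`, which is where the (64, 8, 8, 256) in its DCGAN example comes from; a raw-TensorFlow check (note that TensorFlow's own keyword is `output_shape`):

```python
import tensorflow as tf

batch_size = 64
x = tf.random.normal([batch_size, 4, 4, 512])
# filter shape for a transposed conv: (h, w, out_channels, in_channels)
W = tf.Variable(tf.random.truncated_normal([5, 5, 256, 512], stddev=0.02))

y = tf.nn.conv2d_transpose(
    x, W,
    output_shape=[batch_size, 8, 8, 256],  # 4 * stride(2) = 8 per spatial dim
    strides=[1, 2, 2, 1],
    padding='SAME',
)
print(y.shape)  # (64, 8, 8, 256)
```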
- - """ - - def __init__( - self, - act=None, - shape=(2, 2, 2, 128, 256), - outputs_shape=(1, 12, 32, 32, 128), - strides=(1, 2, 2, 2, 1), - padding='SAME', - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'decnn3d_layer', - ): - # super(DeConv3dLayer, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.act = act - self.shape = shape - self.outputs_shape = outputs_shape - self.strides = strides - self.padding = padding - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - logging.info( - "DeConv3dLayer %s: shape: %s out_shape: %s strides: %s pad: %s act: %s" % ( - self.name, str(shape), str(outputs_shape), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs): - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\kernel', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights("filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.shape[-2]), init=self.b_init, init_args=self.b_init_args) - # if self.b_init: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\kernel', shape=(self.shape[-2]), initializer=self.b_init, - # dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - - def forward(self, inputs): - outputs = tf.nn.conv3d_transpose( - inputs, self.W, outputs_shape=self.outputs_shape, strides=self.strides, padding=self.padding - ) - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/convolution/group_conv.py b/tensorlayer/layers/convolution/group_conv.py deleted file mode 100644 index 393ab97..0000000 --- a/tensorlayer/layers/convolution/group_conv.py +++ /dev/null @@ -1,120 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'GroupConv2d', -] - - -class GroupConv2d(Layer): - """The :class:`GroupConv2d` class is 2D grouped convolution, see `here `__. - - Parameters - -------------- - n_filter : int - The number of filters. - filter_size : int - The filter size. - stride : int - The stride step. - n_group : int - The number of groups. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : None or str - A unique layer name. 
- """ - - def __init__( - self, - n_filter=32, - filter_size=(3, 3), - strides=(2, 2), - n_group=2, - act=None, - padding='SAME', - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'groupconv', - ): # Windaway - - # super(GroupConv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.n_group = n_group - self.act = act - self.padding = padding - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - logging.info( - "GroupConv2d %s: n_filter: %d size: %s strides: %s n_group: %d pad: %s act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), n_group, padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs): - self.groupConv = lambda i, k: tf.nn.conv2d( - i, k, strides=[1, self.strides[0], self.strides[1], 1], padding=self.padding - ) - channels = int(inputs.get_shape()[-1]) - - # self.We = tf.compat.v1.get_variable( - # name=self.name + '\W', - # shape=[self.filter_size[0], self.filter_size[1], channels / self.n_group, self.n_filter], - # initializer=self.W_init, dtype=LayersConfig.tf_dtype, trainable=True, **self.W_init_args - # ) - self.We = self._get_weights( - "filters", shape=[self.filter_size[0], self.filter_size[1], channels / self.n_group, self.n_filter], - init=self.W_init, init_args=self.W_init_args - ) - if self.b_init: - self.b = self._get_weights("biases", shape=self.n_filter, init=self.b_init, init_args=self.b_init_args) - # if self.b_init: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', shape=self.n_filter, initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # trainable=True, **self.b_init_args - # ) - # self.add_weights([self.We, self.b]) - # else: - # self.add_weights(self.We) - - def forward(self, inputs): - if self.n_group == 1: - outputs = self.groupConv(inputs, self.We) - else: - inputGroups = tf.split(axis=3, num_or_size_splits=self.n_group, value=self.inputs) - weightsGroups = tf.split(axis=3, num_or_size_splits=self.n_group, value=self.We) - convGroups = [groupConv(i, k) for i, k in zip(inputGroups, weightsGroups)] - outputs = tf.concat(axis=3, values=convGroups) - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/convolution/quan_conv.py b/tensorlayer/layers/convolution/quan_conv.py deleted file mode 100644 index c3a2313..0000000 --- a/tensorlayer/layers/convolution/quan_conv.py +++ /dev/null @@ -1,164 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import quantize_active_overflow -from tensorlayer.layers.utils import quantize_weight_overflow - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = ['QuanConv2d'] - - -class QuanConv2d(Layer): - """The :class:`QuanConv2d` class is a quantized convolutional layer without BN, which weights are 'bitW' bits and the output of the previous layer - are 'bitA' bits while inferencing. - Note that, the bias vector would not be binarized. 
- - Parameters - ---------- - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - bitW : int - The bits of this layer's parameter - bitA : int - The bits of the output of previous layer - data_format : str - "NHWC" or "NCHW", default is "NHWC". - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inferencing. (TODO). - W_init : initializer - The initializer for the the weight matrix. - b_init : initializer or None - The initializer for the the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - use_cudnn_on_gpu : bool - Default is False. - name : None or str - A unique layer name. - - Examples - --------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, [None, 256, 256, 3]) - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.QuanConv2d(net, 32, (5, 5), (1, 1), padding='SAME', act=tf.nn.relu, name='qcnn1') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool1') - >>> net = tl.layers.BatchNormLayer(net, act=tl.act.htanh, is_train=True, name='bn1') - ... - >>> net = tl.layers.QuanConv2d(net, 64, (5, 5), (1, 1), padding='SAME', act=tf.nn.relu, name='qcnn2') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool2') - >>> net = tl.layers.BatchNormLayer(net, act=tl.act.htanh, is_train=True, name='bn2') - - """ - - def __init__( - self, - n_filter=32, - filter_size=(3, 3), - strides=(1, 1), - act=None, - padding='SAME', - bitW=8, - bitA=8, - data_format=None, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - use_cudnn_on_gpu=None, - name=None, #'quan_cnn2d', - ): - # super(QuanConv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.act = act - self.padding = padding - self.bitW = bitW - self.bitA = bitA - self.data_format = data_format - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - self.use_cudnn_on_gpu = use_cudnn_on_gpu - - logging.info( - "QuanConv2d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs_shape): - - if self.use_gemm: - raise Exception("TODO. 
The current version use tf.matmul for inferencing.") - - if len(self.strides) != 2: - raise ValueError("len(strides) should be 2.") - - try: - self.pre_channel = inputs_shape[-1] - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - self.pre_channel = 1 - logging.warning("[warnings] unknow input channels, set to 1") - - self.shape = (self.filter_size[0], self.filter_size[1], self.pre_channel, self.n_filter) - self.strides = (1, self.strides[0], self.strides[1], 1) - - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\kernel', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights("filters", shape=self.shape, init=self.W_init, init_args=self.W_init_args) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.shape[-1]), init=self.b_init, init_args=self.b_init_args) - # if self.b_init: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\bias', shape=(self.shape[-1]), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # self.add_weights([self.W, self.b]) - # else: - # self.add_weights(self.W) - - def forward(self, inputs): - inputs = quantize_active_overflow(inputs, self.bitA) # Do not remove - - W_ = quantize_weight_overflow(self.W, self.bitW) - - outputs = tf.nn.conv2d( - inputs, W_, strides=self.strides, padding=self.padding, use_cudnn_on_gpu=self.use_cudnn_on_gpu, - data_format=self.data_format - ) - - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/convolution/quan_conv_bn.py b/tensorlayer/layers/convolution/quan_conv_bn.py deleted file mode 100644 index 6612974..0000000 --- a/tensorlayer/layers/convolution/quan_conv_bn.py +++ /dev/null @@ -1,229 +0,0 @@ -# /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import quantize_active_overflow -from tensorlayer.layers.utils import quantize_weight_overflow - -from tensorflow.python.training import moving_averages -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = ['QuanConv2dWithBN'] - - -class QuanConv2dWithBN(Layer): - """The :class:`QuanConv2dWithBN` class is a quantized convolutional layer with BN, which weights are 'bitW' bits and the output of the previous layer - are 'bitA' bits while inferencing. - - Note that, the bias vector would keep the same. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - padding : str - The padding algorithm type: "SAME" or "VALID". - act : activation function - The activation function of this layer. - decay : float - A decay factor for `ExponentialMovingAverage`. - Suggest to use a large value for large dataset. - epsilon : float - Eplison. - is_train : boolean - Is being used for training or inference. - beta_init : initializer or None - The initializer for initializing beta, if None, skip beta. - Usually you should not skip beta unless you know what happened. 
- gamma_init : initializer or None - The initializer for initializing gamma, if None, skip gamma. - bitW : int - The bits of this layer's parameter - bitA : int - The bits of the output of previous layer - decay : float - A decay factor for `ExponentialMovingAverage`. - Suggest to use a large value for large dataset. - epsilon : float - Eplison. - is_train : boolean - Is being used for training or inference. - beta_init : initializer or None - The initializer for initializing beta, if None, skip beta. - Usually you should not skip beta unless you know what happened. - gamma_init : initializer or None - The initializer for initializing gamma, if None, skip gamma. - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inferencing. (TODO). - W_init : initializer - The initializer for the the weight matrix. - W_init_args : dictionary - The arguments for the weight matrix initializer. - use_cudnn_on_gpu : bool - Default is False. - data_format : str - "NHWC" or "NCHW", default is "NHWC". - name : str - A unique layer name. - - Examples - --------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, [None, 256, 256, 3]) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.QuanConv2dWithBN(net, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', is_train=is_train, bitW=bitW, bitA=bitA, name='qcnnbn1') - >>> net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool1') - ... - >>> net = tl.layers.QuanConv2dWithBN(net, 64, (5, 5), (1, 1), padding='SAME', act=tf.nn.relu, is_train=is_train, bitW=bitW, bitA=bitA, name='qcnnbn2') - >>> net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME', name='pool2') - ... - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - n_filter=32, - filter_size=(3, 3), - strides=(1, 1), - padding='SAME', - act=None, - decay=0.9, - epsilon=1e-5, - is_train=False, - gamma_init=tf.compat.v1.initializers.ones, - beta_init=tf.compat.v1.initializers.zeros, - bitW=8, - bitA=8, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - W_init_args=None, - use_cudnn_on_gpu=None, - data_format=None, - name='quan_cnn2d_bn', - ): - super(QuanConv2dWithBN, self).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, name=name) - - logging.info( - "QuanConv2dWithBN %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s " % ( - self.name, n_filter, filter_size, str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - x = self.inputs - self.inputs = quantize_active_overflow(self.inputs, bitA) # Do not remove - - if use_gemm: - raise Exception("TODO. 
The current version uses tf.matmul for inference.") - - if len(strides) != 2: - raise ValueError("len(strides) should be 2.") - - try: - pre_channel = int(prev_layer.outputs.get_shape()[-1]) - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - pre_channel = 1 - logging.warning("unknown input channels, set to 1") - - shape = (filter_size[0], filter_size[1], pre_channel, n_filter) - strides = (1, strides[0], strides[1], 1) - - with tf.compat.v1.variable_scope(name): - W = tf.compat.v1.get_variable( - name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **self.W_init_args - ) - - conv = tf.nn.conv2d( - x, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format - ) - - para_bn_shape = conv.get_shape()[-1:] - - if gamma_init: - scale_para = tf.compat.v1.get_variable( - name='scale_para', shape=para_bn_shape, initializer=gamma_init, dtype=LayersConfig.tf_dtype, - trainable=is_train - ) - else: - scale_para = None - - if beta_init: - offset_para = tf.compat.v1.get_variable( - name='offset_para', shape=para_bn_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, - trainable=is_train - ) - else: - offset_para = None - - moving_mean = tf.compat.v1.get_variable( - 'moving_mean', para_bn_shape, initializer=tf.compat.v1.initializers.constant(1.), - dtype=LayersConfig.tf_dtype, trainable=False - ) - - moving_variance = tf.compat.v1.get_variable( - 'moving_variance', - para_bn_shape, - initializer=tf.compat.v1.initializers.constant(1.), - dtype=LayersConfig.tf_dtype, - trainable=False, - ) - - mean, variance = tf.nn.moments(x=conv, axes=list(range(len(conv.get_shape()) - 1))) - - update_moving_mean = moving_averages.assign_moving_average( - moving_mean, mean, decay, zero_debias=False - ) # if zero_debias=True, has bias - - update_moving_variance = moving_averages.assign_moving_average( - moving_variance, variance, decay, zero_debias=False - ) # if zero_debias=True, has bias - - def mean_var_with_update(): - with tf.control_dependencies([update_moving_mean, update_moving_variance]): - return tf.identity(mean), tf.identity(variance) - - if is_train: - mean, var = mean_var_with_update() - else: - mean, var = moving_mean, moving_variance - - w_fold = _w_fold(W, scale_para, var, epsilon) - bias_fold = _bias_fold(offset_para, scale_para, mean, var, epsilon) - - W = quantize_weight_overflow(w_fold, bitW) - - conv_fold = tf.nn.conv2d( - self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, - data_format=data_format - ) - - self.outputs = tf.nn.bias_add(conv_fold, bias_fold, name='bn_bias_add') - - self.outputs = self._apply_activation(self.outputs) - - self._add_layers(self.outputs) - - self._add_params([W, scale_para, offset_para, moving_mean, moving_variance]) - - - def _w_fold(w, gama, var, epsilon): - return tf.compat.v1.div(tf.multiply(gama, w), tf.sqrt(var + epsilon)) - - - def _bias_fold(beta, gama, mean, var, epsilon): - return tf.subtract(beta, tf.compat.v1.div(tf.multiply(gama, mean), tf.sqrt(var + epsilon)))
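The two helpers above implement the standard batch-norm folding identities, w_fold = gamma * w / sqrt(var + eps) and b_fold = beta - gamma * mean / sqrt(var + eps). A quick NumPy check (illustrative only, not part of this diff) confirms that convolving with the folded kernel and bias reproduces batch normalization applied to the raw convolution output:

```python
import numpy as np

# Scalar stand-ins for a 1x1 convolution: folding gamma/beta/mean/var into the
# kernel and bias must reproduce BN(conv(x)).
gamma, beta, mean, var, eps = 1.5, 0.2, 0.4, 0.9, 1e-5
w, x = 0.7, 2.0

conv = x * w
bn = gamma * (conv - mean) / np.sqrt(var + eps) + beta

w_fold = gamma * w / np.sqrt(var + eps)            # _w_fold
b_fold = beta - gamma * mean / np.sqrt(var + eps)  # _bias_fold
assert np.isclose(x * w_fold + b_fold, bn)
```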
diff --git a/tensorlayer/layers/convolution/separable_conv.py b/tensorlayer/layers/convolution/separable_conv.py deleted file mode 100644 index c41c133..0000000 --- a/tensorlayer/layers/convolution/separable_conv.py +++ /dev/null @@ -1,277 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np # used by build() for the dummy forward pass - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - from tensorlayer.layers.utils import get_collection_trainable - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'SeparableConv1d', - 'SeparableConv2d', - ] - - - class SeparableConv1d(Layer): - """The :class:`SeparableConv1d` class is a 1D depthwise separable convolutional layer. - - This layer performs a depthwise convolution that acts separately on channels, followed by a pointwise convolution that mixes channels. - - Parameters - ------------ - # prev_layer : :class:`Layer` - # Previous layer. - n_filter : int - The dimensionality of the output space (i.e. the number of filters in the convolution). - filter_size : int - Specifying the spatial dimensions of the filters. Can be a single integer to specify the same value for all spatial dimensions. - strides : int - Specifying the stride of the convolution. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1. - padding : str - One of "valid" or "same" (case-insensitive). - data_format : str - One of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). - dilation_rate : int - Specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1. - depth_multiplier : int - The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier. - depthwise_init : initializer - For the depthwise convolution kernel. - pointwise_init : initializer - For the pointwise convolution kernel. - b_init : initializer - For the bias vector. If None, ignore bias in the pointwise part only. - name : None or str - A unique layer name.
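Because the depthwise and pointwise kernels factorize the filter bank, the parameter count drops sharply compared with a standard convolution. A plain-Python sketch of the arithmetic (illustrative, not TensorLayer API):

```python
def standard_conv1d_params(filter_size, in_ch, out_ch):
    # one dense kernel mixing all channels at every tap
    return filter_size * in_ch * out_ch

def separable_conv1d_params(filter_size, in_ch, out_ch, depth_multiplier=1):
    depthwise = filter_size * in_ch * depth_multiplier  # per-channel filtering
    pointwise = in_ch * depth_multiplier * out_ch       # 1x1 channel mixing
    return depthwise + pointwise

print(standard_conv1d_params(3, 64, 128))   # 24576
print(separable_conv1d_params(3, 64, 128))  # 192 + 8192 = 8384
```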
- - """ - - # @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - n_filter=100, - filter_size=3, - strides=1, - act=None, - padding='valid', - data_format='channels_last', - dilation_rate=1, - depth_multiplier=1, - # activation=None, - # use_bias=True, - depthwise_init=None, - pointwise_init=None, - b_init=tf.compat.v1.initializers.zeros(), - # depthwise_regularizer=None, - # pointwise_regularizer=None, - # bias_regularizer=None, - # activity_regularizer=None, - # depthwise_constraint=None, - # pointwise_constraint=None, - # W_init=tf.truncated_normal_initializer(stddev=0.1), - # b_init=tf.constant_initializer(value=0.0), - W_init_args=None, # TODO: Remove when TF <1.3 not supported - b_init_args=None, # TODO: Remove when TF <1.3 not supported - name=None, #'seperable1d', - ): - # super(SeparableConv1d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.act = act - self.padding = padding - self.data_format = data_format - self.dilation_rate = dilation_rate - self.depth_multiplier = depth_multiplier - self.depthwise_init = depthwise_init - self.pointwise_init = pointwise_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - - logging.info( - "SeparableConv1d %s: n_filter: %d filter_size: %s filter_size: %s depth_multiplier: %d act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), depth_multiplier, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs_shape): - self.layer = tf.keras.SeparableConv1D( - filters=self.n_filter, - kernel_size=self.filter_size, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate, - depth_multiplier=self.depth_multiplier, - activation=self.act, - use_bias=(True if self.b_init is not None else False), - depthwise_initializer=self.depthwise_init, - pointwise_initializer=self.pointwise_init, - bias_initializer=self.b_init, - # depthwise_regularizer=None, - # pointwise_regularizer=None, - # bias_regularizer=None, - # activity_regularizer=None, - # depthwise_constraint=None, - # pointwise_constraint=None, - # bias_constraint=None, - trainable=True, - name=self.name - ) - - _out = self.layer(np.random.uniform([1] + list(inputs_shape))) # initialize weights - outputs_shape = _out.shape - self._add_weights(self.layer.weights) - - def forward(self, inputs): - outputs = self.layer(inputs) - return outputs - - # new_variables = nn.weights - # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=self.name) #vs.name) - # new_variables = get_collection_trainable(self.name) - # - # self._add_layers(self.outputs) - # self._add_params(new_variables) - - -class SeparableConv2d(Layer): - """The :class:`SeparableConv2d` class is a 2D depthwise separable convolutional layer. - - This layer performs a depthwise convolution that acts separately on channels, followed by a pointwise convolution that mixes channels. - While :class:`DepthwiseConv2d` performs depthwise convolution only, which allow us to add batch normalization between depthwise and pointwise convolution. - - Parameters - ------------ - # prev_layer : :class:`Layer` - # Previous layer. - n_filter : int - The dimensionality of the output space (i.e. 
the number of filters in the convolution). - filter_size : tuple/list of 2 int - Specifying the spatial dimensions of the filters. Can be a single integer to specify the same value for all spatial dimensions. - strides : tuple/list of 2 int - Specifying the strides of the convolution. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1. - padding : str - One of "valid" or "same" (case-insensitive). - data_format : str - One of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). - dilation_rate : integer or tuple/list of 2 int - Specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1. - depth_multiplier : int - The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier. - depthwise_init : initializer - for the depthwise convolution kernel. - pointwise_init : initializer - For the pointwise convolution kernel. - b_init : initializer - For the bias vector. If None, ignore bias in the pointwise part only. - name : None or str - A unique layer name. - - """ - - # @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - # prev_layer, - n_filter=100, - filter_size=(3, 3), - strides=(1, 1), - act=None, - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - depth_multiplier=1, - # activation=None, - # use_bias=True, - depthwise_init=None, - pointwise_init=None, - b_init=tf.compat.v1.initializers.zeros(), - # depthwise_regularizer=None, - # pointwise_regularizer=None, - # bias_regularizer=None, - # activity_regularizer=None, - # depthwise_constraint=None, - # pointwise_constraint=None, - # W_init=tf.truncated_normal_initializer(stddev=0.1), - # b_init=tf.constant_initializer(value=0.0), - W_init_args=None, # TODO: Remove when TF <1.3 not supported - b_init_args=None, # TODO: Remove when TF <1.3 not supported - name=None, #'seperable', - ): - # if W_init_args is None: - # W_init_args = {} - # if b_init_args is None: - # b_init_args = {} - - # super(SeparableConv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.act = act - self.padding = padding - self.data_format = data_format - self.dilation_rate = dilation_rate - self.depth_multiplier = depth_multiplier - self.depthwise_init = depthwise_init - self.pointwise_init = pointwise_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - - logging.info( - "SeparableConv2d %s: n_filter: %d filter_size: %s filter_size: %s depth_multiplier: %d act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), depth_multiplier, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs_shape): - self.layer = tf.keras.layers.SeparableConv2D( - filters=self.n_filter, - 
kernel_size=self.filter_size, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate, - depth_multiplier=self.depth_multiplier, - activation=self.act, - use_bias=(True if self.b_init is not None else False), - depthwise_initializer=self.depthwise_init, - pointwise_initializer=self.pointwise_init, - bias_initializer=self.b_init, - # depthwise_regularizer=None, - # pointwise_regularizer=None, - # bias_regularizer=None, - # activity_regularizer=None, - # depthwise_constraint=None, - # pointwise_constraint=None, - # bias_constraint=None, - trainable=True, - name=self.name - ) - _out = self.layer(tf.convert_to_tensor(np.random.uniform(size=inputs_shape), dtype=tf.float32)) # dummy forward pass to create the weights - outputs_shape = _out.shape - self._weights = self.layer.weights - - def forward(self, inputs): - outputs = self.layer(inputs) - return outputs - - # new_variables = nn.weights - # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=self.name) #vs.name) - # new_variables = get_collection_trainable(self.name) - # - # self._add_layers(self.outputs) - # self._add_params(new_variables) diff --git a/tensorlayer/layers/convolution/simplified_conv.py b/tensorlayer/layers/convolution/simplified_conv.py deleted file mode 100644 index e30d96f..0000000 --- a/tensorlayer/layers/convolution/simplified_conv.py +++ /dev/null @@ -1,535 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - import tensorlayer as tl - -from tensorlayer.layers.core import Layer - from tensorlayer.layers.utils import get_collection_trainable - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Conv1d', - 'Conv2d', - 'Conv3d', - ] - - - class Conv1d(Layer): - """Simplified version of :class:`Conv1dLayer`. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer - n_filter : int - The number of filters - filter_size : int - The filter size - stride : int - The stride step - dilation_rate : int - Specifying the dilation rate to use for dilated convolution. - act : activation function - The function that is applied to the layer activations - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - 'channels_last' (default) or 'channels_first'. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer (deprecated). - b_init_args : dictionary - The arguments for the bias vector initializer (deprecated). - use_cudnn_on_gpu : bool - Default is False. - name : None or str - A unique layer name
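Like the other simplified layers in this file, Conv1d builds its weights eagerly when `in_channels` is given and defers `build()` to the first call otherwise. A hypothetical usage sketch, assuming the TL2-style eager API shown in this diff (the released package may differ):

```python
import numpy as np
import tensorflow as tf
import tensorlayer as tl

conv = tl.layers.Conv1d(n_filter=32, filter_size=5, stride=1, in_channels=1)  # in_channels triggers build() in __init__
x = tf.convert_to_tensor(np.random.uniform(size=(8, 100, 1)).astype(np.float32))
y = conv.forward(x)  # direct forward pass on an eager tensor
print(y.shape)       # (8, 100, 32): 'SAME' padding and stride 1 preserve width
```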
- - Examples - --------- - >>> x = tf.placeholder(tf.float32, (batch_size, width)) - >>> y_ = tf.placeholder(tf.int64, shape=(batch_size,)) - >>> n = InputLayer(x, name='in') - >>> n = ReshapeLayer(n, (-1, width, 1), name='rs') - >>> n = Conv1d(n, 64, 3, 1, act=tf.nn.relu, name='c1') - >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m1') - >>> n = Conv1d(n, 128, 3, 1, act=tf.nn.relu, name='c2') - >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m2') - >>> n = Conv1d(n, 128, 3, 1, act=tf.nn.relu, name='c3') - >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m3') - >>> n = FlattenLayer(n, name='f') - >>> n = DenseLayer(n, 500, tf.nn.relu, name='d1') - >>> n = DenseLayer(n, 100, tf.nn.relu, name='d2') - >>> n = DenseLayer(n, 2, None, name='o') - - """ - - def __init__( - self, #prev_layer, - n_filter=32, - filter_size=5, - stride=1, - dilation_rate=1, - act=None, - padding='SAME', - data_format="channels_last", - W_init=tl.initializers.truncated_normal(stddev=0.02), - b_init=tl.initializers.constant(value=0.0), - # W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - # b_init=tf.compat.v1.initializers.constant(value=0.0), - # W_init_args=None, - # b_init_args=None, - use_cudnn_on_gpu=None, - in_channels=None, - name=None, #'conv1d' - ): - # super(Conv1d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.stride = stride - self.act = act - self.padding = padding - self.data_format = data_format - self.dilation_rate = dilation_rate - self.W_init = W_init - self.b_init = b_init - self.in_channels = in_channels - - if self.in_channels: - self.build(None) - self._built = True - - # self.W_init_args = W_init_args - # self.b_init_args = b_init_args - # FIXME: Don't know the use of use_cudnn_on_gpu - self.use_cudnn_on_gpu = use_cudnn_on_gpu - logging.info( - "Conv1d %s: n_filter: %d filter_size: %s stride: %d pad: %s act: %s dilation_rate: %d" % ( - self.name, n_filter, filter_size, stride, padding, - self.act.__name__ if self.act is not None else 'No Activation', dilation_rate - ) - ) - - def __repr__(self): - actstr = self.act.__name__ if self.act is not None else 'No Activation' - s = ('{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}' - ', stride={stride}, padding={padding}') - if self.dilation_rate != 1: - s += ', dilation={dilation_rate}' - if self.b_init is None: - s += ', bias=False' - s += (', ' + actstr) - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape): - if self.data_format == 'channels_last': - self.data_format = 'NWC' - if self.in_channels: - self.pre_channel = self.in_channels - else: - self.pre_channel = inputs_shape[-1] - self.in_channels = self.pre_channel - elif self.data_format == 'channels_first': - self.data_format = 'NCW' - if self.in_channels: - self.pre_channel = self.in_channels - else: - self.pre_channel = inputs_shape[1] - self.in_channels = self.pre_channel - else: - raise Exception("data_format should be either channels_last or channels_first") - - self.filter_size = (self.filter_size, self.pre_channel, self.n_filter) - - # TODO : check - self.W = self._get_weights("filters", shape=self.filter_size, init=self.W_init) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.n_filter,), init=self.b_init)
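    # Note: tf.nn.conv1d expects `filters` shaped (filter_width, in_channels, out_channels),
    # which is exactly the tuple assembled in build() above; stride and dilation are
    # plain scalars in the 1D case.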
- - def forward(self, inputs): - outputs = tf.nn.conv1d( - input=inputs, - filters=self.W, - stride=self.stride, - padding=self.padding, - data_format=self.data_format, - name=self.name, - ) - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs - # _conv1d = tf.compat.v1.layers.Conv1D( - # filters=n_filter, kernel_size=filter_size, strides=stride, padding=padding, data_format=data_format, - # dilation_rate=dilation_rate, activation=self.act, use_bias=(True if b_init else False), - # kernel_initializer=W_init, bias_initializer=b_init, name=name - # ) - - # _conv1d.dtype = LayersConfig.tf_dtype # unsupported; it will use the same dtype as the inputs - # self.outputs = _conv1d(self.inputs) - # # new_variables = _conv1d.weights # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - # # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=self.name) #vs.name) - # new_variables = get_collection_trainable(self.name) - # - # self._add_layers(self.outputs) - # self._add_params(new_variables) - - - class Conv2d(Layer): - """Simplified version of :class:`Conv2dLayer`. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - "channels_last" (NHWC, default) or "channels_first" (NCHW). - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer (for TF < 1.5). - b_init_args : dictionary - The arguments for the bias vector initializer (for TF < 1.5). - use_cudnn_on_gpu : bool - Default is False. - name : None or str - A unique layer name.
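For orientation, a minimal usage sketch of this eager-style Conv2d (a hypothetical call pattern based on the code in this diff; the released TL2 API may differ):

```python
import numpy as np
import tensorflow as tf
import tensorlayer as tl

conv = tl.layers.Conv2d(n_filter=16, filter_size=(3, 3), strides=(1, 1),
                        act=tf.nn.relu, in_channels=3)  # built eagerly via in_channels
x = tf.convert_to_tensor(np.random.uniform(size=(8, 32, 32, 3)).astype(np.float32))
y = conv.forward(x)
print(y.shape)  # (8, 32, 32, 16): 'SAME' padding and stride 1 preserve height/width
```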
- - Examples - -------- - >>> x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) - >>> net = InputLayer(x, name='inputs') - >>> net = Conv2d(net, 64, (3, 3), act=tf.nn.relu, name='conv1_1') - >>> net = Conv2d(net, 64, (3, 3), act=tf.nn.relu, name='conv1_2') - >>> net = MaxPool2d(net, (2, 2), name='pool1') - >>> net = Conv2d(net, 128, (3, 3), act=tf.nn.relu, name='conv2_1') - >>> net = Conv2d(net, 128, (3, 3), act=tf.nn.relu, name='conv2_2') - >>> net = MaxPool2d(net, (2, 2), name='pool2') - - """ - - def __init__( - self, - # prev_layer, - n_filter=32, - filter_size=(3, 3), - strides=(1, 1), - act=None, - padding='SAME', - data_format='channels_last', - dilation_rate=(1, 1), - W_init=tl.initializers.truncated_normal(stddev=0.02), - b_init=tl.initializers.constant(value=0.0), - # W_init = tf.truncated_normal_initializer(stddev=0.02), - # b_init = tf.constant(value=0.0), - # W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - # b_init=tf.compat.v1.initializers.constant(value=0.0), - # W_init_args=None, - # b_init_args=None, - use_cudnn_on_gpu=None, - in_channels=None, - name=None, #'conv2d', - ): - # if len(strides) != 2: - # raise ValueError("len(strides) should be 2, Conv2d and Conv2dLayer are different.") - - # try: - # pre_channel = int(layer.outputs.get_shape()[-1]) - - # except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - # pre_channel = 1 - # logging.info("[warnings] unknow input channels, set to 1") - - # super(Conv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self._strides = self.strides = strides - self.act = act - self.padding = padding - self._dilation_rate = self.dilation_rate = dilation_rate - self.data_format = data_format - self.W_init = W_init - self.b_init = b_init - # self.W_init_args = W_init_args - # self.b_init_args = b_init_args - self.use_cudnn_on_gpu = use_cudnn_on_gpu - self.in_channels = in_channels - - if self.in_channels: - self.build(None) - self._built = True - - logging.info( - "Conv2d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def __repr__(self): - actstr = self.act.__name__ if self.act is not None else 'No Activation' - s = ('{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.dilation_rate != (1,) * len(self.dilation_rate): - s += ', dilation={dilation_rate}' - if self.b_init is None: - s += ', bias=False' - s += (', ' + actstr) - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape): - if self.data_format == 'channels_last': - self.data_format = 'NHWC' - if self.in_channels: - self.pre_channel = self.in_channels - else: - self.pre_channel = inputs_shape[-1] - self.in_channels = self.pre_channel - self._strides = [1, self._strides[0], self._strides[1], 1] - self._dilation_rate = [1, self._dilation_rate[0], self._dilation_rate[1], 1] - elif self.data_format == 'channels_first': - self.data_format = 'NCHW' - if self.in_channels: - self.pre_channel = self.in_channels - else: - self.pre_channel = inputs_shape[1] - self.in_channels = self.pre_channel - self._strides = [1, 1, 
self._strides[0], self._strides[1]] - self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1]] - else: - raise Exception("data_format should be either channels_last or channels_first") - - self.filter_shape = (self.filter_size[0], self.filter_size[1], self.pre_channel, self.n_filter) - - self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init) - - if self.b_init: - self.b = self._get_weights("biases", shape=(self.n_filter,), init=self.b_init) - - def forward(self, inputs): - outputs = tf.nn.conv2d( - input=inputs, - filter=self.W, - strides=self._strides, - padding=self.padding, - use_cudnn_on_gpu=self.use_cudnn_on_gpu, #True, - data_format=self.data_format, #'NHWC', - dilations=self._dilation_rate, #[1, 1, 1, 1], - name=self.name, - ) - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs - - # # with tf.variable_scope(name) as vs: - # conv2d = tf.compat.v1.layers.Conv2D( - # # inputs=self.inputs, - # filters=n_filter, - # kernel_size=filter_size, - # strides=strides, - # padding=padding, - # data_format=data_format, - # dilation_rate=dilation_rate, - # activation=self.act, - # use_bias=(False if b_init is None else True), - # kernel_initializer=W_init, # None, - # bias_initializer=b_init, # f.zeros_initializer(), - # kernel_regularizer=None, - # bias_regularizer=None, - # activity_regularizer=None, - # kernel_constraint=None, - # bias_constraint=None, - # trainable=True, - # name=name, - # # reuse=None, - # ) - # self.outputs = conv2d(self.inputs) # must put before ``new_variables`` - # # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=self.name) #vs.name) - # new_variables = get_collection_trainable(self.name) - # # new_variables = [] - # # for p in tf.trainable_variables(): - # # # print(p.name.rpartition('/')[0], self.name) - # # if p.name.rpartition('/')[0] == self.name: - # # new_variables.append(p) - # # exit() - # # TF_GRAPHKEYS_VARIABLES TF_GRAPHKEYS_VARIABLES - # # print(self.name, name) - # # print(tf.trainable_variables())#tf.GraphKeys.TRAINABLE_VARIABLES) - # # print(new_variables) - # # print(conv2d.weights) - # - # self._add_layers(self.outputs) - # self._add_params(new_variables) # conv2d.weights) - - -class Conv3d(Layer): - """Simplified version of :class:`Conv2dLayer`. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - "channels_last" (NDHWC, default) or "channels_first" (NCDHW). - W_init : initializer - The initializer for the the weight matrix. - b_init : initializer or None - The initializer for the the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer (for TF < 1.5). - b_init_args : dictionary - The arguments for the bias vector initializer (for TF < 1.5). - name : None or str - A unique layer name. 
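All three simplified convolutions share TensorFlow's padding arithmetic, applied independently per spatial dimension (D, H, W for Conv3d). A small helper capturing the standard formulas (a reference sketch, not part of this diff):

```python
import math

def conv_output_length(input_len, filter_len, stride, padding):
    # TensorFlow's output-size rules for dilation_rate == 1.
    if padding == 'SAME':
        return math.ceil(input_len / stride)
    if padding == 'VALID':
        return math.ceil((input_len - filter_len + 1) / stride)
    raise ValueError("padding must be 'SAME' or 'VALID'")

assert conv_output_length(32, 3, 1, 'SAME') == 32   # size preserved
assert conv_output_length(32, 3, 2, 'VALID') == 15  # ceil(30 / 2)
```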
- - """ - - def __init__( - self, - # prev_layer, - n_filter=32, - filter_size=(3, 3, 3), - strides=(1, 1, 1), - act=None, - padding='SAME', - data_format='channels_last', - dilation_rate=(1, 1, 1), - W_init=tl.initializers.truncated_normal(stddev=0.02), - b_init=tl.initializers.constant(value=0.0), - # W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - # b_init=tf.compat.v1.initializers.constant(value=0.0), - # W_init_args=None, - # b_init_args=None, - # use_cudnn_on_gpu=None, - in_channels=None, - name=None, #'conv3d', - ): - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self._strides = self.strides = strides - self.act = act - self.padding = padding - self._dilation_rate = self.dilation_rate = dilation_rate - self.data_format = data_format - self.W_init = W_init - self.b_init = b_init - self.in_channels = in_channels - - if self.in_channels: - self.build(None) - self._built = True - # self.W_init_args = W_init_args - # self.b_init_args = b_init_args - # self.use_cudnn_on_gpu = use_cudnn_on_gpu - logging.info( - "Conv3d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def __repr__(self): - actstr = self.act.__name__ if self.act is not None else 'No Activation' - s = ('{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.dilation_rate != (1,) * len(self.dilation_rate): - s += ', dilation={dilation_rate}' - if self.b_init is None: - s += ', bias=False' - s += (', ' + actstr) - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape): - if self.data_format == 'channels_last': - self.data_format == 'NDHWC' - if self.in_channels: - self.pre_channel = self.in_channels - else: - self.pre_channel = inputs_shape[-1] - self.in_channels = self.pre_channel - self._strides = [1, self._strides[0], self._strides[1], self._strides[2], 1] - self.dilation_rate = [1, self.dilation_rate[0], self.dilation_rate[1], self.dilation_rate[2], 1] - elif self.data_format == 'channels_first': - self.data_format == 'NCDHW' - if self.in_channels: - self.pre_channel = self.in_channels - else: - self.pre_channel = inputs_shape[1] - self.in_channels = self.pre_channel - self._strides = [1, 1, self._strides[0], self._strides[1], self._strides[2]] - self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1], self._dilation_rate[2]] - else: - raise Exception("data_format should be either channels_last or channels_first") - - self.filter_shape = ( - self.filter_size[0], self.filter_size[1], self.filter_size[2], self.pre_channel, self.n_filter - ) - - self.W = self._get_weights("filters", shape=self.filter_size, init=self.W_init, init_args=self.W_init_args) - if self.b_init: - self.b = self._get_weights( - "biases", shape=(self.n_filter,), init=self.b_init, init_args=self.b_init_args - ) - - def forward(self, inputs): - outputs = tf.nn.conv3d( - input=inputs, - filter=self.W, - strides=self._strides, - padding=self.padding, - # use_cudnn_on_gpu=self.use_cudnn_on_gpu, #True, - data_format=self.data_format, #'NDHWC', - dilations=self._dilation_rate, #[1, 1, 1, 1, 1], - name=self.name, - ) - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - outputs = self.act(outputs) - return 
outputs diff --git a/tensorlayer/layers/convolution/simplified_deconv.py b/tensorlayer/layers/convolution/simplified_deconv.py deleted file mode 100644 index 9fc7744..0000000 --- a/tensorlayer/layers/convolution/simplified_deconv.py +++ /dev/null @@ -1,231 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer.layers.utils import get_collection_trainable - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - # 'DeConv1d' # TODO: Shall be implemented - 'DeConv2d', - 'DeConv3d', -] - - -class DeConv2d(Layer): - """Simplified version of :class:`DeConv2dLayer`. - - Parameters - ---------- - # prev_layer : :class:`Layer` - # Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - out_size : tuple of int - Require if TF version < 1.3, (height, width) of output. - strides : tuple of int - The stride step (height, width). - padding : str - The padding algorithm type: "SAME" or "VALID". - act : activation function - The activation function of this layer. - data_format : str - "channels_last" (NHWC, default) or "channels_first" (NCHW). - dilation_rate : int of tuple of int - The dilation rate to use for dilated convolution - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer (For TF < 1.3). - b_init_args : dictionary - The arguments for the bias vector initializer (For TF < 1.3). - name : None or str - A unique layer name. - - """ - - def __init__( - self, - n_filter=32, - filter_size=(3, 3), - strides=(2, 2), - act=None, - padding='SAME', - dilation_rate=(1, 1), - data_format='channels_last', - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, # TODO: Remove when TF <1.3 not supported - b_init_args=None, # TODO: Remove when TF <1.3 not supported - name=None, #'decnn2d' - ): - # super(DeConv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.act = act - self.data_format = data_format - self.dilation_rate = dilation_rate - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args # TODO: Remove when TF <1.3 not supported - self.b_init_args = b_init_args # TODO: Remove when TF <1.3 not supported - - logging.info( - "DeConv2d {}: n_filters: {} strides: {} padding: {} act: {} dilation: {}".format( - self.name, str(n_filter), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation', - dilation_rate, - ) - ) - - if len(strides) != 2: - raise ValueError("len(strides) should be 2, DeConv2d and DeConv2dLayer are different.") - - def build(self, inputs_shape): - self.layer = tf.keras.layers.Conv2DTranspose( - filters=self.n_filter, - kernel_size=self.filter_size, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate, - activation=self.act, - use_bias=(True if self.b_init is not None else False), - kernel_initializer=self.W_init, - bias_initializer=self.b_init, - # dtype=tf.float32, - name=self.name, - ) - 
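    # tf.keras layers create their variables lazily on the first call, so the dummy
    # forward pass below exists only to force weight creation before the weights are
    # copied out via self.layer.weights.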
# print(inputs_shape) - # print(np.random.uniform(size=inputs_shape).shape) - # exit() - _out = self.layer(tf.convert_to_tensor(np.random.uniform(size=inputs_shape), dtype=np.float32))#np.random.uniform([1] + list(inputs_shape))) # initialize weights - outputs_shape = _out.shape - self._weights = self.layer.weights - - def forward(self, inputs): - outputs = self.layer(inputs) - return outputs - # self.outputs = conv2d_transpose(self.inputs) - # # new_variables = conv2d_transpose.weights # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - # # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=self.name) #vs.name) - # new_variables = get_collection_trainable(self.name) - # - # self._add_layers(self.outputs) - # self._add_params(new_variables) - - -class DeConv3d(Layer): - """Simplified version of The :class:`DeConv3dLayer`, see `tf.contrib.layers.conv3d_transpose `__. - - Parameters - ---------- - # prev_layer : :class:`Layer` - # Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (depth, height, width). - stride : tuple of int - The stride step (depth, height, width). - padding : str - The padding algorithm type: "SAME" or "VALID". - act : activation function - The activation function of this layer. - data_format : str - "channels_last" (NDHWC, default) or "channels_first" (NCDHW). - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip bias. - W_init_args : dictionary - The arguments for the weight matrix initializer (For TF < 1.3). - b_init_args : dictionary - The arguments for the bias vector initializer (For TF < 1.3). - name : None or str - A unique layer name. - - """ - - def __init__( - self, - # prev_layer, - n_filter=32, - filter_size=(3, 3, 3), - strides=(2, 2, 2), - padding='SAME', - act=None, - data_format='channels_last', - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, # TODO: Remove when TF <1.3 not supported - b_init_args=None, # TODO: Remove when TF <1.3 not supported - name=None, #'decnn3d' - ): - # super(DeConv3d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.act = act - self.data_format = data_format - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args # TODO: Remove when TF <1.3 not supported - self.b_init_args = b_init_args # TODO: Remove when TF <1.3 not supported - - logging.info( - "DeConv3d %s: n_filters: %s strides: %s pad: %s act: %s" % ( - self.name, str(n_filter), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs_shape): - # with tf.variable_scope(name) as vs: - self.layer = tf.keras.layers.Conv3DTranspose( - filters=self.n_filter, - kernel_size=self.filter_size, - strides=self.strides, - padding=self.padding, - activation=self.act, - use_bias=(True if self.b_init is not None else False), - data_format=self.data_format, - kernel_initializer=self.W_init, - bias_initializer=self.b_init, - name=self.name, - ) - - _out = self.layer(tf.convert_to_tensor(np.random.uniform(size=inputs_shape), dtype=np.float32)) #self.layer(np.random.uniform([1] + list(inputs_shape))) # initialize weights - 
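    # Shape note: with padding='SAME' and strides=(2, 2, 2), Conv3DTranspose roughly
    # inverts the forward stride arithmetic, so (batch, d, h, w, c) grows to
    # (batch, 2d, 2h, 2w, n_filter).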
outputs_shape = _out.shape - # self._add_weights(self.layer.weights) - self._weights = self.layer.weights - - def forward(self, inputs): - outputs = self.layer(inputs) - return outputs - # self.outputs = nn(self.inputs) - # # new_variables = nn.weights # tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - # # new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=self.name) #vs.name) - # new_variables = get_collection_trainable(self.name) - # - # self._add_layers(self.outputs) - # self._add_params(new_variables) diff --git a/tensorlayer/layers/convolution/super_resolution.py b/tensorlayer/layers/convolution/super_resolution.py deleted file mode 100644 index 159ef4d..0000000 --- a/tensorlayer/layers/convolution/super_resolution.py +++ /dev/null @@ -1,192 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias -from tensorlayer.decorators import private_method - -__all__ = [ - 'SubpixelConv1d', - 'SubpixelConv2d', -] - - -class SubpixelConv2d(Layer): - """It is a 2D sub-pixel up-sampling layer, usually be used - for Super-Resolution applications, see `SRGAN `__ for example. - - Parameters - ------------ - scale : int - The up-scaling ratio, a wrong setting will lead to dimension size error. - n_out_channel : int or None - The number of output channels. - - If None, automatically set n_out_channel == the number of input channels / (scale x scale). - - The number of input channels == (scale x scale) x The number of output channels. - act : activation function - The activation function of this layer. - name : str - A unique layer name. - - Examples - --------- - >>> # examples here just want to tell you how to set the n_out_channel. 
- >>> import numpy as np - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = np.random.rand(2, 16, 16, 4) - >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4), name="X") - >>> net = tl.layers.InputLayer(X, name='input') - >>> net = tl.layers.SubpixelConv2d(net, scale=2, n_out_channel=1, name='subpixel_conv2d') - >>> sess = tf.Session() - >>> y = sess.run(net.outputs, feed_dict={X: x}) - >>> print(x.shape, y.shape) - (2, 16, 16, 4) (2, 32, 32, 1) - - >>> x = np.random.rand(2, 16, 16, 4*10) - >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4*10), name="X") - >>> net = tl.layers.InputLayer(X, name='input2') - >>> net = tl.layers.SubpixelConv2d(net, scale=2, n_out_channel=10, name='subpixel_conv2d2') - >>> y = sess.run(net.outputs, feed_dict={X: x}) - >>> print(x.shape, y.shape) - (2, 16, 16, 40) (2, 32, 32, 10) - - >>> x = np.random.rand(2, 16, 16, 25*10) - >>> X = tf.placeholder("float32", shape=(2, 16, 16, 25*10), name="X") - >>> net = tl.layers.InputLayer(X, name='input3') - >>> net = tl.layers.SubpixelConv2d(net, scale=5, n_out_channel=None, name='subpixel_conv2d3') - >>> y = sess.run(net.outputs, feed_dict={X: x}) - >>> print(x.shape, y.shape) - (2, 16, 16, 250) (2, 80, 80, 10) - - References - ------------ - - `Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network `__ - - """ - - # github/Tetrachrome/subpixel https://github.com/Tetrachrome/subpixel/blob/master/subpixel.py - def __init__(self, scale=2, n_out_channel=None, act=None, name=None): #'subpixel_conv2d'): - - # super(SubpixelConv2d, self).__init__(prev_layer=prev_layer, act=act, name=name) - super().__init__(name) - self.scale = scale - self.n_out_channel = n_out_channel # may be None; it is then inferred from the input shape on the first forward pass, since no input tensor exists yet at construction time - self.act = act - - logging.info( - "SubpixelConv2d %s: scale: %d n_out_channel: %s act: %s" % - (self.name, scale, n_out_channel, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - if self.n_out_channel is None: - if int(inputs.get_shape()[-1]) % (self.scale**2) != 0: - raise Exception( - "SubpixelConv2d: The number of input channels == (scale x scale) x The number of output channels" - ) - self.n_out_channel = int(int(inputs.get_shape()[-1]) / (self.scale**2)) - - outputs = self._PS(inputs, r=self.scale, n_out_channels=self.n_out_channel) - if self.act: - outputs = self.act(outputs) - return outputs - - @private_method - def _PS(self, X, r, n_out_channels): - - _err_log = "SubpixelConv2d: The number of input channels == (scale x scale) x The number of output channels" - - if n_out_channels >= 1: - if int(X.get_shape()[-1]) != (r**2) * n_out_channels: - raise Exception(_err_log) - # bsize, a, b, c = X.get_shape().as_list() - # bsize = tf.shape(X)[0] # Handling Dimension(None) type for undefined batch dim - # Xs=tf.split(X,r,3) #b*h*w*r*r - # Xr=tf.concat(Xs,2) #b*h*(r*w)*r - # X=tf.reshape(Xr,(bsize,r*a,r*b,n_out_channel)) # b*(r*h)*(r*w)*c - - X = tf.compat.v1.depth_to_space(input=X, block_size=r) - else: - raise RuntimeError(_err_log) - - return X
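The heavy lifting in `_PS` above is a single depth-to-space rearrangement: (batch, h, w, r*r*c) becomes (batch, r*h, r*w, c). A TF2 shape-only sketch (illustrative; assumes eager execution):

```python
import tensorflow as tf

x = tf.zeros([2, 16, 16, 4])               # 4 channels = 2*2 * 1 output channel
y = tf.nn.depth_to_space(x, block_size=2)  # periodic shuffle of channels into space
print(y.shape)                             # (2, 32, 32, 1)
```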
- - class SubpixelConv1d(Layer): - """It is a 1D sub-pixel up-sampling layer. - - Calls a TensorFlow function that directly implements this functionality. - We assume the input has shape (batch, width, r). - - Parameters - ------------ - scale : int - The up-scaling ratio; a wrong setting will lead to a dimension-size error. - act : activation function - The activation function of this layer. - name : str - A unique layer name. - - Examples - ---------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> t_signal = tf.placeholder('float32', [10, 100, 4], name='x') - >>> n = tl.layers.InputLayer(t_signal, name='in') - >>> n = tl.layers.SubpixelConv1d(n, scale=2, name='s') - >>> print(n.outputs.shape) - (10, 200, 2) - - References - ----------- - `Audio Super Resolution Implementation `__. - - """ - - def __init__(self, scale=2, act=None, name=None): #'subpixel_conv1d'): - - # super(SubpixelConv1d, self).__init__(prev_layer=prev_layer, act=act, name=name) - super().__init__(name) - self.scale = scale - self.act = act - logging.info( - "SubpixelConv1d %s: scale: %d act: %s" % - (self.name, scale, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - """ - Parameters - ------------ - inputs : tensor - Input tensor with shape (batch, width, r). - """ - # with tf.name_scope(name): - # self.outputs = self._apply_activation(self._PS(self.inputs, r=scale)) - - outputs = self._PS(inputs, r=self.scale) - if self.act: - outputs = self.act(outputs) - return outputs - - @private_method - def _PS(self, I, r): - X = tf.transpose(a=I, perm=[2, 1, 0]) # (r, w, b) - X = tf.batch_to_space(X, [r], [[0, 0]]) # (1, r*w, b) - X = tf.transpose(a=X, perm=[2, 1, 0]) - return X diff --git a/tensorlayer/layers/convolution/ternary_conv.py b/tensorlayer/layers/convolution/ternary_conv.py deleted file mode 100644 index 6142d9f..0000000 --- a/tensorlayer/layers/convolution/ternary_conv.py +++ /dev/null @@ -1,161 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - # from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import compute_alpha - from tensorlayer.layers.utils import ternary_operation - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = ['TernaryConv2d'] - - - class TernaryConv2d(Layer): - """ - The :class:`TernaryConv2d` class is a 2D ternary CNN layer: its weights are constrained to -1, 0 or 1 during inference. - - Note that the bias vector is not ternarized. - - Parameters - ---------- - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - "NHWC" or "NCHW", default is "NHWC". - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inference (TODO). - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - use_cudnn_on_gpu : bool - Default is False. - name : None or str - A unique layer name.
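For intuition, ternarization maps every weight to one of {-alpha, 0, +alpha}, where alpha is a computed scale. A rough NumPy sketch with a hypothetical fixed threshold; the real `compute_alpha` and `ternary_operation` in `tensorlayer.layers.utils` may choose the threshold and scale differently:

```python
import numpy as np

def ternarize(w, threshold=0.05):
    # quantize to {-1, 0, +1}, then rescale by the mean magnitude of the kept weights
    t = np.where(w > threshold, 1.0, np.where(w < -threshold, -1.0, 0.0))
    alpha = np.abs(w[t != 0]).mean() if np.any(t != 0) else 0.0
    return alpha * t

w = np.random.randn(3, 3)
print(ternarize(w))  # entries are 0.0 or +/- alpha
```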
- - Examples - --------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, [None, 256, 256, 3]) - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.TernaryConv2d(net, 32, (5, 5), (1, 1), padding='SAME', name='bcnn1') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool1') - >>> net = tl.layers.BatchNorm(net, act=tl.act.htanh, is_train=True, name='bn1') - ... - >>> net = tl.layers.Sign(net) - >>> net = tl.layers.TernaryConv2d(net, 64, (5, 5), (1, 1), padding='SAME', name='bcnn2') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), padding='SAME', name='pool2') - >>> net = tl.layers.BatchNorm(net, act=tl.act.htanh, is_train=True, name='bn2') - - """ - - def __init__( - self, - n_filter=32, - filter_size=(3, 3), - strides=(1, 1), - act=None, - padding='SAME', - data_format=None, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.02), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - use_cudnn_on_gpu=None, - name=None, #'ternary_cnn2d', - ): - # super(TernaryConv2d, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_filter = n_filter - self.filter_size = filter_size - self.strides = strides - self.act = act - self.padding = padding - self.data_format = data_format - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - self.use_cudnn_on_gpu = use_cudnn_on_gpu - - logging.info( - "TernaryConv2d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( - self.name, n_filter, str(filter_size), str(strides), padding, - self.act.__name__ if self.act is not None else 'No Activation' - ) - ) - - def build(self, inputs): - if len(self.strides) != 2: - raise ValueError("len(strides) should be 2.") - - if self.use_gemm: - raise Exception("TODO. 
The current version uses tf.matmul for inference.") - - try: - self.pre_channel = int(inputs.get_shape()[-1]) - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - self.pre_channel = 1 - logging.warning("unknown input channels, set to 1") - - self.shape = (self.filter_size[0], self.filter_size[1], self.pre_channel, self.n_filter) - self.strides = (1, self.strides[0], self.strides[1], 1) - - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\kernel', shape=self.shape, initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights("filters", shape=self.shape, init=self.W_init) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.shape[-1],), init=self.b_init) - # if self.b_init: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\bias', shape=(self.shape[-1]), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # self.add_weights([self.W, self.b]) - # else: - # self.add_weights(self.W) - - def forward(self, inputs): - - alpha = compute_alpha(self.W) - - W_ = ternary_operation(self.W) - W_ = tf.multiply(alpha, W_) - - outputs = tf.nn.conv2d( - inputs, W_, strides=self.strides, padding=self.padding, use_cudnn_on_gpu=self.use_cudnn_on_gpu, - data_format=self.data_format - ) - - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - - if self.act: - outputs = self.act(outputs) - - return outputs diff --git a/tensorlayer/layers/core.py b/tensorlayer/layers/core.py deleted file mode 100644 index bd39e36..0000000 --- a/tensorlayer/layers/core.py +++ /dev/null @@ -1,767 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import inspect - import six - -from abc import ABCMeta, abstractmethod - -import numpy as np - -import tensorflow as tf - import tensorlayer as tl - -from tensorlayer.layers.utils import list_remove_repeat, get_variable_with_initializer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - from tensorlayer.decorators import protected_method - from tensorlayer.decorators import private_method - -__all__ = [ - # 'LayersConfig', # TODO : remove this?? - # 'TF_GRAPHKEYS_VARIABLES', # TODO : remove this?? - 'Layer', - 'ModelLayer', - # 'SequentialLayer', - 'LayerList' - ] - -_global_layer_name_dict = {} # TODO: better implementation? - - # @six.add_metaclass(ABCMeta) - # class LayersConfig(object): - # - # tf_dtype = tf.float32 # TensorFlow DType - # set_keep = {} # A dictionary for holding tf.placeholders - # - # @abstractmethod - # def __init__(self): - # pass - - # TF_GRAPHKEYS_VARIABLES = tf.compat.v1.GraphKeys.GLOBAL_VARIABLES - - - def _addindent(s_, numSpaces): - s = s_.split('\n') - # don't do anything for single-line stuff - if len(s) == 1: - return s_ - first = s.pop(0) - s = [(numSpaces * ' ') + line for line in s] - s = '\n'.join(s) - s = first + '\n' + s - return s - - class Layer(object): - #FIXME: documentation update needed - """The basic :class:`Layer` class represents a single layer of a neural network. - - It should be subclassed when implementing new types of layers. - Because each layer can keep track of the layer(s) feeding into it, a - network's output :class:`Layer` instance can double as a handle to the full - network. - - Parameters - ---------- - prev_layer : :class:`Layer` or None - Previous layer (optional), for adding all properties of previous layer(s) to this layer.
- act : activation function (None by default) - The activation function of this layer. - name : str or None - A unique layer name. - - Methods - --------- - check this https://github.com/luomai/tensorlayer2-design/issues/7 - # print_weights(details=True, session=None) - # Print all parameters of this network. - # print_layers() - # Print all outputs of all layers of this network. - # count_weights() - # Return the number of parameters of this network. - # get_all_weights() - # Return the parameters in a list of array. - - Examples - --------- - - Define model - - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder("float32", [None, 100]) # TODO: rewrite - >>> n = tl.layers.InputLayer(x, name='in') - >>> n = tl.layers.DenseLayer(n, 80, name='d1') - >>> n = tl.layers.DenseLayer(n, 80, name='d2') - - - Get information - - >>> print(n) - Last layer is: DenseLayer (d2) [None, 80] - >>> n.print_layers() - [TL] layer 0: d1/Identity:0 (?, 80) float32 - [TL] layer 1: d2/Identity:0 (?, 80) float32 - >>> n.print_weights(False) - [TL] param 0: d1/W:0 (100, 80) float32_ref - [TL] param 1: d1/b:0 (80,) float32_ref - [TL] param 2: d2/W:0 (80, 80) float32_ref - [TL] param 3: d2/b:0 (80,) float32_ref - [TL] num of weights: 14560 - >>> n.count_weights() - 14560 - - - Slicing the outputs - - >>> n2 = n[:, :30] - >>> print(n2) - Last layer is: Layer (d2) [None, 30] - - - Iterating the outputs - - >>> for l in n: - >>> print(l) - Tensor("d1/Identity:0", shape=(?, 80), dtype=float32) - Tensor("d2/Identity:0", shape=(?, 80), dtype=float32) - - """ - - # Added to allow auto-completion - # FIXME: it seems act is never used in derived Layers - def __init__(self, name=None, act=None, *args, **kwargs): - # Layer constants - - for key in kwargs.keys(): - setattr(self, key, self._argument_dict_checkup(kwargs[key])) - - self.act = act if act not in [None, tf.identity] else None - - ## Hao Dong: automatically add layer type as the prefix of the layers - global _global_layer_name_dict - if name is None: - prefix = self.__class__.__name__.lower() - if _global_layer_name_dict.get(prefix) is not None: - _global_layer_name_dict[prefix] += 1 - name = prefix + '_' + str(_global_layer_name_dict[prefix]) - else: - _global_layer_name_dict[prefix] = 0 - name = prefix - - # FIXME: double check needed: the scope name may be deprecated in TF2 - # scope_name = tf.get_variable_scope().name - # self.name = scope_name + '/' + name if scope_name else name - self.name = name - - # Layer input outputs - # TODO: note that in dynamic network, inputs and outputs can be both None, may cause problem, test needed - self.inputs = None - self.outputs = None - self._inputs_shape_mem = None - self._outputs_shape_mem = None - - # self._inputs_shape = None - # self._outputs_shape = None - - self._input_layer = None - - # TODO: need to update - # self.all_layers = list() # we change layers --> outputs ? - # self.all_weights = list() # we change weights --> weights ? 
- # self.all_drop = dict() # remove all_drop - - # Layer building state - self._built = False - - # Layer weight state - self._weights = None - - # Layer training state - self.is_train = True - - @property - def _inputs_shape(self): - if self.inputs is not None: - if isinstance(self.inputs, list): - self._inputs_shape_mem = [t.get_shape().as_list() for t in self.inputs] - else: - self._inputs_shape_mem = self.inputs.get_shape().as_list() - return self._inputs_shape_mem - - @property - def _outputs_shape(self): - if self.outputs is not None: - if isinstance(self.outputs, list): - self._outputs_shape_mem = [t.get_shape().as_list() for t in self.outputs] - else: - self._outputs_shape_mem = self.outputs.get_shape().as_list() - return self._outputs_shape_mem - - @property - def weights(self): - return self._weights - - def __call__(self, prev_layer, **kwargs): - - if self.__class__.__name__ in tl.layers.inputs.__all__: - # 1. for input layers - # Input layers should use tf.convert_to_tensor to make sure the inputs is converted into tf.Tensor - - # code in tl 1.0 - # raise RuntimeError("Please use layers in `tl.layers.inputs` to convert Variable/Tensor/Placeholder/Numpy arrays to a TL layer") - # FIXME: not sure convert_to_tensor here or ask user to do it - self.inputs = tf.convert_to_tensor(prev_layer) - self._input_layer = None - self._built = True - self.build(self._inputs_shape) - self.outputs = self.forward(self.inputs, **kwargs) - - elif isinstance(prev_layer, Layer): - # 2. for normal layer have only 1 input i.e. DenseLayer - # Hint : list(), dict() is pass by value (shallow), without them, - # it is pass by reference. - - self.inputs = prev_layer.outputs - self._input_layer = prev_layer - - if not self._built: - self.build(self._inputs_shape) - self._built = True - - self.outputs = self.forward(self.inputs, **kwargs) - # self._outputs_shape = self.outputs.get_shape().as_list() - - # TODO: need update - # self._add_layers(prev_layer.all_layers) - # self._add_weights(self._weights) - # self._add_weights(prev_layer.all_weights) - # self._add_dropout_layers(prev_layer.all_drop) - - elif isinstance(prev_layer, list): - # 3. for layer have multiply inputs i.e. ConcatLayer - - self.inputs = [layer.outputs for layer in prev_layer] - self._input_layer = prev_layer # FIXME: not sure how to deal with it - - # FIXME: only support concat/elementwise, where build does nothing - if not self._built: - self._built = True - - self.outputs = self.forward(self.inputs, **kwargs) - - # TODO: need update - # self._add_layers(sum([l.all_layers for l in prev_layer], [])) - # self._add_weights(sum([l.all_weights for l in prev_layer], [])) - # self._add_dropout_layers(sum([list(l.all_drop.items()) for l in prev_layer], [])) - - else: - # FIXME: not sure if there is other cases - pass - # elif prev_layer is not None: - # # 4. tl.models - # self._add_layers(prev_layer.all_layers) - # self._add_weights(prev_layer.all_weights) - # self._add_dropout_layers(prev_layer.all_drop) - # - # if hasattr(prev_layer, "outputs"): - # self.inputs = prev_layer.outputs - - return self - - def _release_memory(self): - ''' - WARINING: This function should be called with great caution. - - self.inputs and self.outputs will be set as None but not deleted. 
- - ''' - _ = self._inputs_shape # save input shape before inputs become None - _ = self._outputs_shape # save outputs shape before outputs become None - self.inputs = None - self.outputs = None - - def _set_mode_for_layers(self, is_train): - self.is_train = is_train - - def _get_weights(self, var_name, shape, init=tl.initializers.random_normal()): - weight = get_variable_with_initializer( - scope_name=self.name, var_name=var_name, shape=shape, init=init - ) - if self._weights is None: - self._weights = list() - self._weights.append(weight) # Add into the weight collection - # self.__setattr__(var_name, weight) # FIXME: prefer to remove this line, the weights should be manually defined as members of the Layer - return weight - - @abstractmethod - def build(self, inputs_shape): - # FIXME: documentation needed - """ - An abstract method which should be overwritten in derived classes to define all necessary weights of the layer. - - :param inputs_shape: tuple - :return: void - """ - raise Exception("The build(self, inputs_shape) method must be implemented by inherited class") - - @abstractmethod - def forward(self, inputs): - # FIXME: documentation needed - """ - An abstract method which should be overwritten in derived classes to define forward feeding operations of the layer. - - :param inputs: Tensor - :return: Tensor - """ - raise Exception("The forward method must be implemented by inherited class") - - ''' - def print_weights(self, details=False, session=None): - """Print all information of weights in the model. """ - for i, p in enumerate(self.all_weights): - if details: - try: - val = p.eval(session=session) - logging.info( - " param {:3}: {:20} {:15} {} (mean: {:<18}, median: {:<18}, std: {:<18}) ". - format(i, p.name, str(val.shape), p.dtype.name, val.mean(), np.median(val), val.std()) - ) - except Exception as e: - logging.info(str(e)) - raise Exception( - "Hint: print weights details after tl.layers.initialize_global_variables(sess) " - "or use network.print_weights(False)." 
- ) - else: - logging.info(" param {:3}: {:20} {:15} {}".format(i, p.name, str(p.get_shape()), p.dtype.name)) - logging.info(" num of weights: %d" % self.count_weights()) - - # TODO: deprecated if no all_layers - def print_layers(self): - """Print all info of layers in the network.""" - for i, layer in enumerate(self.all_layers): - # logging.info(" layer %d: %s" % (i, str(layer))) - logging.info( - " layer {:3}: {:20} {:15} {}".format(i, layer.name, str(layer.get_shape()), layer.dtype.name) - ) - - # TODO: need to rewrite - def count_weights(self): - """Returns the number of parameters in the network.""" - n_weights = 0 - for _i, p in enumerate(self.all_weights): - n = 1 - # for s in p.eval().shape: - for s in p.get_shape(): - try: - s = int(s) - except Exception: - s = 1 - if s: - n = n * s - n_weights = n_weights + n - return n_weights - - @property - def n_weights(): - return count_weights() - - # TODO: need to rewrite - def get_all_weights(self, sess=None): - """Return the weights in a list of array.""" - _weights = [] - for p in self.all_weights: - if sess is None: - _weights.append(p.eval()) - else: - _weights.append(sess.run(p)) - return _weights - ''' - - def __repr__(self): - reprstr = "Layer" - return reprstr - - # FIXME : No need for __str__ given that we have __repr__ - # def __str__(self): - # - # if self.outputs is not None: - # _outputs_shape = self._outputs_shape - # if _outputs_shape[0] == 1: - # _outputs_shape[0] = "batch_size" - # else: - # _outputs_shape = "unknown for unbuilt layer" - # return " {} ({}) outputs_shape: {}".format(self.__class__.__name__, self.name, _outputs_shape) - # # self._outputs_shape)#outputs.get_shape().as_list()) - - # def __getitem__(self, key): - # - # net_new = Layer(prev_layer=None, name=self.name) - # - # net_new.name = self.name + '_indexing' - # net_new.inputs = self.inputs - # net_new.outputs = self.outputs[key] - # - # net_new._add_layers(self.all_layers[:-1]) - # net_new._add_layers(net_new.outputs) - # - # net_new._add_weights(self.all_weights) - # # net_new._add_dropout_layers(self.all_drop) - # - # return net_new - - def __setitem__(self, key, item): - raise TypeError("The Layer API does not allow to use the method: `__setitem__`") - - def __delitem__(self, key): - raise TypeError("The Layer API does not allow to use the method: `__delitem__`") - - # FIXME: all_layers are removed in new API - ''' - def __iter__(self): - for x in self.all_layers: # FIXME: it is good for eager mode? - yield x - - def __len__(self): - return len(self.all_layers) - ''' - - ''' - @protected_method - def _get_init_args(self, skip=4): - """Get all arguments of current layer for the configuration information.""" - stack = inspect.stack() - - if len(stack) < skip + 1: - raise ValueError("The length of the inspection stack is shorter than the requested start position.") - - args, _, _, values = inspect.getargvalues(stack[skip][0]) - - weights = {} - - for arg in args: - - # some args dont need to be saved into the graph. e.g. the input placeholder - if values[arg] is not None and arg not in ['self', 'prev_layer', 'inputs']: - - val = values[arg] - - # change function (e.g. act) into dictionary of module path and function name - if inspect.isfunction(val): - weights[arg] = {"module_path": val.__module__, "func_name": val.__name__} - # ignore more args e.g. 
TF class - elif arg.endswith('init'): - continue - # for other data type, save them directly - else: - weights[arg] = val - - return weights - ''' - - # # todo: deprecated if no all_layer - # @protected_method - # def _add_layers(self, layers): - # if isinstance(layers, list): - # try: # list of class Layer - # new_layers = [layer.outputs for layer in layers] - # self.all_layers.extend(list(new_layers)) - # - # except AttributeError: # list of tf.Tensor - # self.all_layers.extend(list(layers)) - # - # else: - # self.all_layers.append(layers) - # - # self.all_layers = list_remove_repeat(self.all_layers) - - # # todo: deprecated if no all_weights - # @protected_method - # def _add_weights(self, weights): - # - # if isinstance(weights, list): - # self.all_weights.extend(list(weights)) - # - # else: - # self.all_weights.append(weights) - # - # self.all_weights = list_remove_repeat(self.all_weights) - - # @protected_method - # def _add_dropout_layers(self, drop_layers): - # if isinstance(drop_layers, dict) or isinstance(drop_layers, list): - # self.all_drop.update(dict(drop_layers)) - # - # elif isinstance(drop_layers, tuple): - # self.all_drop.update(list(drop_layers)) - # - # else: - # raise ValueError() - - ''' - # FIXME: may not be necessary ??? Hao: I think it is not necessary.. - @private_method - def _apply_activation(self, logits, **kwargs): - if not kwargs: - kwargs = {} - return self.act(logits, **kwargs) if self.act is not None else logits - - # TODO: may need update - ''' - @private_method - def _argument_dict_checkup(self, args): - - if not isinstance(args, dict) and args is not None: - raise AssertionError( - "One of the argument given to %s should be formatted as a dictionary" % self.__class__.__name__ - ) - - return args if args is not None else {} - - # def __getstate__(self): # pickle save - # return {'version': 0.1, - # # 'outputs': self.outputs, - # } - # - # def __setstate__(self, state): # pickle restore - # self.outputs = state['outputs'] - - ## raise Exceptions for old version codes - ''' - def count_params(self, **kwargs): - raise Exception("please change count_params --> count_weights") - - def print_params(self, **kwargs): - raise Exception("please change print_params --> print_weights") - - @property - def all_params(self): - raise Exception("please change all_params --> weights") - ''' - - -class ModelLayer(Layer): - # TODO: documentation - ''' - Documentation pending - ''' - - def __init__(self, model): - super(ModelLayer, self).__init__(name="%s_layer" % model.name) - - self.model = model - - # Layer input outputs - # FIXME: model.inputs can be a list - self.inputs = model.inputs.outputs - # FIXME: model.outputs can be a list - self.outputs = model.forward(self.inputs) - - self._input_layer = model.inputs - - # Layer building state - self._built = True - - # Layer weight state - self._weights = model.weights - - # Layer training state - self.is_train = True - - logging.info( - "ModelLayer %s from Model: %s" % - (self.name, self.model.name) - ) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - return self.model.forward(inputs) - - def _set_mode_for_layers(self, is_train): - self.is_train = is_train - return self.model._set_mode_for_layers(is_train) - - def _release_memory(self): - ''' - WARINING: This function should be called with great caution. - - self.inputs and self.outputs will be set as None but not deleted. 
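As a usage sketch of `ModelLayer` (the surrounding model construction is hypothetical, and `ModelLayer` is assumed importable from this module): wrapping a `Model` yields a layer that forwards through the whole model and references, rather than copies, its weights:

```python
import tensorflow as tf
import tensorlayer as tl

# Hypothetical sub-model to be reused as one layer of a larger network.
ni = tl.layers.Input([None, 784])
nn = tl.layers.Dense(n_units=64, act=tf.nn.relu)(ni)
encoder = tl.models.Model(inputs=ni, outputs=nn)

outer_in = tl.layers.Input([None, 784])
outer = ModelLayer(encoder)(outer_in)  # forward() delegates to encoder.forward()
# the wrapper's .weights property returns encoder.weights (shared, not copied)
```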
- - ''' - super(ModelLayer, self)._release_memory() - self.model.release_memory() - -''' -class SequentialLayer(Layer): - - - def __init__(self, prev_layer, following_layers, name=None): - - super(SequentialLayer, self).__init__(name=name) - - # Layer input outputs - self.inputs = prev_layer.outputs - self._input_layer = prev_layer - - # Layer weight state - self._weights = list() - - # TODO: check type of following layers - self.following_layer = list() - in_layer = prev_layer - for layer in following_layers: - nlayer = layer(in_layer) - self.following_layer.append(nlayer) - self._weights.extend(nlayer.weights) - in_layer = nlayer - - self.outputs = self.forward(self.inputs) - - # Layer building state - self._built = True - - logging.info( - "SequentialLayer %s including layers [%s]" % - (self.name, ', '.join([layer.name for layer in self.following_layer])) - ) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - z = inputs - for layer in self.following_layer: - z = layer.forward(z) - - return z -''' - - -class LayerList(Layer): - # TODO: documentation - ''' - Documentation pending - ''' - def __init__(self, layers:list, name=None): - super(LayerList, self).__init__(name=name) - self.layers = layers - - is_built = True - for layer in self.layers: - if layer._built == False: - is_built = False - if layer._built == True and layer.weights is not None: - # some layers in the list passed in have already been built - # e.g. using input shape to construct layers in dynamic eager - if self._weights == None: - self._weights = list() - self._weights.extend(layer.weights) - if is_built == True: - self._built = True - - logging.info( - "LayerList %s including layers [%s]" % - (self.name, ', '.join([layer.name for layer in self.layers])) - ) - - def __getitem__(self, idx): - if isinstance(idx, slice): - return LayerList(list(self.layers)[idx]) - else: - return self.layers[idx] - - def __len__(self): - return len(self.layers) - - def __repr__(self): - tmpstr = 'LayerList' + '(\n' - for idx, layer in enumerate(self.layers): - modstr = layer.__repr__() - modstr = _addindent(modstr, 2) - tmpstr = tmpstr + ' (' + str(idx) + '): ' + modstr + '\n' - - tmpstr = tmpstr + ')' - return tmpstr - - def build(self, inputs_shape): - in_layer = self._input_layer - for layer in self.layers: - is_build = layer._built - nlayer = layer(in_layer) - if is_build == False and layer.weights is not None: - if self._weights == None: - self._weights = list() - self._weights.extend(layer.weights) - layer._built = True - in_layer = nlayer - - def forward(self, inputs): - z = inputs - for layer in self.layers: - z = layer.forward(z) - return z - - def _set_mode_for_layers(self, is_train): - self.is_train = is_train - for layer in self.layers: - if isinstance(layer, ModelLayer): - layer._set_mode_for_layers(is_train) - elif isinstance(layer, LayerList): - layer._set_mode_for_layers(is_train) - else: - layer.is_train = is_train - - def _release_memory(self): - ''' - WARINING: This function should be called with great caution. - - self.inputs and self.outputs will be set as None but not deleted. 
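A short usage sketch for `LayerList` (layer sizes are illustrative, and the imports assume `LayerList` is exported from `tensorlayer.layers`, as the commented example further below does). Because the constructor collects the weights of already-built layers and `__getitem__` handles slices, a list of `in_channels`-specified layers can be called, indexed, and sliced as one unit:

```python
import tensorflow as tf
from tensorlayer.layers import Dense, LayerList

layers = LayerList([
    Dense(n_units=800, act=tf.nn.relu, in_channels=784),
    Dense(n_units=800, act=tf.nn.relu, in_channels=800),
    Dense(n_units=10, in_channels=800),
])
print(len(layers))   # 3
head = layers[:2]    # slicing returns a new LayerList
first = layers[0]    # indexing returns the Dense layer itself
```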
- - ''' - super(LayerList, self)._release_memory() - for layer in self.layers: - layer._release_memory() - - - -# if __name__ == '__main__': -# -# from tensorlayer.layers import Input, Dense, Dropout, LayerList -# from tensorlayer.models import Model -# -# class mynet(Model): -# -# def __init__(self): -# super(mynet, self).__init__() -# -# self.layers = LayerList([ -# Input([None, 784]), -# Dropout(keep=0.8), -# Dense(n_units=800, act=tf.nn.relu, in_channels=784), -# Dense(n_units=800, act=tf.nn.relu, in_channels=800) -# ]) -# -# def forward(self, x): -# z = x -# for i in range(3): -# z = self.layers[i](z) -# return z -# -# def get_model(inputs_shape): -# ni = Input(inputs_shape) -# nn = LayerList([ -# Dropout(keep=0.8), -# Dense(n_units=800, act=tf.nn.relu), -# Dropout(keep=0.8), -# Dense(n_units=800, act=tf.nn.relu) -# ])(ni) -# -# M = Model(inputs=ni, outputs=nn) -# -# return M -# -# #net = mynet() -# net = get_model([None, 784]) -# print(net.weights) -# print(net.layer_dict['layerlist']._built) diff --git a/tensorlayer/layers/dense/__init__.py b/tensorlayer/layers/dense/__init__.py deleted file mode 100644 index 675559e..0000000 --- a/tensorlayer/layers/dense/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -TensorLayer provides rich layer implementations trailed for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow user to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API `__. -""" - -from .base_dense import * -from .binary_dense import * -from .dorefa_dense import * -from .dropconnect import * -from .ternary_dense import * -from .quan_dense import * -from .quan_dense_bn import * - -__all__ = [ - 'BinaryDense', - 'Dense', - 'DorefaDense', - 'DropconnectDense', - 'TernaryDense', - 'QuanDense', - 'QuanDenseLayerWithBN', -] diff --git a/tensorlayer/layers/dense/base_dense.py b/tensorlayer/layers/dense/base_dense.py deleted file mode 100644 index 832e60f..0000000 --- a/tensorlayer/layers/dense/base_dense.py +++ /dev/null @@ -1,159 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -import tensorflow as tf -import tensorlayer as tl - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Dense', -] - - -class Dense(Layer): - # FIXME: documentation update needed - """The :class:`Dense` class is a fully connected layer. - - Parameters - ---------- - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - in_channels - - name : None or str - A unique layer name. - - Examples - -------- - With TensorLayer - - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.Dense(net, 800, act=tf.nn.relu, name='relu') - - Without native TensorLayer APIs, you can do as follow. - - >>> W = tf.Variable( - ... 
tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W') - >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b') - >>> y = tf.nn.relu(tf.matmul(inputs, W) + b) - - Notes - ----- - If the layer input has more than two axes, it needs to be flatten by using :class:`Flatten`. - - """ - - def __init__( - self, - n_units, - act=None, - W_init=tl.initializers.truncated_normal(stddev=0.1), - b_init=tl.initializers.constant(value=0.0), - # W_init=tf.compat.v1.truncated_normal_initializer(stddev=0.1), - # b_init=tf.constant_initializer(value=0.0), - # W_init=tf.compat.v1.initializers.truncated_normal, - # b_init=tf.compat.v1.initializers.constant, - # W_init_args={'stddev': 0.1}, - # b_init_args=None, - in_channels=None, - name=None, # 'dense', - ): - - # super(Dense, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super(Dense, self).__init__(name) - - self.n_units = n_units - self.act = act - self.W_init = W_init - self.b_init = b_init - self.in_channels = in_channels - - if self.in_channels is not None: - self.build(self.in_channels) - self._built = True - # self.W_init_args = W_init_args - # self.b_init_args = b_init_args - - # self.n_in = int(self.inputs.get_shape()[-1]) - # self.inputs_shape = self.inputs.shape.as_list() # - # self.outputs_shape = [self.inputs_shape[0], n_units] - - logging.info( - "Dense %s: %d %s" % - (self.name, self.n_units, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def __repr__(self): - actstr = self.act.__name__ if self.act is not None else 'No Activation' - s = ('{classname}(n_units={n_units}, ' + actstr) - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - ''' - def build(self, inputs): - self.W = tf.get_variable( - name='W', shape=(self.n_in, self.n_units), initializer=self.W_init, dtype=LayersConfig.tf_dtype, - **self.W_init_args - ) - if self.b_init is not None: - try: - self.b = tf.get_variable( - name='b', shape=(self.n_units), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - **self.b_init_args - ) - except Exception: # If initializer is a constant, do not specify shape. - self.b = tf.get_variable( - name='b', initializer=self.b_init, dtype=LayersConfig.tf_dtype, **self.b_init_args - ) - self.get_weights(self.W, self.b) - ''' - - def build(self, inputs_shape): - if self.in_channels is None and len(inputs_shape) != 2: - raise AssertionError("The input dimension must be rank 2, please reshape or flatten it") - if self.in_channels: - shape = [self.in_channels, self.n_units] - else: - shape = [inputs_shape[1], self.n_units] - self.W = self._get_weights("weights", shape=tuple(shape), init=self.W_init) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.n_units, ), init=self.b_init) - # outputs_shape = [inputs_shape[0], self.n_units] - # return outputs_shape - - ''' - def forward(self, inputs, is_train): - outputs = tf.matmul(inputs, self.W) - if self.b_init is not None: - outputs = tf.add(z, self.b) - outputs = self.act(outputs) - return outputs - ''' - - def forward(self, inputs): - z = tf.matmul(inputs, self.W) - if self.b_init: - z = tf.add(z, self.b) - if self.act: - z = self.act(z) - return z diff --git a/tensorlayer/layers/dense/binary_dense.py b/tensorlayer/layers/dense/binary_dense.py deleted file mode 100644 index 695c683..0000000 --- a/tensorlayer/layers/dense/binary_dense.py +++ /dev/null @@ -1,116 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import quantize - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'BinaryDense', -] - - -class BinaryDense(Layer): - """The :class:`BinaryDense` class is a binary fully connected layer, which weights are either -1 or 1 while inferencing. - - Note that, the bias vector would not be binarized. - - Parameters - ---------- - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer, usually set to ``tf.act.sign`` or apply :class:`Sign` after :class:`BatchNorm`. - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inference. (TODO). - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : None or str - A unique layer name. - - """ - - def __init__( - self, - n_units=100, - act=None, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.1), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'binary_dense', - ): - # super(BinaryDense, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_units = n_units - self.act = act - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - logging.info( - "BinaryDense %s: %d %s" % - (self.name, n_units, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - # if inputs.get_shape().ndims != 2: - if len(inputs_shape) != 2: - raise Exception("The input dimension must be rank 2, please reshape or flatten it") - - if self.use_gemm: - raise Exception("TODO. The current version use tf.matmul for inferencing.") - - n_in = inputs_shape[-1] - - self.W = self._get_weights("weights", shape=tuple(shape), init=self.W_init, init_args=self.W_init_args) - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W', shape=(n_in, self.n_units), initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - if self.b_init is not None: - self.b = self._get_weights("biases", shape=(self.n_units), init=self.b_init, init_args=self.b_init_args) - # try: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', shape=(self.n_units), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # - # except Exception: # If initializer is a constant, do not specify shape. - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', initializer=self.b_init, dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - # self.get_weights([self.W, self.b]) - # else: - # self.get_weights(self.W) - - def forward(self, inputs): - # W = tl.act.sign(W) # dont update ... 
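-        # NOTE (assumption, for clarity): tl.layers.utils.quantize is expected
-        # to binarize W to {-1, +1} with a straight-through estimator, roughly:
-        #
-        #     @tf.custom_gradient
-        #     def quantize(x):
-        #         return tf.sign(x), lambda dy: dy   # sign forward, identity backward
-        #
-        # so the real-valued W keeps receiving gradients during training.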
- W_ = quantize(self.W) - # W = tf.Variable(W) - - outputs = tf.matmul(inputs, W_) - # self.outputs = xnor_gemm(self.inputs, W) # TODO - - if self.b_init is not None: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/dense/dorefa_dense.py b/tensorlayer/layers/dense/dorefa_dense.py deleted file mode 100644 index d351348..0000000 --- a/tensorlayer/layers/dense/dorefa_dense.py +++ /dev/null @@ -1,121 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import cabs -from tensorlayer.layers.utils import quantize_active -from tensorlayer.layers.utils import quantize_weight - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'DorefaDense', -] - - -class DorefaDense(Layer): - """The :class:`DorefaDense` class is a binary fully connected layer, which weights are 'bitW' bits and the output of the previous layer - are 'bitA' bits while inferencing. - - Note that, the bias vector would not be binarized. - - Parameters - ---------- - bitW : int - The bits of this layer's parameter - bitA : int - The bits of the output of previous layer - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer, usually set to ``tf.act.sign`` or apply :class:`Sign` after :class:`BatchNorm`. - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inferencing. (TODO). - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : a str - A unique layer name. - - """ - - def __init__( - self, - bitW=1, - bitA=3, - n_units=100, - act=None, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.1), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'dorefa_dense', - ): - # super(DorefaDense, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.bitW = bitW - self.bitA = bitA - self.n_units = n_units - self.act = act - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - logging.info( - "DorefaDense %s: %d %s" % - (self.name, n_units, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - if len(inputs_shape) != 2: - raise Exception("The input dimension must be rank 2, please reshape or flatten it") - if self.use_gemm: - raise Exception("TODO. 
The current version use tf.matmul for inferencing.") - - n_in = inputs_shape[-1] - self.W = self._get_weights("weights", shape=(n_in, self.n_units), init=self.W_init, init_args=self.W_init_args) - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W', shape=(n_in, self.n_units), initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - if self.b_init is not None: - self.b = self._get_weights("biases", shape=(self.n_units), init=self.b_init, init_args=self.b_init_args) - # try: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', shape=(self.n_units), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # - # except Exception: # If initializer is a constant, do not specify shape. - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', initializer=self.b_init, dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - # self.get_weights([self.W, self.b]) - # else: - # self.get_weights(self.W) - - def forward(self, inputs): - inputs = quantize_active(cabs(inputs), self.bitA) - W_ = quantize_weight(self.W, self.bitW) - outputs = tf.matmul(inputs, W_) - # self.outputs = xnor_gemm(self.inputs, W) # TODO - if self.b_init is not None: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - # self.outputs = xnor_gemm(self.inputs, W) + b # TODO - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/dense/dropconnect.py b/tensorlayer/layers/dense/dropconnect.py deleted file mode 100644 index 2a7f0ea..0000000 --- a/tensorlayer/layers/dense/dropconnect.py +++ /dev/null @@ -1,115 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'DropconnectDense', -] - - -class DropconnectDense(Layer): - """ - The :class:`DropconnectDense` class is :class:`Dense` with DropConnect - behaviour which randomly removes connections between this layer and the previous - layer according to a keeping probability. - - Parameters - ---------- - keep : float - The keeping probability. - The lower the probability it is, the more activations are set to zero. - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer. - W_init : weights initializer - The initializer for the weight matrix. - b_init : biases initializer - The initializer for the bias vector. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Examples - -------- - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.DropconnectDense(net, keep=0.8, - ... n_units=800, act=tf.nn.relu, name='relu1') - >>> net = tl.layers.DropconnectDense(net, keep=0.5, - ... n_units=800, act=tf.nn.relu, name='relu2') - >>> net = tl.layers.DropconnectDense(net, keep=0.5, - ... n_units=10, name='output') - - References - ---------- - - `Wan, L. (2013). 
Regularization of neural networks using dropconnect `__ - - """ - - def __init__( - self, - keep=0.5, - n_units=100, - act=None, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.1), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, # 'dropconnect', - ): - # super(DropconnectDense, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.keep = keep - self.n_units = n_units - self.act = act - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - - logging.info( - "DropconnectDense %s: %d %s" % - (self.name, n_units, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - - if len(inputs_shape) != 2: - raise Exception("The input dimension must be rank 2") - - self.n_in = inputs_shape[-1] - - self.W = self._get_weights("weights", shape=(n_in, self.n_units), init=self.W_init, init_args=self.W_init_args) - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W', shape=(self.n_in, self.n_units), initializer=self.W_init, - # dtype=LayersConfig.tf_dtype, **self.W_init_args - # ) - if self.b_init: - self.b = self._get_weights("biases", shape=(self.n_units), init=self.b_init, init_args=self.b_init_args) - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', shape=(self.n_units), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # self.get_weights([self.W, self.b]) - # else: - # self.get_weights(self.W) - - def forward(self, inputs): - W_dropcon = tf.nn.dropout(self.W, 1 - (self.keep)) - outputs = tf.matmul(inputs, W_dropcon) - if self.b_init: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/dense/quan_dense.py b/tensorlayer/layers/dense/quan_dense.py deleted file mode 100644 index 0a38eac..0000000 --- a/tensorlayer/layers/dense/quan_dense.py +++ /dev/null @@ -1,123 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import quantize_active_overflow -from tensorlayer.layers.utils import quantize_weight_overflow - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'QuanDense', -] - - -class QuanDense(Layer): - """The :class:`QuanDense` class is a quantized fully connected layer with BN, which weights are 'bitW' bits and the output of the previous layer - are 'bitA' bits while inferencing. - - Parameters - ---------- - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer. - bitW : int - The bits of this layer's parameter - bitA : int - The bits of the output of previous layer - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inference. (TODO). - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : None or str - A unique layer name. 
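For intuition about the forward pass defined below, here is a hedged sketch of k-bit uniform quantization in the spirit of `quantize_active_overflow`/`quantize_weight_overflow`; the exact clipping and rescaling inside `tensorlayer.layers.utils` may differ:

```python
import tensorflow as tf

def quantize_k(x, k):
    """Map values in [0, 1] onto 2**k evenly spaced levels."""
    n = float(2 ** k - 1)
    return tf.round(x * n) / n

# e.g. bitA=8 quantizes activations to 256 levels after they have been
# clipped/normalized into [0, 1]; bitW does the same for the weights.
```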
- - """ - - def __init__( - self, - n_units=100, - act=None, - bitW=8, - bitA=8, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.1), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'quan_dense', - ): - # super(QuanDense, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_units = n_units - self.act = act - self.bitW = bitW - self.bitA = bitA - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - logging.info( - "QuanDense %s: %d %s" % - (self.name, n_units, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - # if inputs.get_shape().ndims != 2: - if len(inputs_shape) != 2: - raise Exception("The input dimension must be rank 2, please reshape or flatten it") - - if self.use_gemm: - raise Exception("TODO. The current version use tf.matmul for inferencing.") - - n_in = inputs_shape[-1] - - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W', shape=(n_in, self.n_units), initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights("weights", shape=(n_in, self.n_units), init=self.W_init, init_args=self.W_init_args) - if self.b_init is not None: - self.b = self._get_weights("biases", shape=int(self.n_units), init=self.b_init, init_args=self.b_init_args) - # try: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', shape=(self.n_units), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # except Exception: # If initializer is a constant, do not specify shape. - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', initializer=self.b_init, dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - # self.get_weights([self.W, self.b]) - # else: - # self.get_weights(self.W) - - def forward(self, inputs): - - inputs = quantize_active_overflow(inputs, self.bitA) - - W_ = quantize_weight_overflow(self.W, self.bitW) - - # outputs = tf.matmul(inputs, self.W) - outputs = tf.matmul(inputs, W_) # hao dong change to this - - if self.b_init is not None: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/dense/quan_dense_bn.py b/tensorlayer/layers/dense/quan_dense_bn.py deleted file mode 100644 index f120f0a..0000000 --- a/tensorlayer/layers/dense/quan_dense_bn.py +++ /dev/null @@ -1,192 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig -from tensorflow.python.training import moving_averages - -from tensorlayer.layers.utils import quantize_active_overflow -from tensorlayer.layers.utils import quantize_weight_overflow - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'QuanDenseLayerWithBN', -] - - -class QuanDenseLayerWithBN(Layer): - """The :class:`QuanDenseLayerWithBN` class is a quantized fully connected layer with BN, which weights are 'bitW' bits and the output of the previous layer - are 'bitA' bits while inferencing. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - n_units : int - The number of units of this layer. 
-    act : activation function
-        The activation function of this layer.
-    decay : float
-        A decay factor for `ExponentialMovingAverage`.
-        A large value is suggested for large datasets.
-    epsilon : float
-        Epsilon value for numerical stability.
-    is_train : boolean
-        Whether the layer is being used for training or inference.
-    beta_init : initializer or None
-        The initializer for initializing beta; if None, skip beta.
-        Usually you should not skip beta unless you know what you are doing.
-    gamma_init : initializer or None
-        The initializer for initializing gamma; if None, skip gamma.
-    bitW : int
-        The number of bits of this layer's parameters.
-    bitA : int
-        The number of bits of the previous layer's output.
-    use_gemm : boolean
-        If True, use GEMM instead of ``tf.matmul`` for inference (TODO).
-    W_init : initializer
-        The initializer for the weight matrix.
-    W_init_args : dictionary
-        The arguments for the weight matrix initializer.
-    name : None or str
-        A unique layer name.
-
-    """
-
-    @deprecated_alias(layer='prev_layer', end_support_version=1.9)  # TODO remove this line for the 1.9 release
-    def __init__(
-            self,
-            prev_layer,
-            n_units=100,
-            act=None,
-            decay=0.9,
-            epsilon=1e-5,
-            is_train=False,
-            bitW=8,
-            bitA=8,
-            gamma_init=tf.compat.v1.initializers.ones,
-            beta_init=tf.compat.v1.initializers.zeros,
-            use_gemm=False,
-            W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.1),
-            W_init_args=None,
-            name=None,  #'quan_dense_with_bn',
-    ):
-        super(QuanDenseLayerWithBN, self).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, name=name)
-
-        logging.info(
-            "QuanDenseLayerWithBN %s: %d %s" %
-            (self.name, n_units, self.act.__name__ if self.act is not None else 'No Activation')
-        )
-
-        if self.inputs.get_shape().ndims != 2:
-            raise Exception("The input dimension must be rank 2, please reshape or flatten it")
-
-        if use_gemm:
-            raise Exception("TODO.
The current version use tf.matmul for inferencing.") - - n_in = int(self.inputs.get_shape()[-1]) - x = self.inputs - self.inputs = quantize_active_overflow(self.inputs, bitA) - self.n_units = n_units - - with tf.compat.v1.variable_scope(name): - - W = tf.compat.v1.get_variable( - name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **self.W_init_args - ) - - mid_out = tf.matmul(x, W) - - para_bn_shape = mid_out.get_shape()[-1:] - - if gamma_init: - scale_para = tf.compat.v1.get_variable( - name='scale_para', shape=para_bn_shape, initializer=gamma_init, dtype=LayersConfig.tf_dtype, - trainable=is_train - ) - else: - scale_para = None - - if beta_init: - offset_para = tf.compat.v1.get_variable( - name='offset_para', shape=para_bn_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, - trainable=is_train - ) - else: - offset_para = None - - moving_mean = tf.compat.v1.get_variable( - 'moving_mean', para_bn_shape, initializer=tf.compat.v1.initializers.constant(1.), - dtype=LayersConfig.tf_dtype, trainable=False - ) - - moving_variance = tf.compat.v1.get_variable( - 'moving_variance', - para_bn_shape, - initializer=tf.compat.v1.initializers.constant(1.), - dtype=LayersConfig.tf_dtype, - trainable=False, - ) - - mean, variance = tf.nn.moments(x=mid_out, axes=list(range(len(mid_out.get_shape()) - 1))) - - update_moving_mean = moving_averages.assign_moving_average( - moving_mean, mean, decay, zero_debias=False - ) # if zero_debias=True, has bias - - update_moving_variance = moving_averages.assign_moving_average( - moving_variance, variance, decay, zero_debias=False - ) # if zero_debias=True, has bias - - def mean_var_with_update(): - with tf.control_dependencies([update_moving_mean, update_moving_variance]): - return tf.identity(mean), tf.identity(variance) - - if is_train: - mean, var = mean_var_with_update() - else: - mean, var = moving_mean, moving_variance - - w_fold = _w_fold(W, scale_para, var, epsilon) - bias_fold = _bias_fold(offset_para, scale_para, mean, var, epsilon) - - W = quantize_weight_overflow(w_fold, bitW) - # W = tl.act.sign(W) # dont update ... - - # W = tf.Variable(W) - - self.outputs = tf.matmul(self.inputs, W) - # self.outputs = xnor_gemm(self.inputs, W) # TODO - - self.outputs = tf.nn.bias_add(self.outputs, bias_fold, name='bias_add') - - self.outputs = self._apply_activation(self.outputs) - - self._add_layers(self.outputs) - - self._add_params([W, scale_para, offset_para, moving_mean, moving_variance]) - - -def _w_fold(w, gama, var, epsilon): - return tf.compat.v1.div(tf.multiply(gama, w), tf.sqrt(var + epsilon)) - - -def _bias_fold(beta, gama, mean, var, epsilon): - return tf.subtract(beta, tf.compat.v1.div(tf.multiply(gama, mean), tf.sqrt(var + epsilon))) diff --git a/tensorlayer/layers/dense/ternary_dense.py b/tensorlayer/layers/dense/ternary_dense.py deleted file mode 100644 index 88bded3..0000000 --- a/tensorlayer/layers/dense/ternary_dense.py +++ /dev/null @@ -1,123 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer.layers.utils import compute_alpha -from tensorlayer.layers.utils import ternary_operation - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'TernaryDense', -] - - -class TernaryDense(Layer): - """The :class:`TernaryDense` class is a ternary fully connected layer, which weights are either -1 or 1 or 0 while inference. 
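A hedged sketch of the ternarization performed by `compute_alpha`/`ternary_operation` in `forward()` below. The threshold rule here is illustrative (implementations commonly use a fraction of the mean absolute weight) and is not necessarily TensorLayer's exact choice:

```python
import tensorflow as tf

def ternarize(w):
    delta = 0.7 * tf.reduce_mean(tf.abs(w))                          # threshold
    w_t = tf.where(tf.abs(w) > delta, tf.sign(w), tf.zeros_like(w))  # {-1, 0, 1}
    mask = tf.abs(w_t)                                               # 1 where kept
    alpha = tf.reduce_sum(tf.abs(w) * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)
    return alpha * w_t                                               # scaled ternary weights
```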
- - Note that, the bias vector would not be tenaried. - - Parameters - ---------- - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer, usually set to ``tf.act.sign`` or apply :class:`SignLayer` after :class:`BatchNormLayer`. - use_gemm : boolean - If True, use gemm instead of ``tf.matmul`` for inference. (TODO). - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : None or str - A unique layer name. - - """ - - def __init__( - self, - n_units=100, - act=None, - use_gemm=False, - W_init=tf.compat.v1.initializers.truncated_normal(stddev=0.1), - b_init=tf.compat.v1.initializers.constant(value=0.0), - W_init_args=None, - b_init_args=None, - name=None, #'ternary_dense', - ): - # super(TernaryDense, self - # ).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, b_init_args=b_init_args, name=name) - super().__init__(name) - self.n_units = n_units - self.act = act - self.use_gemm = use_gemm - self.W_init = W_init - self.b_init = b_init - self.W_init_args = W_init_args - self.b_init_args = b_init_args - logging.info( - "TernaryDense %s: %d %s" % - (self.name, n_units, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - # if inputs.get_shape().ndims != 2: - if len(inputs_shape) != 2: - raise Exception("The input dimension must be rank 2, please reshape or flatten it") - - if self.use_gemm: - raise Exception("TODO. The current version use tf.matmul for inferencing.") - - n_in = inputs_shape[-1] - - # self.W = tf.compat.v1.get_variable( - # name=self.name + '\W', shape=(n_in, self.n_units), initializer=self.W_init, dtype=LayersConfig.tf_dtype, - # **self.W_init_args - # ) - self.W = self._get_weights( - scope_name=self.name, var_name="weights", shape=(n_in, self.n_units), init=self.W_init, - init_args=self.W_init_args - ) - if self.b_init is not None: - self.b = self._get_weights( - scope_name=self.name, var_name="biases", shape=(self.n_units), init=self.b_init, - init_args=self.b_init_args - ) - # try: - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', shape=(self.n_units), initializer=self.b_init, dtype=LayersConfig.tf_dtype, - # **self.b_init_args - # ) - # except Exception: # If initializer is a constant, do not specify shape. - # self.b = tf.compat.v1.get_variable( - # name=self.name + '\b', initializer=self.b_init, dtype=LayersConfig.tf_dtype, **self.b_init_args - # ) - # self.get_weights([self.W, self.b]) - # else: - # self.get_weights(self.W) - - def forward(self, inputs): - # W = tl.act.sign(W) # dont update ... - alpha = compute_alpha(self.W) - W_ = ternary_operation(self.W) - W_ = tf.multiply(alpha, W_) - # W = tf.Variable(W) - - outputs = tf.matmul(inputs, W_) - # self.outputs = xnor_gemm(self.inputs, W) # TODO - - if self.b_init is not None: - outputs = tf.nn.bias_add(outputs, self.b, name='bias_add') - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/deprecated.py b/tensorlayer/layers/deprecated.py deleted file mode 100644 index 21de4f2..0000000 --- a/tensorlayer/layers/deprecated.py +++ /dev/null @@ -1,383 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -__all__ = [] - - -class NonExistingLayerError(Exception): - pass - - -# activation.py -__all__ += [ - 'PReluLayer', - 'PRelu6Layer', - 'PTRelu6Layer', -] - -__log__ = '\n Hint: 1) downgrade TF and TL from version 2.x to 1.x. 2) check the documentation of TF and TL version 2.x' - -def PReluLayer(*args, **kwargs): - raise NonExistingLayerError("PReluLayer(net, name='a') --> PRelu(name='a')(net))"+__log__) - - -def PRelu6Layer(*args, **kwargs): - raise NonExistingLayerError("PRelu6Layer(net, name='a') --> PRelu6(name='a')(net))"+__log__) - - -def PTRelu6Layer(*args, **kwargs): - raise NonExistingLayerError("PTRelu6Layer(net, name='a') --> PTRelu(name='a')(net))"+__log__) - - -# convolution/atrous_conv.py -__all__ += [ - 'AtrousConv1dLayer', - 'AtrousConv2dLayer', - 'AtrousDeConv2dLayer', -] - - -def AtrousConv1dLayer(*args, **kwargs): - raise NonExistingLayerError("use `tl.layers.Conv1d` with dilation instead"+__log__) - - -def AtrousConv2dLayer(*args, **kwargs): - raise NonExistingLayerError("use `tl.layers.Conv2d` with dilation instead"+__log__) - - -def AtrousDeConv2dLayer(*args, **kwargs): - # raise NonExistingLayerError("AtrousDeConv2dLayer(net, name='a') --> AtrousDeConv2d(name='a')(net)") - raise NonExistingLayerError("use `tl.layers.DeConv2d` with dilation instead"+__log__) - -# dense/base_dense.py -__all__ += [ - 'DenseLayer', -] - - -def DenseLayer(*args, **kwargs): - raise NonExistingLayerError("DenseLayer(net, name='a') --> Dense(name='a')(net)"+__log__) - - -# dense/binary_dense.py -__all__ += [ - 'BinaryDenseLayer', -] - - -def BinaryDenseLayer(*args, **kwargs): - raise NonExistingLayerError("BinaryDenseLayer(net, name='a') --> BinaryDense(name='a')(net)"+__log__) - - -# dense/dorefa_dense.py -__all__ += [ - 'DorefaDenseLayer', -] - - -def DorefaDenseLayer(*args, **kwargs): - raise NonExistingLayerError("DorefaDenseLayer(net, name='a') --> DorefaDense(name='a')(net)"+__log__) - - -# dense/dropconnect.py -__all__ += [ - 'DropconnectDenseLayer', -] - - -def DropconnectDenseLayer(*args, **kwargs): - raise NonExistingLayerError("DropconnectDenseLayer(net, name='a') --> DropconnectDense(name='a')(net)"+__log__) - - -# dense/ternary_dense.py -__all__ += [ - 'TernaryDenseLayer', -] - - -def TernaryDenseLayer(*args, **kwargs): - raise NonExistingLayerError("TernaryDenseLayer(net, name='a') --> TernaryDense(name='a')(net)"+__log__) - - -# dropout.py -__all__ += [ - 'DropoutLayer', -] - - -def DropoutLayer(*args, **kwargs): - raise NonExistingLayerError("DropoutLayer(net, is_train=True, name='a') --> Dropout(name='a')(net, is_train=True)"+__log__) - - -# extend.py -__all__ += [ - 'ExpandDimsLayer', - 'TileLayer', -] - - -def ExpandDimsLayer(*args, **kwargs): - raise NonExistingLayerError("ExpandDimsLayer(net, name='a') --> ExpandDims(name='a')(net)"+__log__) - - -def TileLayer(*args, **kwargs): - raise NonExistingLayerError("TileLayer(net, name='a') --> Tile(name='a')(net)"+__log__) - - -# image_resampling.py -__all__ += [ - 'UpSampling2dLayer', - 'DownSampling2dLayer', -] - - -def UpSampling2dLayer(*args, **kwargs): - raise NonExistingLayerError("UpSampling2dLayer(net, name='a') --> UpSampling2d(name='a')(net)"+__log__) - - -def DownSampling2dLayer(*args, **kwargs): - raise NonExistingLayerError("DownSampling2dLayer(net, name='a') --> DownSampling2d(name='a')(net)"+__log__) - - -# importer.py -__all__ += [ - 'SlimNetsLayer', - 'KerasLayer', -] - - -def SlimNetsLayer(*args, **kwargs): - raise NonExistingLayerError("SlimNetsLayer(net, name='a') 
--> SlimNets(name='a')(net)"+__log__) - - -def KerasLayer(*args, **kwargs): - raise NonExistingLayerError("KerasLayer(net, name='a') --> Keras(name='a')(net)"+__log__) - - -# inputs.py -__all__ += [ - 'InputLayer', -] - - -def InputLayer(*args, **kwargs): - raise NonExistingLayerError("InputLayer(x, name='a') --> Input(name='a')(x)"+__log__) - -# embedding.py -__all__ += [ - 'OneHotInputLayer', - 'Word2vecEmbeddingInputlayer', - 'EmbeddingInputlayer', - 'AverageEmbeddingInputlayer', -] - - -def OneHotInputLayer(*args, **kwargs): - raise NonExistingLayerError("Not longer Input layer: OneHotInputLayer(x, name='a') --> OneHot(name='a')(layer)"+__log__) - - -def Word2vecEmbeddingInputlayer(*args, **kwargs): - raise NonExistingLayerError("Not longer Input layer: Word2vecEmbeddingInputlayer(x, name='a') --> Word2vecEmbedding(name='a')(layer)"+__log__) - - -def EmbeddingInputlayer(*args, **kwargs): - raise NonExistingLayerError("Not longer Input layer: EmbeddingInputlayer(x, name='a') --> Embedding(name='a')(layer)"+__log__) - - -def AverageEmbeddingInputlayer(*args, **kwargs): - raise NonExistingLayerError("Not longer Input layer: AverageEmbeddingInputlayer(x, name='a') --> AverageEmbedding(name='a')(layer)"+__log__) - - -# lambda.py -__all__ += [ - 'LambdaLayer', - 'ElementwiseLambdaLayer', -] - - -def LambdaLayer(*args, **kwargs): - raise NonExistingLayerError("LambdaLayer(x, lambda x: 2*x, name='a') --> Lambda(lambda x: 2*x, name='a')(x)"+__log__) - - -def ElementwiseLambdaLayer(*args, **kwargs): - raise NonExistingLayerError("ElementwiseLambdaLayer(x, ..., name='a') --> ElementwiseLambda(..., name='a')(x)"+__log__) - - -# merge.py -__all__ += [ - 'ConcatLayer', - 'ElementwiseLayer', -] - - -def ConcatLayer(*args, **kwargs): - raise NonExistingLayerError("ConcatLayer(x, ..., name='a') --> Concat(..., name='a')(x)"+__log__) - - -def ElementwiseLayer(*args, **kwargs): - raise NonExistingLayerError("ElementwiseLayer(x, ..., name='a') --> Elementwise(..., name='a')(x)"+__log__) - - -# noise.py -__all__ += [ - 'GaussianNoiseLayer', -] - - -def GaussianNoiseLayer(*args, **kwargs): - raise NonExistingLayerError("GaussianNoiseLayer(x, ..., name='a') --> GaussianNoise(..., name='a')(x)"+__log__) - - -# normalization.py -__all__ += [ - 'BatchNormLayer', - 'InstanceNormLayer', - 'LayerNormLayer', - 'LocalResponseNormLayer', - 'GroupNormLayer', - 'SwitchNormLayer', -] - - -def BatchNormLayer(*args, **kwargs): - raise NonExistingLayerError("BatchNormLayer(x, is_train=True, name='a') --> BatchNorm(name='a')(x, is_train=True)"+__log__) - - -def InstanceNormLayer(*args, **kwargs): - raise NonExistingLayerError("InstanceNormLayer(x, name='a') --> InstanceNorm(name='a')(x)"+__log__) - - -def LayerNormLayer(*args, **kwargs): - raise NonExistingLayerError("LayerNormLayer(x, name='a') --> LayerNorm(name='a')(x)"+__log__) - - -def LocalResponseNormLayer(*args, **kwargs): - raise NonExistingLayerError("LocalResponseNormLayer(x, name='a') --> LocalResponseNorm(name='a')(x)"+__log__) - - -def GroupNormLayer(*args, **kwargs): - raise NonExistingLayerError("GroupNormLayer(x, name='a') --> GroupNorm(name='a')(x)"+__log__) - - -def SwitchNormLayer(*args, **kwargs): - raise NonExistingLayerError("SwitchNormLayer(x, name='a') --> SwitchNorm(name='a')(x)"+__log__) - - -# quantize_layer.py -__all__ += [ - 'SignLayer', -] - - -def SignLayer(*args, **kwargs): - raise NonExistingLayerError("SignLayer(x, name='a') --> Sign(name='a')(x)"+__log__) - - -# recurrent/lstm_layers.py -__all__ += [ - 'ConvLSTMLayer', -] - - -def 
ConvLSTMLayer(*args, **kwargs): - raise NonExistingLayerError("ConvLSTMLayer(x, name='a') --> ConvLSTM(name='a')(x)"+__log__) - - -# recurrent/rnn_dynamic_layers.py -__all__ += [ - 'DynamicRNNLayer', - 'BiDynamicRNNLayer', -] - - -def DynamicRNNLayer(*args, **kwargs): - raise NonExistingLayerError( - "DynamicRNNLayer(x, is_train=True, name='a') --> DynamicRNN(name='a')(x, is_train=True)"+__log__ - ) - - -def BiDynamicRNNLayer(*args, **kwargs): - raise NonExistingLayerError( - "BiDynamicRNNLayer(x, is_train=True, name='a') --> BiDynamicRNN(name='a')(x, is_train=True)"+__log__ - ) - - -# recurrent/rnn_layers.py -__all__ += [ - 'RNNLayer', - 'BiRNNLayer', -] - - -def RNNLayer(*args, **kwargs): - raise NonExistingLayerError("RNNLayer(x, name='a') --> RNN(name='a')(x)"+__log__) - - -def BiRNNLayer(*args, **kwargs): - raise NonExistingLayerError("BiRNNLayer(x, is_train=True, name='a') --> BiRNN(name='a')(x, is_train=True)"+__log__) - - -# reshape.py -__all__ += [ - 'FlattenLayer', - 'ReshapeLayer', - 'TransposeLayer', -] - - -def FlattenLayer(*args, **kwargs): - raise NonExistingLayerError("FlattenLayer(x, name='a') --> Flatten(name='a')(x)"+__log__) - - -def ReshapeLayer(*args, **kwargs): - raise NonExistingLayerError("ReshapeLayer(x, name='a') --> Reshape(name='a')(x)"+__log__) - - -def TransposeLayer(*args, **kwargs): - raise NonExistingLayerError("TransposeLayer(x, name='a') --> Transpose(name='a')(x)"+__log__) - - -# scale.py -__all__ += [ - 'ScaleLayer', -] - - -def ScaleLayer(*args, **kwargs): - raise NonExistingLayerError("ScaleLayer(x, name='a') --> Scale(name='a')(x)"+__log__) - - -# spatial_transformer.py -__all__ += ['SpatialTransformer2dAffineLayer'] - - -def SpatialTransformer2dAffineLayer(*args, **kwargs): - raise NonExistingLayerError( - "SpatialTransformer2dAffineLayer(x1, x2, name='a') --> SpatialTransformer2dAffine(name='a')(x1, x2)"+__log__ - ) - - -# stack.py -__all__ += [ - 'StackLayer', - 'UnStackLayer', -] - - -def StackLayer(*args, **kwargs): - raise NonExistingLayerError("StackLayer(x1, x2, name='a') --> Stack(name='a')(x1, x2)"+__log__) - - -def UnStackLayer(*args, **kwargs): - raise NonExistingLayerError("UnStackLayer(x1, x2, name='a') --> UnStack(name='a')(x1, x2)"+__log__) - - -# time_distributed.py -__all__ += [ - 'TimeDistributedLayer', -] - - -def TimeDistributedLayer(*args, **kwargs): - # raise NonExistingLayerError("TimeDistributedLayer(x1, x2, name='a') --> TimeDistributed(name='a')(x1, x2)") - raise NonExistingLayerError("TimeDistributedLayer is removed for TF 2.0, please use eager mode instead."+__log__) diff --git a/tensorlayer/layers/dropout.py b/tensorlayer/layers/dropout.py deleted file mode 100644 index 006780a..0000000 --- a/tensorlayer/layers/dropout.py +++ /dev/null @@ -1,67 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Dropout', -] - - -class Dropout(Layer): - """ - The :class:`Dropout` class is a noise layer which randomly set some - activations to zero according to a keeping probability. - - Parameters - ---------- - keep : float - The keeping probability. - The lower the probability it is, the more activations are set to zero. - seed : int or None - The seed for random dropout. - name : None or str - A unique layer name. 
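Note the TF2 API change visible in `forward()` below: `tf.nn.dropout` now takes a drop *rate* rather than a keep probability, hence the `rate = 1 - keep` conversion. A quick check:

```python
import tensorflow as tf

x = tf.ones([4, 4])
y = tf.nn.dropout(x, rate=1 - 0.8)  # keep probability 0.8 -> drop rate 0.2
# surviving entries are rescaled by 1/keep, so E[y] == E[x]
```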
- - """ - - def __init__(self, keep, seed=None, name=None): #"dropout"): - super(Dropout, self).__init__(name) - self.keep = keep - self.seed = seed - - self.build() - self._built = True - - logging.info("Dropout %s: keep: %f " % (self.name, self.keep)) - - ''' - def build(self, inputs): - pass - - def forward(self, inputs, is_train): - if is_train: - outputs = tf.nn.dropout(inputs, keep=self.keep, seed=self.seed, name=self.name) - else: - outputs = inputs - return outputs - ''' - - def build(self, inputs_shape=None): - # return inputs_shape - pass - - def forward(self, inputs): - # print(self.is_train) - if self.is_train: - outputs = tf.nn.dropout(inputs, rate=1 - (self.keep), seed=self.seed, name=self.name) - else: - outputs = inputs - return outputs diff --git a/tensorlayer/layers/embedding.py b/tensorlayer/layers/embedding.py deleted file mode 100644 index d3c543c..0000000 --- a/tensorlayer/layers/embedding.py +++ /dev/null @@ -1,411 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -import tensorflow as tf -import tensorlayer as tl - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -__all__ = [ - 'OneHot', - 'Word2vecEmbedding', - 'Embedding', - 'AverageEmbedding', -] - -class OneHot(Layer): - """ - The :class:`OneHot` class is the starting layer of a neural network, see ``tf.one_hot``. - Useful link: `https://www.tensorflow.org/api_docs/python/tf/one_hot`. - - Parameters - ---------- - depth : None or int - If the input indices is rank N, the output will have rank N+1. The new axis is created at dimension `axis` (default: the new axis is appended at the end). - on_value : None or number - The value to represnt `ON`. If None, it will default to the value 1. - off_value : None or number - The value to represnt `OFF`. If None, it will default to the value 0. - axis : None or int - The axis. - dtype : None or TensorFlow dtype - The data type, None means tf.float32. - name : str - A unique layer name. - - Examples - --------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> net = tl.layers.Input([32], dtype=tf.int32) - >>> net = tl.layers.OneHot(depth=8)(net) - >>> print(net.outputs) - - - """ - - def __init__(self, depth=None, on_value=None, off_value=None, axis=None, dtype=None, name=None): #'input'): - - super(OneHot, self).__init__(name) - self.depth = depth - self.on_value = on_value - self.off_value = off_value - self.axis = axis - self.dtype = dtype - logging.info("OneHotInput %s" % (self.name)) - - if self.depth is None: - raise RuntimeError(self.__class__.__name__ + ": depth == None the number of output units is undefined") - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - """ - Parameters - ---------- - inputs : input tensor - The inputs are indices. The locations represented by indices in indices take value on_value, while all other locations take value off_value. - """ - outputs = tf.one_hot( - inputs, self.depth, on_value=self.on_value, off_value=self.off_value, axis=self.axis, dtype=self.dtype - ) - return outputs - - -class Word2vecEmbedding(Layer): - """ - The :class:`Word2vecEmbedding` class is a fully connected layer. - For Word Embedding, words are input as integer index. - The output is the embedded word vector. 
- - Parameters - ---------- - vocabulary_size : int - The size of vocabulary, number of words - embedding_size : int - The number of embedding dimensions - num_sampled : int - The mumber of negative examples for NCE loss - nce_loss_args : dictionary - The arguments for tf.nn.nce_loss() - E_init : initializer - The initializer for initializing the embedding matrix - nce_W_init : initializer - The initializer for initializing the nce decoder weight matrix - nce_b_init : initializer - The initializer for initializing of the nce decoder bias vector - name : str - A unique layer name - - Attributes - ---------- - outputs : Tensor - The embedding layer outputs. - normalized_embeddings : Tensor - Normalized embedding matrix. - nce_weights : Tensor - The NCE weights - nce_biases: Tensor - The NCE biases - - Examples - -------- - Word2Vec With TensorLayer (Example in `examples/text_word_embedding/tutorial_word2vec_basic.py`) - - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> batch_size = 8 - >>> train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) - >>> train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) - - >>> net_in = tl.layers.Input([batch_size], dtype=tf.int32) - >>> emb_net = tl.layers.Word2vecEmbedding( - >>> vocabulary_size=vocabulary_size, - >>> embedding_size=embedding_size, - >>> num_sampled=num_sampled, - >>> nce_loss_args={}, - >>> E_init=tl.initializers.random_uniform(minval=-1.0, maxval=1.0), - >>> nce_W_init=tl.initializers.truncated_normal(stddev=float(1.0 / np.sqrt(embedding_size))), - >>> nce_b_init=tl.initializers.constant(value=0.0), - >>> name='word2vec_layer', - >>> )(net_in) - - >>> model = tl.models.Model(inputs=net_in, outputs=emb_net, name="word2vec_model") - - >>> nce_cost = tf.reduce_mean( - >>> input_tensor=tf.nn.nce_loss( - >>> weights=emb_net.nce_weights, - >>> biases=emb_net.nce_biases, - >>> inputs=model(train_inputs, is_train=True), - >>> labels=train_labels, #self.train_labels, - >>> num_sampled=emb_net.num_sampled, - >>> num_classes=emb_net.vocabulary_size, - >>> **emb_net.nce_loss_args - >>> ) - >>> ) - - >>> train_params = model.weights - >>> train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(nce_cost, var_list=train_params) - - >>> normalized_embeddings = emb_net.normalized_embeddings - - References - ---------- - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `__ - - """ - - def __init__( - self, - vocabulary_size=80000, - embedding_size=200, - num_sampled=64, - nce_loss_args=None, - E_init=tl.initializers.random_uniform(minval=-1.0, maxval=1.0), - nce_W_init=tl.initializers.truncated_normal(stddev=0.03), - nce_b_init=tl.initializers.constant(value=0.0), - name=None, #'word2vec', - ): - - super(Word2vecEmbedding, self).__init__(name) - self.vocabulary_size = vocabulary_size - self.embedding_size = embedding_size - self.num_sampled = num_sampled - self.nce_loss_args = nce_loss_args - self.E_init = E_init - self.nce_W_init = nce_W_init - self.nce_b_init = nce_b_init - logging.info("Word2vecEmbedding %s: (%d, %d)" % (self.name, self.vocabulary_size, self.embedding_size)) - - def build(self, inputs_shape): - """ - Parameters - ---------- - inputs_shape : tuple - the shape of inputs tensor - """ - # Look up embeddings for inputs. - # Note: a row of 'embeddings' is the vector representation of a word. - # for the sake of speed, it is better to slice the embedding matrix - # instead of transferring a word id to one-hot-format vector and then - # multiply by the embedding matrix. 
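# (Illustrative equivalence, assuming an embedding matrix E of shape [V, D]
#  and integer ids of shape [B]: tf.nn.embedding_lookup(E, ids) returns the
#  same rows as tf.matmul(tf.one_hot(ids, V), E), but never materialises the
#  [B, V] one-hot matrix.)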
- # embed is the outputs of the hidden layer (embedding layer), it is a - # row vector with 'embedding_size' values. - - self.embeddings = self._get_weights( - "embeddings", shape=(self.vocabulary_size, self.embedding_size), init=self.E_init, - ) - - self.normalized_embeddings = tf.nn.l2_normalize(self.embeddings, 1) - - # Construct the variables for the NCE loss (i.e. negative sampling) - self.nce_weights = self._get_weights( - "nce_weights", shape=(self.vocabulary_size, self.embedding_size), init=self.nce_W_init, - ) - - self.nce_biases = self._get_weights( - "nce_biases", shape=(self.vocabulary_size,), init=self.nce_b_init, - ) - - def forward(self, inputs): - """ - Parameters - ---------- - inputs : input tensor - The input of a network - """ - outputs = tf.nn.embedding_lookup(params=self.embeddings, ids=inputs) - - return outputs - - -class Embedding(Layer): - """ - The :class:`Embedding` class is a look-up table for word embedding. - - Word content are accessed using integer indexes, then the output is the embedded word vector. - To train a word embedding matrix, you can used :class:`Word2vecEmbedding`. - If you have a pre-trained matrix, you can assign the parameters into it. - - Parameters - ---------- - vocabulary_size : int - The size of vocabulary, number of words. - embedding_size : int - The number of embedding dimensions. - E_init : initializer - The initializer for the embedding matrix. - E_init_args : dictionary - The arguments for embedding matrix initializer. - name : str - A unique layer name. - - Attributes - ---------- - outputs : tensor - The embedding layer output is a 3D tensor in the shape: (batch_size, num_steps(num_words), embedding_size). - - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> batch_size = 8 - >>> net = tl.layers.Input([batch_size, 100], dtype=tf.int32) - >>> net = tl.layers.Embedding(vocabulary_size=1000, embedding_size=50, name='embed')(net) - >>> print(net.outputs) - - - """ - - def __init__( - self, - vocabulary_size=80000, - embedding_size=200, - E_init=tl.initializers.random_uniform(-0.1, 0.1), - name=None, #'embedding', - ): - super(Embedding, self).__init__(name) - self.vocabulary_size = vocabulary_size - self.embedding_size = embedding_size - self.E_init = E_init - - logging.info("Embedding %s: (%d, %d)" % (self.name, self.vocabulary_size, self.embedding_size)) - - def build(self, inputs_shape): - """ - Parameters - ---------- - inputs_shape : tuple - the shape of inputs tensor - """ - - self.embeddings = self._get_weights( - "embeddings", shape=(self.vocabulary_size, self.embedding_size), init=self.E_init, - ) - - def forward(self, inputs): - """ - Parameters - ---------- - inputs : Tensor - The input of a network. - """ - outputs = tf.nn.embedding_lookup(params=self.embeddings, ids=inputs) - return outputs - - -class AverageEmbedding(Layer): - """The :class:`AverageEmbedding` averages over embeddings of inputs. - This is often used as the input layer for models like DAN[1] and FastText[2]. - - Parameters - ---------- - vocabulary_size : int - The size of vocabulary. - embedding_size : int - The dimension of the embedding vectors. - pad_value : int - The scalar padding value used in inputs, 0 as default. - E_init : initializer - The initializer of the embedding matrix. - name : str - A unique layer name. - - Attributes - ---------- - outputs : tensor - The embedding layer output is a 2D tensor in the shape: (batch_size, embedding_size). 
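In effect, the layer computes (a sketch matching the forward pass below, where the mask marks non-pad tokens):

    >>> # sentence_embedding = sum(mask * word_embeddings, axis=1) / count_nonzero(mask, axis=1)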
- - References - ---------- - - [1] Iyyer, M., Manjunatha, V., Boyd-Graber, J., & Daum’e III, H. (2015). Deep Unordered Composition Rivals Syntactic Methods for Text Classification. In Association for Computational Linguistics. - - [2] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification. `__ - - Examples - --------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> batch_size = 8 - >>> length = 5 - >>> net = tl.layers.Input([batch_size, length], dtype=tf.int32) - >>> net = tl.layers.AverageEmbedding(vocabulary_size=1000, embedding_size=50, name='avg')(net) - >>> print(net.outputs) - - - """ - - def __init__( - self, - vocabulary_size, - embedding_size, - pad_value=0, - E_init=tl.initializers.random_uniform(-0.1, 0.1), - name=None, # 'average_embedding', - ): - - super(AverageEmbedding, self).__init__(name) - self.vocabulary_size = vocabulary_size - self.embedding_size = embedding_size - self.pad_value = pad_value - self.E_init = E_init - logging.info("AverageEmbedding %s: (%d, %d)" % (self.name, self.vocabulary_size, self.embedding_size)) - - def build(self, inputs_shape): - """ - Parameters - ---------- - inputs_shape : tuple - the shape of inputs tensor. - """ - if len(inputs_shape) != 2: - raise ValueError('inputs must be of size (batch_size, sentence_length)') - - self.embeddings = self._get_weights( - "embeddings", shape=(self.vocabulary_size, self.embedding_size), init=self.E_init, - ) - - def forward(self, inputs): - """ - Parameters - ---------- - inputs : tensor - The network input. - For word inputs, please use integer index format, 2D tensor: (batch_size, sentence_length). - """ - word_embeddings = tf.nn.embedding_lookup( - params=self.embeddings, - ids=inputs, - name='word_embeddings', - ) - - # Zero out embeddings of pad value - masks = tf.not_equal(inputs, self.pad_value, name='masks') - word_embeddings *= tf.cast( - tf.expand_dims(masks, axis=-1), - dtype=tf.float32 - ) - sum_word_embeddings = tf.reduce_sum(input_tensor=word_embeddings, axis=1) - - # Count number of non-padding words in each sentence - sentence_lengths = tf.math.count_nonzero( - masks, - axis=1, - keepdims=True, - dtype=tf.float32, - name='sentence_lengths', - ) - - sentence_embeddings = tf.divide( - sum_word_embeddings, - sentence_lengths + 1e-8, # Add epsilon to avoid dividing by 0 - name='sentence_embeddings' - ) - - outputs = sentence_embeddings - - return outputs diff --git a/tensorlayer/layers/extend.py b/tensorlayer/layers/extend.py deleted file mode 100644 index d734ff3..0000000 --- a/tensorlayer/layers/extend.py +++ /dev/null @@ -1,97 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'ExpandDims', - 'Tile', -] - - -class ExpandDims(Layer): - """ - The :class:`ExpandDims` class inserts a dimension of 1 into a tensor's shape, - see `tf.expand_dims() `__ . - - Parameters - ---------- - axis : int - The dimension index at which to expand the shape of input. - name : str - A unique layer name. 
- - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, (None, 100)) - >>> n = tl.layers.Input(x, name='in') - >>> n = tl.layers.ExpandDims(n, 2) - [None, 100, 1] - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - axis, - name='expand_dims', - ): - # super(ExpandDims, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.axis = axis - logging.info("ExpandDims %s: axis: %d" % (self.name, self.axis)) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - outputs = tf.expand_dims(inputs, axis=self.axis, name=self.name) - return outputs - - -class Tile(Layer): - """ - The :class:`Tile` class constructs a tensor by tiling a given tensor, - see `tf.tile() `__ . - - Parameters - ---------- - multiples: tensor - Must be one of the following types: int32, int64. - 1-D Length must be the same as the number of dimensions in input. - name : None or str - A unique layer name. - - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, (None, 100)) - >>> n = tl.layers.Input(x, name='in') - >>> n = tl.layers.ExpandDims(n, 2) - >>> n = tl.layers.Tile(n, [-1, 1, 3]) - [None, 100, 3] - """ - - def __init__(self, multiples=None, name=None): #'tile'): - - # super(Tile, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.multiples = multiples - - logging.info("Tile %s: multiples: %s" % (self.name, self.multiples)) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - outputs = tf.tile(inputs, multiples=self.multiples, name=self.name) - return outputs diff --git a/tensorlayer/layers/image_resampling.py b/tensorlayer/layers/image_resampling.py deleted file mode 100644 index 0618620..0000000 --- a/tensorlayer/layers/image_resampling.py +++ /dev/null @@ -1,209 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'UpSampling2d', - 'DownSampling2d', -] - - -class UpSampling2d(Layer): - """The :class:`UpSampling2d` class is a up-sampling 2D layer. - - See `tf.image.resize_images `__. - - Parameters - ---------- - size : tuple of int/float - (height, width) scale factor or new size of height and width. - is_scale : boolean - If True (default), the `size` is a scale factor; otherwise, the `size` is the numbers of pixels of height and width. - method : int - The resize method selected through the index. Defaults index is 0 which is ResizeMethod.BILINEAR. - - Index 0 is ResizeMethod.BILINEAR, Bilinear interpolation. - - Index 1 is ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation. - - Index 2 is ResizeMethod.BICUBIC, Bicubic interpolation. - - Index 3 ResizeMethod.AREA, Area interpolation. - align_corners : boolean - If True, align the corners of the input and output. Default is False. - data_format : str - channels_last 'channel_last' (default) or channels_first. - name : None or str - A unique layer name. 
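A hypothetical usage sketch (TL2 call style as used elsewhere in this changeset; shapes are illustrative):

    >>> import tensorflow as tf
    >>> import tensorlayer as tl
    >>> net = tl.layers.Input([None, 50, 50, 32])
    >>> net = tl.layers.UpSampling2d(size=(2, 2), is_scale=True)(net)   # -> (None, 100, 100, 32)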
- """ - - def __init__( - self, - size, - is_scale=True, - method=0, - align_corners=False, - data_format='channel_last', - name=None, #'upsample2d', - ): - # super(UpSampling2d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.size = size - self.is_scale = scale - self.method = method - self.align_corners = align_corners - self.data_format = data_format - - logging.info( - "UpSampling2d %s: is_scale: %s size: %s method: %d align_corners: %s" % - (self.name, self.is_scale, self.size, self.method, self.align_corners) - ) - - if not isinstance(self.size, (list, tuple)) and len(self.size) == 2: - raise AssertionError() - - def build(self, inputs_shape): - if self.data_format != 'channel_last': - raise Exception("UpSampling2d tf.image.resize_images only support channel_last") - - # if len(self.inputs.get_shape()) == 3: - if len(inputs_shape) == 3: - if self.is_scale: - # inputs_shape = inputs.shape.as_list() - if inputs_shape[0] is not None: - size_h = self.size[0] * inputs_shape[0] - else: - size_h = self.size[0] * tf.shape(input=inputs)[0] - if inputs_shape[1] is not None: - size_w = self.size[1] * inputs_shape[1] - else: - size_w = self.size[1] * tf.shape(input=inputs)[1] - self.size = [size_h, size_w] - - # elif len(self.inputs.get_shape()) == 4: - elif len(inputs_shape) == 4: - if self.is_scale: - # inputs_shape = inputs.shape.as_list() - if inputs_shape[1] is not None: - size_h = self.size[0] * inputs_shape[1] - else: - size_h = self.size[0] * tf.shape(input=inputs)[1] - if inputs_shape[2] is not None: - size_w = self.size[1] * inputs_shape[2] - else: - size_w = self.size[1] * tf.shape(input=inputs)[2] - self.size = [size_h, size_w] - - else: - raise Exception("Donot support shape %s" % str(inputs.shape.as_list())) - - def forward(self, inputs): - """ - - Parameters - ------------ - prev_layer : :class:`Layer` - Previous layer with 4-D Tensor of the shape (batch, height, width, channels) or 3-D Tensor of the shape (height, width, channels). - """ - outputs = tf.image.resize(inputs, size=self.size, method=self.method, align_corners=self.align_corners) - return outputs - - -class DownSampling2d(Layer): - """The :class:`DownSampling2d` class is down-sampling 2D layer. - - See `tf.image.resize_images `__. - - Parameters - ---------- - size : tuple of int/float - (height, width) scale factor or new size of height and width. - is_scale : boolean - If True (default), the `size` is the scale factor; otherwise, the `size` are numbers of pixels of height and width. - method : int - The resize method selected through the index. Defaults index is 0 which is ResizeMethod.BILINEAR. - - Index 0 is ResizeMethod.BILINEAR, Bilinear interpolation. - - Index 1 is ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation. - - Index 2 is ResizeMethod.BICUBIC, Bicubic interpolation. - - Index 3 ResizeMethod.AREA, Area interpolation. - align_corners : boolean - If True, exactly align all 4 corners of the input and output. Default is False. - data_format : str - channels_last 'channel_last' (default) or channels_first. - name : None or str - A unique layer name. 
- """ - - def __init__( - self, - size, - is_scale=True, - method=0, - align_corners=False, - data_format='channel_last', - name='downsample2d', - ): - # super(DownSampling2d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.size = size - self.is_scale = scale - self.method = method - self.align_corners = align_corners - self.data_format = data_format - - logging.info( - "DownSampling2d %s: is_scale: %s size: %s method: %d, align_corners: %s" % - (self.name, self.is_scale, self.size, self.method, self.align_corners) - ) - - if not isinstance(self.size, (list, tuple)) and len(self.size) == 2: - raise AssertionError() - - def build(self, inputs_shape): - if self.data_format != 'channel_last': - raise Exception("DownSampling2d tf.image.resize_images only support channel_last") - - if len(inputs_shape) == 3: - # if inputs.shape.ndims == 3: - if self.is_scale: - # inputs_shape = inputs.shape.as_list() - if inputs_shape[1] is not None: - size_h = self.size[0] * inputs_shape[0] - else: - size_h = self.size[0] * tf.shape(input=inputs)[0] - if inputs_shape[1] is not None: - size_w = self.size[1] * inputs_shape[1] - else: - size_w = self.size[1] * tf.shape(input=inputs)[1] - self.size = [size_h, size_w] - - elif len(inputs_shape) == 4: - # elif inputs.shape.ndims == 4: - if self.is_scale: - # inputs_shape = inputs.shape.as_list() - if inputs_shape[1] is not None: - size_h = self.size[0] * inputs_shape[1] - else: - size_h = self.size[0] * tf.shape(input=inputs)[1] - if inputs_shape[2] is not None: - size_w = self.size[1] * inputs_shape[2] - else: - size_w = self.size[1] * tf.shape(input=inputs)[2] - self.size = [size_h, size_w] - - else: - raise Exception("Donot support shape %s" % str(inputs.shape.as_list())) - - def forward(self, inputs): - """ - - Parameters - ------------ - prev_layer : :class:`Layer` - Previous layer with 4-D Tensor of the shape (batch, height, width, channels) or 3-D Tensor of the shape (height, width, channels). - """ - outputs = tf.image.resize(inputs, size=self.size, method=self.method, align_corners=self.align_corners) - return outputs diff --git a/tensorlayer/layers/importer.py b/tensorlayer/layers/importer.py deleted file mode 100644 index 2beb0a8..0000000 --- a/tensorlayer/layers/importer.py +++ /dev/null @@ -1,173 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'SlimNets', - # 'Keras', - # 'Estimator', -] - - -class SlimNets(Layer): - """A layer that merges TF-Slim models into TensorLayer. - - Models can be found in `slim-model `__, - see Inception V3 example on `Github `__. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - slim_layer : a slim network function - The network you want to stack onto, end with ``return net, end_points``. - slim_args : dictionary - The arguments for the slim model. - name : str - A unique layer name. - - Notes - ----- - - As TF-Slim stores the layers as dictionary, the ``all_layers`` in this network is not in order ! Fortunately, the ``all_params`` are in order. 
- - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - slim_layer, - slim_args=None, - name='tfslim', - ): - - if slim_layer is None: - raise ValueError("slim layer is None") - - super(SlimNets, self).__init__(prev_layer=prev_layer, slim_args=slim_args, name=name) - - logging.info("SlimNets %s: %s" % (self.name, slim_layer.__name__)) - - # with tf.variable_scope(name) as vs: - # net, end_points = slim_layer(self.inputs, **slim_args) - # slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - with tf.compat.v1.variable_scope(name): - self.outputs, end_points = slim_layer(self.inputs, **self.slim_args) - - slim_variables = tf.compat.v1.get_collection(TF_GRAPHKEYS_VARIABLES, scope=self.name) - - if slim_variables == []: - raise RuntimeError( - "No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file.\n" - "see tutorial_inceptionV3_tfslim.py for more details" % self.name - ) - - slim_layers = [] - - for v in end_points.values(): - slim_layers.append(v) - - self._add_layers(slim_layers) - self._add_params(slim_variables) - - -# @deprecated( -# date="2018-06-30", instructions="This layer will be deprecated soon as :class:`LambdaLayer` can do the same thing" -# ) -class Keras(Layer): - """A layer to import Keras layers into TensorLayer. - - Example can be found here `tutorial_keras.py `__. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer - keras_layer : function - A tensor in tensor out function for building model. - keras_args : dictionary - The arguments for the `keras_layer`. - name : str - A unique layer name. - - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - keras_layer, - keras_args=None, - name='keras_layer', - ): - - super(Keras, self).__init__(prev_layer=prev_layer, keras_args=keras_args, name=name) - - logging.info("Keras %s: %s" % (self.name, keras_layer)) - - logging.warning("This API will be removed, please use LambdaLayer instead.") - - with tf.variable_scope(name) as vs: - self.outputs = keras_layer(self.inputs, **self.keras_args) - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - self._add_layers(self.outputs) - self._add_params(variables) - - -# @deprecated( -# date="2018-06-30", instructions="This layer will be deprecated soon as :class:`LambdaLayer` can do the same thing" -# ) -# class Estimator(Layer): -# """A layer that accepts a user-defined model. -# -# It is similar with :class:`KerasLayer`, see `tutorial_keras.py `__. -# -# Parameters -# ---------- -# prev_layer : :class:`Layer` -# Previous layer -# model_fn : function -# A tensor in tensor out function for building model. -# layer_args : dictionary -# The arguments for the `model_fn`. -# name : str -# A unique layer name. 
-# -# """ -# -# @deprecated_alias( -# layer='prev_layer', args='layer_args', end_support_version=1.9 -# ) # TODO remove this line for the 1.9 release -# def __init__( -# self, -# prev_layer, -# model_fn, -# layer_args=None, -# name='estimator', -# ): -# super(Estimator, self).__init__(prev_layer=prev_layer, layer_args=layer_args, name=name) -# -# logging.info("Estimator %s: %s" % (self.name, model_fn)) -# -# if model_fn is None: -# raise ValueError('model fn is None') -# -# logging.warning("This API will be removed, please use LambdaLayer instead.") -# -# with tf.variable_scope(name) as vs: -# self.outputs = model_fn(self.inputs, **self.layer_args) -# variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) -# -# self._add_layers(self.outputs) -# self._add_params(variables) diff --git a/tensorlayer/layers/inputs.py b/tensorlayer/layers/inputs.py deleted file mode 100644 index ebafcc2..0000000 --- a/tensorlayer/layers/inputs.py +++ /dev/null @@ -1,69 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -import tensorflow as tf -import tensorlayer as tl - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig - -from tensorlayer import logging - -__all__ = [ - 'Input', -] - - -class Input(Layer): - """ - The :class:`Input` class is the starting layer of a neural network. - - Parameters - ---------- - shape : tuple (int) - Including batch size. - name : None or str - A unique layer name. - - """ - - def __init__(self, shape, dtype=tf.float32, name=None): #'input'): - # super(InputLayer, self).__init__(prev_layer=inputs, name=name) - super(Input, self).__init__(name) - - logging.info("Input %s: %s" % (self.name, str(shape))) - self.shape = shape # shape is needed in __repr__ - - shape_without_none = [_ if _ is not None else 1 for _ in shape] - # self.outputs = self.forward(tl.initializers.random_normal()(shape_without_none)) - self.outputs = self.forward(tl.initializers.ones()(shape_without_none, dtype=dtype)) - - def __repr__(self): - s = 'Input(shape=%s' % str(self.shape) - if self.name is not None: - s += (', name=\'%s\'' % self.name) - s += ')' - return s - - def __call__(self, inputs): - return super(Input, self).__call__(prev_layer=inputs) - - def build(self, inputs_shape): - # FIXME: documentation need double check - """ - no weights to define - """ - pass - - def forward(self, inputs): - # FIXME: documentation need double check - """ - Parameters - ---------- - inputs : input tensor - The input of a network. - is_train: bool - train (True) or test (False) - """ - return inputs diff --git a/tensorlayer/layers/lambda_layers.py b/tensorlayer/layers/lambda_layers.py deleted file mode 100644 index 37425bf..0000000 --- a/tensorlayer/layers/lambda_layers.py +++ /dev/null @@ -1,138 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Lambda', - 'ElementwiseLambda', -] - - -class Lambda(Layer): - """A layer that takes a user-defined function using TensorFlow Lambda, for multiple inputs see :class:`ElementwiseLambdaLayer`. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - fn : function - The function that applies to the outputs of previous layer. - fn_args : dictionary or None - The arguments for the function (option). 
-    name : str or None
-        A unique layer name.
-
-    Examples
-    ---------
-    Non-parametric case
-
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-    >>> x = tf.placeholder(tf.float32, shape=[None, 1], name='x')
-    >>> net = tl.layers.Input(x, name='input')
-    >>> net = tl.layers.Lambda(net, lambda x: 2*x, name='lambda')
-
-    Parametric case, merge other wrappers into TensorLayer
-
-    >>> from keras.layers import *
-    >>> from tensorlayer.layers import *
-    >>> def keras_block(x):
-    >>>     x = Dropout(0.8)(x)
-    >>>     x = Dense(800, activation='relu')(x)
-    >>>     x = Dropout(0.5)(x)
-    >>>     x = Dense(800, activation='relu')(x)
-    >>>     x = Dropout(0.5)(x)
-    >>>     logits = Dense(10, activation='linear')(x)
-    >>>     return logits
-    >>> net = Input(x, name='input')
-    >>> net = Lambda(net, fn=keras_block, name='keras')
-
-    """
-
-    @deprecated_alias(layer='prev_layer', end_support_version=1.9)  # TODO remove this line for the 1.9 release
-    def __init__(
-            self,
-            prev_layer,
-            fn,
-            fn_args=None,
-            name=None,  #'lambda',
-    ):
-
-        super(Lambda, self).__init__(prev_layer=prev_layer, fn_args=fn_args, name=name)
-
-        logging.info("Lambda %s" % self.name)
-
-        if fn is None:
-            raise AssertionError("The `fn` argument cannot be None")
-
-        self.fn_args = fn_args if fn_args is not None else {}
-
-        with tf.compat.v1.variable_scope(self.name) as vs:
-            self.outputs = fn(self.inputs, **self.fn_args)
-            variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=vs.name)
-
-        self._add_layers(self.outputs)
-        self._add_params(variables)
-
-
-class ElementwiseLambda(Layer):
-    """A layer that uses a custom function to combine multiple :class:`Layer` inputs.
-
-    Parameters
-    ----------
-    layers : list of :class:`Layer`
-        The list of layers to combine.
-    fn : function
-        The function that applies to the outputs of the previous layers.
-    fn_args : dictionary or None
-        The arguments for the function (optional).
-    act : activation function
-        The activation function of this layer.
-    name : str
-        A unique layer name.
-
-    Examples
-    --------
-    z = mean + noise * tf.exp(std * 0.5)
-
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-
-    >>> def func(noise, mean, std):
-    >>>     return mean + noise * tf.exp(std * 0.5)
-
-    >>> x = tf.placeholder(tf.float32, [None, 200])
-    >>> noise_tensor = tf.random_normal(tf.stack([tf.shape(x)[0], 200]))
-    >>> noise = tl.layers.Input(noise_tensor)
-    >>> net = tl.layers.Input(x)
-    >>> net = tl.layers.Dense(net, n_units=200, act=tf.nn.relu, name='dense1')
-    >>> mean = tl.layers.Dense(net, n_units=200, name='mean')
-    >>> std = tl.layers.Dense(net, n_units=200, name='std')
-    >>> z = tl.layers.ElementwiseLambda([noise, mean, std], fn=func, name='z')
-    """
-
-    def __init__(
-            self,
-            layers,
-            fn,
-            fn_args=None,
-            act=None,
-            name=None,  #'elementwiselambda',
-    ):
-
-        super(ElementwiseLambda, self).__init__(prev_layer=layers, act=act, fn_args=fn_args, name=name)
-        logging.info("ElementwiseLambda %s" % self.name)
-
-        self.fn_args = fn_args if fn_args is not None else {}
-
-        with tf.compat.v1.variable_scope(self.name) as vs:
-            self.outputs = self._apply_activation(fn(*self.inputs, **self.fn_args))
-
-            variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=vs.name)
-
-        self._add_layers(self.outputs)
-        self._add_params(variables)
diff --git a/tensorlayer/layers/merge.py b/tensorlayer/layers/merge.py
deleted file mode 100644
index 422c3e2..0000000
--- a/tensorlayer/layers/merge.py
+++ /dev/null
@@ -1,145 +0,0 @@
-#!
/usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -__all__ = [ - 'Concat', - 'Elementwise', -] - - -class Concat(Layer): - """A layer that concats multiple tensors according to given axis. - - Parameters - ---------- - concat_dim : int - The dimension to concatenate. - name : None or str - A unique layer name. - - Examples - ---------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> sess = tf.InteractiveSession() - >>> x = tf.placeholder(tf.float32, shape=[None, 784]) - >>> inputs = tl.layers.Input(x, name='input') - [TL] Input input (?, 784) - >>> net1 = tl.layers.Dense(inputs, 800, act=tf.nn.relu, name='relu1_1') - [TL] Dense relu1_1: 800, relu - >>> net2 = tl.layers.Dense(inputs, 300, act=tf.nn.relu, name='relu2_1') - [TL] Dense relu2_1: 300, relu - >>> net = tl.layers.Concat([net1, net2], 1, name ='concat_layer') - [TL] Concat concat, 1100 - >>> tl.layers.initialize_global_variables(sess) - >>> net.print_params() - [TL] param 0: relu1_1/W:0 (784, 800) float32_ref - [TL] param 1: relu1_1/b:0 (800,) float32_ref - [TL] param 2: relu2_1/W:0 (784, 300) float32_ref - [TL] param 3: relu2_1/b:0 (300,) float32_ref - num of params: 863500 - >>> net.print_layers() - [TL] layer 0: relu1_1/Relu:0 (?, 800) float32 - [TL] layer 1: relu2_1/Relu:0 (?, 300) float32 - [TL] layer 2: concat:0 (?, 1100) float32 - - """ - - def __init__( - self, - concat_dim=-1, - name=None, #'concat', - ): - - # super(ConcatLayer, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.concat_dim = concat_dim - logging.info("Concat %s: axis: %d" % (self.name, concat_dim)) - - def build(self, inputs): - pass - - def forward(self, inputs): - """ - - prev_layer : list of :class:`Layer` - List of layers to concatenate. - """ - outputs = tf.concat(inputs, self.concat_dim, name=self.name) - - return outputs - - -class Elementwise(Layer): - """A layer that combines multiple :class:`Layer` that have the same output shapes - according to an element-wise operation. - - Parameters - ---------- - combine_fn : a TensorFlow element-wise combine function - e.g. AND is ``tf.minimum`` ; OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on. - See `TensorFlow Math API `__ . - act : activation function - The activation function of this layer. - name : None or str - A unique layer name. 
- - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, shape=[None, 784]) - >>> inputs = tl.layers.Input(x, name='input') - >>> net_0 = tl.layers.Dense(inputs, n_units=500, act=tf.nn.relu, name='net_0') - >>> net_1 = tl.layers.Dense(inputs, n_units=500, act=tf.nn.relu, name='net_1') - >>> net = tl.layers.Elementwise([net_0, net_1], combine_fn=tf.minimum, name='minimum') - >>> net.print_params(False) - [TL] param 0: net_0/W:0 (784, 500) float32_ref - [TL] param 1: net_0/b:0 (500,) float32_ref - [TL] param 2: net_1/W:0 (784, 500) float32_ref - [TL] param 3: net_1/b:0 (500,) float32_ref - >>> net.print_layers() - [TL] layer 0: net_0/Relu:0 (?, 500) float32 - [TL] layer 1: net_1/Relu:0 (?, 500) float32 - [TL] layer 2: minimum:0 (?, 500) float32 - """ - - def __init__( - self, - combine_fn=tf.minimum, - act=None, - name=None, #'elementwise', - ): - - # super(Elementwise, self).__init__(prev_layer=prev_layer, act=act, name=name) - super().__init__(name) - self.combine_fn = combine_fn - - logging.info( - "Elementwise %s: fn: %s act: %s" % - (self.name, combine_fn.__name__, ('No Activation' if self.act is None else self.act.__name__)) - ) - - def build(self, inputs): - pass - - def forward(self, inputs): - """ - - Parameters - ---------- - prev_layer : list of :class:`Layer` - The list of layers to combine. - """ - outputs = inputs[0] - for input in inputs[1:]: - outputs = self.combine_fn(outputs, input, name=self.name) - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/noise.py b/tensorlayer/layers/noise.py deleted file mode 100644 index ba2de32..0000000 --- a/tensorlayer/layers/noise.py +++ /dev/null @@ -1,72 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'GaussianNoise', -] - - -class GaussianNoise(Layer): - """ - The :class:`GaussianNoise` class is noise layer that adding noise with - gaussian distribution to the activation. - - Parameters - ------------ - mean : float - The mean. Default is 0. - stddev : float - The standard deviation. Default is 1. - is_train : boolean - Is trainable layer. If False, skip this layer. default is True. - seed : int or None - The seed for random noise. - name : str - A unique layer name. 
- - Examples - ---------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, shape=(100, 784)) - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.Dense(net, n_units=100, act=tf.nn.relu, name='dense3') - >>> net = tl.layers.GaussianNoise(net, name='gaussian') - (64, 100) - - """ - - def __init__( - self, - # prev_layer, - mean=0.0, - stddev=1.0, - seed=None, - name=None, #'gaussian_noise', - ): - # super(GaussianNoise, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.mean = mean - self.stddev = stddev - self.seed = seed - logging.info("GaussianNoise %s: mean: %f stddev: %f" % (self.name, self.mean, self.stddev)) - - def build(self, inputs): - pass - - def forward(self, inputs, is_train): - if is_train is False: - return inputs - else: - # noise = np.random.normal(0.0 , sigma , tf.to_int64(self.inputs).get_shape()) - noise = tf.random.normal(shape=inputs.get_shape(), mean=self.mean, stddev=self.stddev, seed=self.seed) - outputs = inputs + noise - return outputs diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py deleted file mode 100644 index cdf0ec2..0000000 --- a/tensorlayer/layers/normalization.py +++ /dev/null @@ -1,773 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf -import tensorlayer as tl -from tensorflow.python.training import moving_averages -from tensorflow.python.framework import ops -from tensorflow.python.ops import math_ops - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig -# from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES -from tensorlayer.layers.utils import get_collection_trainable - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'LocalResponseNorm', - 'BatchNorm', # FIXME: wthether to keep BatchNorm - 'BatchNorm1d', - 'BatchNorm2d', - 'BatchNorm3d', - 'InstanceNorm', - 'LayerNorm', - 'GroupNorm', - 'SwitchNorm', -] - - -class LocalResponseNorm(Layer): - """The :class:`LocalResponseNorm` layer is for Local Response Normalization. - See ``tf.nn.local_response_normalization`` or ``tf.nn.lrn`` for new TF version. - The 4-D input tensor is a 3-D array of 1-D vectors (along the last dimension), and each vector is normalized independently. - Within a given vector, each component is divided by the weighted square-sum of inputs within depth_radius. - - Parameters - ----------- - depth_radius : int - Depth radius. 0-D. Half-width of the 1-D normalization window. - bias : float - An offset which is usually positive and shall avoid dividing by 0. - alpha : float - A scale factor which is usually positive. - beta : float - An exponent. - name : None or str - A unique layer name. - - """ - - def __init__( - self, - depth_radius=None, - bias=None, - alpha=None, - beta=None, - name=None, #'lrn', - ): - # super(LocalResponseNorm, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.depth_radius = depth_radius - self.bias = bias - self.alpha = alpha - self.beta = beta - - logging.info( - "LocalResponseNorm %s: depth_radius: %s, bias: %s, alpha: %s, beta: %s" % - (self.name, str(depth_radius), str(bias), str(alpha), str(beta)) - ) - - def build(self, inputs): - pass - - def forward(self, inputs): - """ - prev_layer : :class:`Layer` - The previous layer with a 4D output shape. 
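For reference, with r = depth_radius the computation is the standard LRN form:

    output[..., c] = input[..., c] / (bias + alpha * sum(input[..., c-r : c+r+1] ** 2)) ** beta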
- """ - outputs = tf.nn.lrn(inputs, depth_radius=self.depth_radius, bias=self.bias, alpha=self.alpha, beta=self.beta) - return outputs - - -def _to_channel_first_bias(b): - """Reshape [c] to [c, 1, 1].""" - channel_size = int(b.shape[0]) - new_shape = (channel_size, 1, 1) - # new_shape = [-1, 1, 1] # doesn't work with tensorRT - return tf.reshape(b, new_shape) - - -def _bias_scale(x, b, data_format): - """The multiplication counter part of tf.nn.bias_add.""" - if data_format == 'NHWC': - return x * b - elif data_format == 'NCHW': - return x * _to_channel_first_bias(b) - else: - raise ValueError('invalid data_format: %s' % data_format) - - -def _bias_add(x, b, data_format): - """Alternative implementation of tf.nn.bias_add which is compatiable with tensorRT.""" - if data_format == 'NHWC': - return tf.add(x, b) - elif data_format == 'NCHW': - return tf.add(x, _to_channel_first_bias(b)) - else: - raise ValueError('invalid data_format: %s' % data_format) - - -def batch_normalization(x, mean, variance, offset, scale, variance_epsilon, data_format, name=None): - """Data Format aware version of tf.nn.batch_normalization.""" - with ops.name_scope(name, 'batchnorm', [x, mean, variance, scale, offset]): - inv = math_ops.rsqrt(variance + variance_epsilon) - if scale is not None: - inv *= scale - - a = math_ops.cast(inv, x.dtype) - b = math_ops.cast(offset - mean * inv if offset is not None else -mean * inv, x.dtype) - - # Return a * x + b with customized data_format. - # Currently TF doesn't have bias_scale, and tensorRT has bug in converting tf.nn.bias_add - # So we reimplemted them to allow make the model work with tensorRT. - # See https://github.com/tensorlayer/openpose-plus/issues/75 for more details. - df = {'channels_first': 'NCHW', 'channels_last': 'NHWC'} - return _bias_add(_bias_scale(x, a, df[data_format]), b, df[data_format]) - - -class BatchNorm(Layer): - """ - The :class:`BatchNorm` is a batch normalization layer for both fully-connected and convolution outputs. - See ``tf.nn.batch_normalization`` and ``tf.nn.moments``. - - Parameters - ---------- - decay : float - A decay factor for `ExponentialMovingAverage`. - Suggest to use a large value for large dataset. - epsilon : float - Eplison. - act : activation function - The activation function of this layer. - is_train : boolean - Is being used for training or inference. - beta_init : initializer or None - The initializer for initializing beta, if None, skip beta. - Usually you should not skip beta unless you know what happened. - gamma_init : initializer or None - The initializer for initializing gamma, if None, skip gamma. - When the batch normalization layer is use instead of 'biases', or the next layer is linear, this can be - disabled since the scaling can be done by the next layer. see `Inception-ResNet-v2 `__ - moving_mean_init : initializer or None - The initializer for initializing moving mean, if None, skip moving mean. - moving_var_init : initializer or None - The initializer for initializing moving var, if None, skip moving var. - num_features: int - Number of features for input tensor. Useful to build layer if using BatchNorm1d, BatchNorm2d or BatchNorm3d, - but should be left as None if using BatchNorm. - data_format : str - channels_last 'channel_last' (default) or channels_first. - name : None or str - A unique layer name. 
- - References - ---------- - - `Source `__ - - `stackoverflow `__ - - """ - - def __init__( - self, - decay=0.9, - epsilon=0.00001, - act=None, - is_train=False, - beta_init=tl.initializers.zeros(), - gamma_init=tl.initializers.random_normal(mean=1.0, stddev=0.002), - moving_mean_init=tl.initializers.zeros(), - moving_var_init=tl.initializers.zeros(), - # beta_init=tf.compat.v1.initializers.zeros(), - # gamma_init=tf.compat.v1.initializers.random_normal(mean=1.0, stddev=0.002), - # moving_mean_init=tf.compat.v1.initializers.zeros(), - # moving_var_init=tf.compat.v1.initializers.zeros(), - num_features=None, - data_format='channels_last', - name=None, - ): - super(BatchNorm, self).__init__(name=name) - self.act = act - self.decay = decay - self.epsilon = epsilon - self.data_format = data_format - self.beta_init = beta_init - self.gamma_init = gamma_init - self.moving_mean_init = moving_mean_init - self.moving_var_init = moving_var_init - self.num_features = num_features - - if num_features is not None: - if not isinstance(self, BatchNorm1d) and not isinstance(self, BatchNorm2d) and not isinstance(self, BatchNorm3d): - raise ValueError("Please use BatchNorm1d or BatchNorm2d or BatchNorm3d instead of BatchNorm " - "if you want to specify 'num_features'.") - self.build(None) - self._built = True - - logging.info( - "BatchNorm %s: decay: %f epsilon: %f act: %s is_train: %s" % - (self.name, decay, epsilon, self.act.__name__ if self.act is not None else 'No Activation', is_train) - ) - - def __repr__(self): - actstr = self.act.__name__ if self.act is not None else 'No Activation' - s = ('{classname}(num_features={num_features}, decay={decay}' - ', epsilon={epsilon}') - s += (', ' + actstr) - if self.name is not None: - s += ', name="{name}"' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def _get_param_shape(self, inputs_shape): - if self.data_format == 'channels_last': - axis = len(inputs_shape) - 1 - elif self.data_format == 'channels_first': - axis = 1 - else: - raise ValueError('data_format should be either %s or %s' % ('channels_last', 'channels_first')) - - channels = inputs_shape[axis] - params_shape = [1] * len(inputs_shape) - params_shape[axis] = channels - - axes = [i for i in range(len(inputs_shape)) if i != axis] - return params_shape, axes - - def build(self, inputs_shape): - if self.decay < 0 or 1 < self.decay: - raise Exception("decay should be between 0 to 1") - - # x_shape = self.inputs.get_shape() - # if self.data_format == 'channels_last': - # axis = len(inputs_shape) - 1 - # channels = inputs_shape[-1] - # params_shape = [1] * (len(inputs_shape) - 1) + [channels] - # elif self.data_format == 'channels_first': - # axis = 1 - # channels = inputs_shape[1] - # params_shape = [1, channels] + [1] * (len(inputs_shape) - 2) - # else: - # raise ValueError('data_format should be either %s or %s' % ('channels_last', 'channels_first')) - # - # # params_shape = inputs_shape[axis] - # self.axes = [i for i in range(len(inputs_shape)) if i != axis] - - params_shape, self.axes = self._get_param_shape(inputs_shape) - - self.beta, self.gamma = None, None - if self.beta_init: - self.beta = self._get_weights("beta", shape=params_shape, init=self.beta_init) - # with tf.variable_scope(name): - # axes = [i for i in range(len(x_shape)) if i != axis] - # - # # 1. 
beta, gamma - # variables = [] - # - # if beta_init: - # - # if beta_init == tf.zeros_initializer: - # beta_init = beta_init() - # - # beta = tf.get_variable( - # 'beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train - # ) - # - # variables.append(beta) - # - # else: - # beta = None - if self.gamma_init: - self.gamma = self._get_weights("gamma", shape=params_shape, init=self.gamma_init) - # if gamma_init: - # gamma = tf.get_variable( - # 'gamma', - # shape=params_shape, - # initializer=gamma_init, - # dtype=LayersConfig.tf_dtype, - # trainable=is_train, - # ) - # variables.append(gamma) - # else: - # gamma = None - # - # # 2. - self.moving_mean = self._get_weights("moving_mean", shape=params_shape, init=self.moving_mean_init) - # moving_mean = tf.get_variable( - # 'moving_mean', params_shape, initializer=moving_mean_init, dtype=LayersConfig.tf_dtype, trainable=False - # ) - # - # moving_variance = tf.get_variable( - # 'moving_variance', - # params_shape, - # initializer=tf.constant_initializer(1.), - # dtype=LayersConfig.tf_dtype, - # trainable=False, - # ) - self.moving_var = self._get_weights("moving_var", shape=params_shape, init=self.moving_var_init) - - def forward(self, inputs): - mean, var = tf.nn.moments(inputs, self.axes) - if self.is_train: - # update moving_mean and moving_var - self.moving_mean = moving_averages.assign_moving_average(self.moving_mean, mean, - self.decay, zero_debias=False) - self.moving_var = moving_averages.assign_moving_average(self.moving_var, var, - self.decay, zero_debias=False) - outputs = batch_normalization(inputs, mean, var, self.beta, self.gamma, - self.epsilon, self.data_format) - else: - outputs = batch_normalization(inputs, self.moving_mean, self.moving_var, self.beta, self.gamma, - self.epsilon, self.data_format) - if self.act: - outputs = self.act(outputs) - return outputs - # # 3. - # # These ops will only be preformed when training. 
- # mean, variance = tf.nn.moments(self.inputs, axes) - # update_moving_mean = moving_averages.assign_moving_average( - # moving_mean, mean, decay, zero_debias=False - # ) # if zero_debias=True, has bias - # update_moving_variance = moving_averages.assign_moving_average( - # moving_variance, variance, decay, zero_debias=False - # ) # if zero_debias=True, has bias - # - # def mean_var_with_update(): - # with tf.control_dependencies([update_moving_mean, update_moving_variance]): - # return tf.identity(mean), tf.identity(variance) - # - # if is_train: - # mean, var = mean_var_with_update() - # else: - # mean, var = moving_mean, moving_variance - # - # self.outputs = self._apply_activation( - # batch_normalization(self.inputs, mean, var, beta, gamma, epsilon, data_format) - # ) - # - # variables.extend([moving_mean, moving_variance]) - # - # self._add_layers(self.outputs) - # self._add_params(variables) - - -class BatchNorm1d(BatchNorm): - # TODO: documentation pending, need test - def _get_param_shape(self, inputs_shape): - if self.data_format == 'channels_last': - axis = 2 - elif self.data_format == 'channels_first': - axis = 1 - else: - raise ValueError('data_format should be either %s or %s' % ('channels_last', 'channels_first')) - - if self.num_features is None: - channels = inputs_shape[axis] - else: - channels = self.num_features - params_shape = [1] * 3 - params_shape[axis] = channels - - axes = [i for i in range(3) if i != axis] - return params_shape, axes - - -class BatchNorm2d(BatchNorm): - # TODO: documentation pending - def _get_param_shape(self, inputs_shape): - if self.data_format == 'channels_last': - axis = 3 - elif self.data_format == 'channels_first': - axis = 1 - else: - raise ValueError('data_format should be either %s or %s' % ('channels_last', 'channels_first')) - - if self.num_features is None: - channels = inputs_shape[axis] - else: - channels = self.num_features - params_shape = [1] * 4 - params_shape[axis] = channels - - axes = [i for i in range(4) if i != axis] - return params_shape, axes - - -class BatchNorm3d(BatchNorm): - # TODO: documentation pending, need test - def _get_param_shape(self, inputs_shape): - if self.data_format == 'channels_last': - axis = 4 - elif self.data_format == 'channels_first': - axis = 1 - else: - raise ValueError('data_format should be either %s or %s' % ('channels_last', 'channels_first')) - - if self.num_features is None: - channels = inputs_shape[axis] - else: - channels = self.num_features - params_shape = [1] * 5 - params_shape[axis] = channels - - axes = [i for i in range(5) if i != axis] - return params_shape, axes - - -class InstanceNorm(Layer): - """The :class:`InstanceNorm` class is a for instance normalization. - - Parameters - ----------- - act : activation function. - The activation function of this layer. - epsilon : float - Eplison. 
- name : None or str - A unique layer name - - """ - - def __init__( - self, - act=None, - epsilon=1e-5, - name=None, #'instan_norm', - ): - # super(InstanceNorm, self).__init__(prev_layer=prev_layer, act=act, name=name) - super().__init__(name) - self.act = act - self.epsilon = epsilon - - logging.info( - "InstanceNorm %s: epsilon: %f act: %s" % - (self.name, epsilon, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - # self.scale = tf.compat.v1.get_variable( - # self.name + '\scale', [inputs.get_shape()[-1]], - # initializer=tf.compat.v1.initializers.truncated_normal(mean=1.0, stddev=0.02), dtype=LayersConfig.tf_dtype - # ) - self.scale = self._get_weights( - "scale", shape=[inputs_shape[-1]], init=tf.compat.v1.initializers.truncated_normal(mean=1.0, stddev=0.02) - ) - # self.offset = tf.compat.v1.get_variable( - # self.name + '\offset', [inputs.get_shape()[-1]], initializer=tf.compat.v1.initializers.constant(0.0), - # dtype=LayersConfig.tf_dtype - # ) - self.offset = self._get_weights( - "offset", shape=[inputs_shape[-1]], init=tf.compat.v1.initializers.constant(0.0) - ) - # self.add_weights([self.scale, self.offset]) - - def forward(self, inputs): - - mean, var = tf.nn.moments(x=inputs, axes=[1, 2], keepdims=True) - - outputs = self.scale * tf.compat.v1.div(inputs - mean, tf.sqrt(var + self.epsilon)) + self.offset - outputs = self.act(outputs) - - return outputs - - # with tf.variable_scope(name) as vs: - # mean, var = tf.nn.moments(self.inputs, [1, 2], keep_dims=True) - # - # scale = tf.get_variable( - # 'scale', [self.inputs.get_shape()[-1]], - # initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02), dtype=LayersConfig.tf_dtype - # ) - # - # offset = tf.get_variable( - # 'offset', [self.inputs.get_shape()[-1]], initializer=tf.constant_initializer(0.0), - # dtype=LayersConfig.tf_dtype - # ) - # - # self.outputs = scale * tf.div(self.inputs - mean, tf.sqrt(var + epsilon)) + offset - # self.outputs = self._apply_activation(self.outputs) - # - # variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - # - # self._add_layers(self.outputs) - # self._add_params(variables) - - -# FIXME : not sure about the correctness, need testing -class LayerNorm(Layer): - """ - The :class:`LayerNorm` class is for layer normalization, see `tf.contrib.layers.layer_norm `__. - - Parameters - ---------- - prev_layer : :class:`Layer` - The previous layer. - act : activation function - The activation function of this layer. - others : _ - `tf.contrib.layers.layer_norm `__. 
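A hypothetical usage sketch (TL2 call style; parameters as listed above, shapes illustrative):

    >>> import tensorflow as tf
    >>> import tensorlayer as tl
    >>> net = tl.layers.Input([None, 50, 50, 32])
    >>> net = tl.layers.LayerNorm(center=True, scale=True, act=tf.nn.relu)(net)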
- - """ - - def __init__( - self, #prev_layer, - center=True, - scale=True, - act=None, - # reuse=None, - # variables_collections=None, - # outputs_collections=None, - # trainable=True, - epsilon=1e-12, - begin_norm_axis=1, - begin_params_axis=-1, - beta_init=tl.initializers.zeros(), - gamma_init=tl.initializers.ones(), - data_format='channels_last', - name=None, - ): - - # super(LayerNorm, self).__init__(prev_layer=prev_layer, act=act, name=name) - super(LayerNorm, self).__init__(name) - self.center = center - self.scale = scale - self.act = act - self.epsilon = epsilon - self.begin_norm_axis = begin_norm_axis - self.begin_params_axis = begin_params_axis - self.beta_init = beta_init - self.gamma_init = gamma_init - self.data_format = data_format - - logging.info( - "LayerNorm %s: act: %s" % (self.name, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - params_shape = inputs_shape[self.begin_params_axis:] - self.beta, self.gamma = None, None - if self.center: - self.beta = self._get_weights("beta", shape=params_shape, init=self.beta_init) - if self.scale: - self.gamma = self._get_weights("gamma", shape=params_shape, init=self.gamma_init) - - self.norm_axes = range(self.begin_norm_axis, len(inputs_shape)) - - def forward(self, inputs): - mean, var = tf.nn.moments(inputs, self.norm_axes, keepdims=True) - # compute layer normalization using batch_normalization function - outputs = batch_normalization(inputs, mean, var, self.beta, self.gamma, - self.epsilon, data_format=self.data_format) - if self.act: - outputs = self.act(outputs) - return outputs - # with tf.compat.v1.variable_scope(name) as vs: - # self.outputs = tf.contrib.layers.layer_norm( - # self.inputs, - # center=center, - # scale=scale, - # activation_fn=self.act, - # reuse=reuse, - # variables_collections=variables_collections, - # outputs_collections=outputs_collections, - # trainable=trainable, - # begin_norm_axis=begin_norm_axis, - # begin_params_axis=begin_params_axis, - # scope='var', - # ) - # - # variables = tf.compat.v1.get_collection("TF_GRAPHKEYS_VARIABLES", scope=vs.name) - # - # self._add_layers(self.outputs) - # self._add_params(variables) - - -class GroupNorm(Layer): - """The :class:`GroupNorm` layer is for Group Normalization. - See `tf.contrib.layers.group_norm `__. - - Parameters - ----------- - # prev_layer : :class:`Layer` - # The previous layer. - groups : int - The number of groups - act : activation function - The activation function of this layer. - epsilon : float - Eplison. - data_format : str - channels_last 'channel_last' (default) or channels_first. 
- name : None or str - A unique layer name - - """ - - def __init__(self, groups=32, epsilon=1e-06, act=None, data_format='channels_last', name=None): #'groupnorm'): - # super(GroupNorm, self).__init__(prev_layer=prev_layer, act=act, name=name) - super().__init__(name) - self.groups = groups - self.epsilon = epsilon - self.act = act - self.data_format = data_format - - logging.info( - "GroupNorm %s: act: %s" % (self.name, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - # shape = inputs.get_shape().as_list() - if len(inputs_shape) != 4: - raise Exception("This GroupNorm only supports 2D images.") - - if self.data_format == 'channels_last': - channels = inputs_shape[-1] - self.int_shape = tf.concat( - [#tf.shape(input=self.inputs)[0:3], - inputs_shape[0:3], - tf.convert_to_tensor(value=[self.groups, channels // self.groups])], axis=0 - ) - elif self.data_format == 'channels_first': - channels = inputs_shape[1] - self.int_shape = tf.concat( - [ - # tf.shape(input=self.inputs)[0:1], - inputs_shape[0:1], - tf.convert_to_tensor(value=[self.groups, channels // self.groups]), - # tf.shape(input=self.inputs)[2:4] - inputs_shape[2:4], - ], - axis=0 - ) - else: - raise ValueError("data_format must be 'channels_last' or 'channels_first'.") - - if self.groups > channels: - raise ValueError('Invalid groups %d for %d channels.' % (self.groups, channels)) - if channels % self.groups != 0: - raise ValueError('%d channels is not commensurate with %d groups.' % (channels, self.groups)) - - if self.data_format == 'channels_last': - # mean, var = tf.nn.moments(x, [1, 2, 4], keep_dims=True) - self.gamma = self._get_weights("gamma", shape=channels, init=tl.initializers.ones()) - # self.gamma = tf.compat.v1.get_variable('gamma', channels, initializer=tf.compat.v1.initializers.ones()) - self.beta = self._get_weights("beta", shape=channels, init=tl.initializers.zeros()) - # self.beta = tf.compat.v1.get_variable('beta', channels, initializer=tf.compat.v1.initializers.zeros()) - elif self.data_format == 'channels_first': - # mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True) - self.gamma = self._get_weights("gamma", shape=[1, channels, 1, 1], init=tl.initializers.ones()) - # self.gamma = tf.compat.v1.get_variable('gamma', [1, channels, 1, 1], initializer=tf.compat.v1.initializers.ones()) - self.beta = self._get_weights("beta", shape=[1, channels, 1, 1], init=tl.initializers.zeros()) - # self.beta = tf.compat.v1.get_variable('beta', [1, channels, 1, 1], initializer=tf.compat.v1.initializers.zeros()) - # self.add_weights([self.gamma, self.bata]) - - def forward(self, inputs): - x = tf.reshape(inputs, self.int_shape) - if self.data_format == 'channels_last': - mean, var = tf.nn.moments(x=x, axes=[1, 2, 4], keepdims=True) - elif self.data_format == 'channels_first': - mean, var = tf.nn.moments(x=x, axes=[2, 3, 4], keepdims=True) - else: - raise Exception("unknown data_format") - x = (x - mean) / tf.sqrt(var + self.epsilon) - - outputs = tf.reshape(x, tf.shape(input=inputs)) * self.gamma + self.beta - if self.act: - outputs = self.act(outputs) - return outputs - - -class SwitchNorm(Layer): - """ - The :class:`SwitchNorm` is a switchable normalization. - - Parameters - ---------- - act : activation function - The activation function of this layer. - epsilon : float - Eplison. - beta_init : initializer or None - The initializer for initializing beta, if None, skip beta. - Usually you should not skip beta unless you know what happened. 
- gamma_init : initializer or None - The initializer for initializing gamma, if None, skip gamma. - When the batch normalization layer is use instead of 'biases', or the next layer is linear, this can be - disabled since the scaling can be done by the next layer. see `Inception-ResNet-v2 `__ - moving_mean_init : initializer or None - The initializer for initializing moving mean, if None, skip moving mean. - data_format : str - channels_last 'channel_last' (default) or channels_first. - name : None or str - A unique layer name. - - References - ---------- - - `Differentiable Learning-to-Normalize via Switchable Normalization `__ - - `Zhihu (CN) `__ - - """ - - def __init__( - self, - act=None, - epsilon=1e-5, - beta_init=tl.initializers.constant(0.0), - gamma_init=tl.initializers.constant(1.0), - moving_mean_init=tl.initializers.zeros(), - # beta_init=tf.compat.v1.initializers.constant(0.0), - # gamma_init=tf.compat.v1.initializers.constant(1.0), - # moving_mean_init=tf.compat.v1.initializers.zeros(), - data_format='channels_last', - name=None, #'switchnorm', - ): - # super(SwitchNorm, self).__init__(prev_layer=prev_layer, act=act, name=name) - super().__init__(name) - self.act = act - self.epsilon = epsilon - self.beta_init = beta_init - self.gamma_init = gamma_init - self.moving_mean_init = moving_mean_init - self.data_format = data_format - - logging.info( - "SwitchNorm %s: epsilon: %f act: %s" % - (self.name, epsilon, self.act.__name__ if self.act is not None else 'No Activation') - ) - - def build(self, inputs_shape): - if len(inputs_shape) != 4: - raise Exception("This SwitchNorm only supports 2D images.") - if self.data_format != 'channels_last': - raise Exception("This SwitchNorm only supports channels_last.") - ch = inputs_shape[-1] - self.gamma = self._get_weights("gamma", shape=[ch], init=self.gamma_init) - # self.gamma = tf.compat.v1.get_variable("gamma", [ch], initializer=gamma_init) - self.beta = self._get_weights("beta", shape=[ch], init=self.beta_init) - # self.beta = tf.compat.v1.get_variable("beta", [ch], initializer=beta_init) - - self.mean_weight_var = self._get_weights("mean_weight", shape=[3], init=tl.initializers.constant(1.0)) - # self.mean_weight_var = tf.compat.v1.get_variable("mean_weight", [3], initializer=tf.compat.v1.initializers.constant(1.0)) - self.var_weight_var = self._get_weights("var_weight", shape=[3], init=tl.initializers.constant(1.0)) - # self.var_weight_var = tf.compat.v1.get_variable("var_weight", [3], initializer=tf.compat.v1.initializers.constant(1.0)) - - # self.add_weights([self.gamma, self.beta, self.mean_weight_var, self.var_weight_var]) - - def forward(self, inputs): - - batch_mean, batch_var = tf.nn.moments(x=inputs, axes=[0, 1, 2], keepdims=True) - ins_mean, ins_var = tf.nn.moments(x=inputs, axes=[1, 2], keepdims=True) - layer_mean, layer_var = tf.nn.moments(x=inputs, axes=[1, 2, 3], keepdims=True) - - mean_weight = tf.nn.softmax(self.mean_weight_var) - var_weight = tf.nn.softmax(self.var_weight_var) - - mean = mean_weight[0] * batch_mean + mean_weight[1] * ins_mean + mean_weight[2] * layer_mean - var = var_weight[0] * batch_var + var_weight[1] * ins_var + var_weight[2] * layer_var - - inputs = (inputs - mean) / (tf.sqrt(var + self.epsilon)) - outputs = inputs * self.gamma + self.beta - if self.act: - outputs = self.act(outputs) - return outputs diff --git a/tensorlayer/layers/object_detection.py b/tensorlayer/layers/object_detection.py deleted file mode 100644 index 0e1bd01..0000000 --- a/tensorlayer/layers/object_detection.py +++ 
/dev/null @@ -1,62 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -from tensorlayer.lazy_imports import LazyImport - -try: - roi_pooling = LazyImport("tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops") -except Exception as e: - logging.error(e) - logging.error("HINT: 1. https://github.com/deepsense-ai/roi-pooling 2. tensorlayer/third_party/roi_pooling") - -__all__ = [ - 'ROIPooling', -] - - -class ROIPooling(Layer): - """ - The region of interest pooling layer. - - Parameters - ----------- - prev_layer : :class:`Layer` - The previous layer. - rois : tuple of int - Regions of interest in the format of (feature map index, upper left, bottom right). - pool_width : int - The size of the pooling sections. - pool_width : int - The size of the pooling sections. - name : str - A unique layer name. - - Notes - ----------- - - This implementation is imported from `Deepsense-AI `__ . - - Please install it by the instruction `HERE `__. - - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - rois, - pool_height=2, - pool_width=2, - name='roipooling', - ): - super(ROIPooling, self).__init__(prev_layer=prev_layer, name=name) - - logging.info("ROIPooling %s: (%d, %d)" % (self.name, pool_height, pool_width)) - - self.outputs = roi_pooling(self.inputs, rois, pool_height, pool_width) - - self._add_layers(self.outputs) diff --git a/tensorlayer/layers/padding.py b/tensorlayer/layers/padding.py deleted file mode 100644 index aa76fa5..0000000 --- a/tensorlayer/layers/padding.py +++ /dev/null @@ -1,173 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'PadLayer', - 'ZeroPad1d', - 'ZeroPad2d', - 'ZeroPad3d', -] - - -class PadLayer(Layer): - """The :class:`PadLayer` class is a padding layer for any mode and dimension. - Please see `tf.pad `__ for usage. - - Parameters - ---------- - padding : list of lists of 2 ints, or a Tensor of type int32. - The int32 values to pad. - mode : str - "CONSTANT", "REFLECT", or "SYMMETRIC" (case-insensitive). - name : None or str - A unique layer name. - - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> images = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> net = tl.layers.Input(images, name='in') - >>> net = tl.layers.PadLayer(net, [[0, 0], [3, 3], [3, 3], [0, 0]], "REFLECT", name='inpad') - - """ - - def __init__( - self, - padding=None, - mode='CONSTANT', - name=None, #'pad_layer', - ): - # super(PadLayer, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.padding = padding - self.mode = mode - - logging.info("PadLayer %s: padding: %s mode: %s" % (self.name, list(self.padding), self.mode)) - - if self.padding is None: - raise Exception( - "padding should be a Tensor of type int32. see https://www.tensorflow.org/api_docs/python/tf/pad" - ) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - outputs = tf.pad(tensor=inputs, paddings=self.padding, mode=self.mode, name=self.name) - return outputs - - -class ZeroPad1d(Layer): - """ - The :class:`ZeroPad1d` class is a 1D padding layer for signal [batch, length, channel]. 
-
-    Parameters
-    ----------
-    padding : int, or tuple of 2 ints
-        - If int, zeros to add at the beginning and end of the padding dimension (axis 1).
-        - If tuple of 2 ints, zeros to add at the beginning and at the end of the padding dimension.
-    name : None or str
-        A unique layer name.
-
-    """
-
-    def __init__(
-        self,
-        padding,
-        name=None,  #'zeropad1d',
-    ):
-        super().__init__(name)
-        self.padding = padding
-        logging.info("ZeroPad1d %s: padding: %s" % (self.name, str(padding)))
-
-        if not isinstance(self.padding, (int, tuple)):
-            raise AssertionError("Padding should be of type `int` or `tuple`")
-
-    def build(self, inputs_shape):
-        self.layer = tf.keras.layers.ZeroPadding1D(padding=self.padding, name=self.name)
-
-    def forward(self, inputs):
-        outputs = self.layer(inputs)
-        return outputs
-
-
-class ZeroPad2d(Layer):
-    """
-    The :class:`ZeroPad2d` class is a 2D padding layer for image [batch, height, width, channel].
-
-    Parameters
-    ----------
-    padding : int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints.
-        - If int, the same symmetric padding is applied to width and height.
-        - If tuple of 2 ints, interpreted as two different symmetric padding values for height and width as ``(symmetric_height_pad, symmetric_width_pad)``.
-        - If tuple of 2 tuples of 2 ints, interpreted as ``((top_pad, bottom_pad), (left_pad, right_pad))``.
-    name : None or str
-        A unique layer name.
-
-    """
-
-    def __init__(
-        self,
-        padding,
-        name=None,  #'zeropad2d',
-    ):
-        super().__init__(name)
-
-        self.padding = padding
-        logging.info("ZeroPad2d %s: padding: %s" % (self.name, str(self.padding)))
-
-        if not isinstance(self.padding, (int, tuple)):
-            raise AssertionError("Padding should be of type `int` or `tuple`")
-
-    def build(self, inputs_shape):
-        self.layer = tf.keras.layers.ZeroPadding2D(padding=self.padding, name=self.name)
-
-    def forward(self, inputs):
-        outputs = self.layer(inputs)
-        return outputs
-
-
-class ZeroPad3d(Layer):
-    """
-    The :class:`ZeroPad3d` class is a 3D padding layer for volume [batch, depth, height, width, channel].
-
-    Parameters
-    ----------
-    padding : int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.
-        - If int, the same symmetric padding is applied to all three spatial dimensions.
-        - If tuple of 3 ints, interpreted as three different symmetric padding values as ``(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)``.
-        - If tuple of 3 tuples of 2 ints, interpreted as ``((left_dim1_pad, right_dim1_pad), (left_dim2_pad, right_dim2_pad), (left_dim3_pad, right_dim3_pad))``.
-    name : None or str
-        A unique layer name.
-
-    """
-
-    def __init__(
-        self,
-        padding,
-        name=None,  #'zeropad3d',
-    ):
-        super().__init__(name)
-        self.padding = padding
-
-        logging.info("ZeroPad3d %s: padding: %s" % (self.name, str(self.padding)))
-
-        if not isinstance(self.padding, (int, tuple)):
-            raise AssertionError("Padding should be of type `int` or `tuple`")
-
-    def build(self, inputs_shape):
-        self.layer = tf.keras.layers.ZeroPadding3D(padding=self.padding, name=self.name)
-
-    def forward(self, inputs):
-        outputs = self.layer(inputs)
-        return outputs
diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py
deleted file mode 100644
index c65fc34..0000000
--- a/tensorlayer/layers/pooling.py
+++ /dev/null
@@ -1,920 +0,0 @@
-#!
/usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'PoolLayer', - 'MaxPool1d', - 'MeanPool1d', - 'MaxPool2d', - 'MeanPool2d', - 'MaxPool3d', - 'MeanPool3d', - 'GlobalMaxPool1d', - 'GlobalMeanPool1d', - 'GlobalMaxPool2d', - 'GlobalMeanPool2d', - 'GlobalMaxPool3d', - 'GlobalMeanPool3d', -] - - -class PoolLayer(Layer): - """ - The :class:`PoolLayer` class is a Pooling layer. - You can choose ``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D input or - ``tf.nn.max_pool3d`` and ``tf.nn.avg_pool3d`` for 3D input. - - Parameters - ---------- - ksize : tuple of int - The size of the window for each dimension of the input tensor. - Note that: len(ksize) >= 4. - strides : tuple of int - The stride of the sliding window for each dimension of the input tensor. - Note that: len(strides) >= 4. - padding : str - The padding algorithm type: "SAME" or "VALID". - pool : pooling function - One of ``tf.nn.max_pool``, ``tf.nn.avg_pool``, ``tf.nn.max_pool3d`` and ``f.nn.avg_pool3d``. - See `TensorFlow pooling APIs `__ - name : None or str - A unique layer name. - - Examples - -------- - - see :class:`Conv2dLayer`. - - """ - - def __init__( - self, - ksize=(1, 2, 2, 1), - strides=(1, 2, 2, 1), - padding='SAME', - pool=tf.nn.max_pool, - name=None, #'pool_pro', - ): - # super(PoolLayer, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.ksize = ksize - self.strides = strides - self.padding = padding - self.pool = pool - - self.build() - self._built = True - - logging.info( - "PoolLayer %s: ksize: %s strides: %s padding: %s pool: %s" % - (self.name, str(self.ksize), str(self.strides), self.padding, pool.__name__) - ) - - def __repr__(self): - s = '{classname}(pool={poolname}, ksize={strides}, padding={padding}' - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, poolname=self.pool.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - pass - - def forward(self, inputs): - outputs = self.pool(inputs, ksize=self.ksize, strides=self.strides, padding=self.padding, name=self.name) - return outputs - - -class MaxPool1d(Layer): - """Max pooling for 1D signal. - - Parameters - ---------- - filter_size : tuple of int - Pooling window size. - strides : int - Stride of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of channels_last (default, [batch, length, channel]) or channels_first. The ordering of the dimensions in the inputs. - name : None or str - A unique layer name. 
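-
-    Examples
-    ---------
-    A short usage sketch, added for clarity (the shapes, the random input and
-    the eager-style call are illustrative assumptions, not part of the original
-    docstring):
-
-    >>> import numpy as np
-    >>> import tensorflow as tf
-    >>> x = tf.convert_to_tensor(np.random.rand(8, 100, 30), dtype=tf.float32)
-    >>> n = MaxPool1d(filter_size=3, strides=2, padding='SAME')(x)
-    >>> print(n.shape)
-    (8, 50, 30)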
- - """ - - def __init__( - self, - filter_size=3, - strides=2, - padding='SAME', - data_format='channels_last', - name=None, #'maxpool1d' - ): - # super(MaxPool1d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.data_format = data_format - - self.build() - self._built = True - - logging.info( - "MaxPool1d %s: filter_size: %s strides: %s padding: %s" % - (self.name, str(filter_size), str(strides), str(padding)) - ) - - def __repr__(self): - s = ('{classname}(filter_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - # https://www.tensorflow.org/api_docs/python/tf/nn/pool - if self.data_format == 'channels_last': - self.data_format = 'NWC' - elif self.data_format == 'channels_first': - self.data_format = 'NCW' - else: - raise Exception("unsupported data format") - self.filter_size = [self.filter_size] - self.strides = [self.strides] - - def forward(self, inputs): - """ - prev_layer : :class:`Layer` # NO PREVIOUS LAYER NOW - The previous layer with a output rank as 3 [batch, length(width), channel]. - """ - ## TODO : tf.layers will be removed in TF 2.0 - # outputs = tf.layers.max_pooling1d( - # inputs, self.filter_size, self.strides, padding=self.padding, data_format=self.data_format, name=self.name - # ) - # https://www.tensorflow.org/api_docs/python/tf/nn/pool - # print(self.strides, self.data_format) - outputs = tf.nn.pool( - input=inputs, window_shape=self.filter_size, pooling_type="MAX", padding=self.padding, dilations=None, - strides=self.strides, name=self.name, data_format=self.data_format - ) - return outputs - - -# x = tf.placeholder("float32", [None, 100, 3]) -# n = MaxPool1d(name='sasds') -# print(type(n)) -# n.build(x) -# print(n.strides, n.filter_size, n.data_format) -# # exit() -# y = n.forward(x) -# print(type(y), y)#.outputs) -# exit() - - -class MeanPool1d(Layer): - """Mean pooling for 1D signal. - - Parameters - ------------ - # prev_layer : :class:`Layer` - # The previous layer with a output rank as 3. - filter_size : tuple of int - Pooling window size. - strides : tuple of int - Strides of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of channels_last (default, [batch, length, channel]) or channels_first. The ordering of the dimensions in the inputs. - name : None or str - A unique layer name. 
- - """ - - # logging.info("MeanPool1d %s: filter_size: %s strides: %s padding: %s" % (self.name, str(filter_size), str(strides), str(padding))) - # outputs = tf.layers.average_pooling1d(prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) - # - # net_new = copy.copy(prev_layer) - # net_new.outputs = outputs - # net_new.all_layers.extend([outputs]) - # return net_new - def __init__( - self, #prev_layer, - filter_size=3, - strides=2, - padding='SAME', - data_format='channels_last', - name=None, #'meanpool1d' - ): - # super(MeanPool1d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.data_format = data_format - - self.build() - self._built = True - - logging.info( - "MeanPool1d %s: filter_size: %s strides: %s padding: %s" % - (self.name, str(filter_size), str(strides), str(padding)) - ) - - def __repr__(self): - s = ('{classname}(filter_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - # pass - # https://www.tensorflow.org/api_docs/python/tf/nn/pool - if self.data_format == 'channels_last': - self.data_format == 'NWC' - elif self.data_format == 'channels_first': - self.data_format == 'NCW' - else: - raise Exception("unsupported data format") - self.filter_size = [self.filter_size] - self.strides = [self.strides] - - def forward(self, inputs): - # self.outputs = tf.layers.average_pooling1d( - # prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name - # ) - # self._add_layers(self.outputs) - # https://www.tensorflow.org/api_docs/python/tf/nn/pool - outputs = tf.nn.pool( - input=inputs, window_shape=1, pooling_type="AVG", padding=self.padding, dilations=None, - strides=self.strides, name=self.name, data_format=self.data_format - ) - return outputs - - -class MaxPool2d(Layer): - """Max pooling for 2D image. - - Parameters - ----------- - filter_size : tuple of int - (height, width) for filter size. - strides : tuple of int - (height, width) for strides. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of channels_last (default, [batch, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs. - name : None or str - A unique layer name. 
- - """ - - def __init__( - self, - filter_size=(3, 3), - strides=(2, 2), - padding='SAME', - data_format='channels_last', - name=None, #'maxpool2d' - ): - if strides is None: - strides = filter_size - - # super(MaxPool2d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.data_format = data_format - - self.build() - self._built = True - - logging.info( - "MaxPool2d %s: filter_size: %s strides: %s padding: %s" % - (self.name, str(filter_size), str(strides), str(padding)) - ) - - def __repr__(self): - s = ('{classname}(filter_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - self.strides = [1, self.strides[0], self.strides[1], 1] - if self.data_format == 'channels_last': - self.data_format = 'NHWC' - elif self.data_format == 'channels_first': - self.data_format = 'NCHW' - else: - raise Exception("unsupported data format") - - def forward(self, inputs): - """ - prev_layer : :class:`Layer` - The previous layer with a output rank as 4. - """ - # outputs = tf.layers.max_pooling2d( - # inputs, filter_size, strides, padding=padding, data_format=data_format, name=name - # ) - outputs = tf.nn.max_pool(inputs, ksize=self.strides, strides=self.strides, padding=self.padding, name=self.name) - # net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1], - # strides=[1, strides[0], strides[1], 1], - # padding=padding, - # pool = tf.nn.max_pool, - # name = name) - return outputs - - -class MeanPool2d(Layer): - """Mean pooling for 2D image [batch, height, width, channel]. - - Parameters - ----------- - # prev_layer : :class:`Layer` - # The previous layer with a output rank as 4 [batch, height, width, channel]. - filter_size : tuple of int - (height, width) for filter size. - strides : tuple of int - (height, width) for strides. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of channels_last (default, [batch, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs. - name : None or str - A unique layer name. 
- - """ - - def __init__( - self, #prev_layer, - filter_size=(3, 3), - strides=(2, 2), - padding='SAME', - data_format='channels_last', - name=None, #'meanpool2d' - ): - if strides is None: - strides = filter_size - - # super(MeanPool2d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.data_format = data_format - - self.build() - self._built = True - - logging.info( - "MeanPool2d %s: filter_size: %s strides: %s padding: %s" % - (self.name, str(filter_size), str(strides), str(padding)) - ) - - def __repr__(self): - s = ('{classname}(filter_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - self.strides = [1, self.strides[0], self.strides[1], 1] - if self.data_format == 'channels_last': - self.data_format = 'NHWC' - elif self.data_format == 'channels_first': - self.data_format = 'NCHW' - else: - raise Exception("unsupported data format") - - def forward(self, inputs): - """ - prev_layer : :class:`Layer` - The previous layer with a output rank as 4. - """ - # self.outputs = tf.layers.average_pooling2d( - # self.inputs, filter_size, strides, padding=padding, data_format=data_format, name=name - # ) - # self._add_layers(self.outputs) - outputs = tf.nn.avg_pool(inputs, ksize=self.strides, strides=self.strides, padding=self.padding, name=self.name) - return outputs - - -class MaxPool3d(Layer): - """Max pooling for 3D volume. - - Parameters - ------------ - # prev_layer : :class:`Layer` - # The previous layer with a output rank as 5. - filter_size : tuple of int - Pooling window size. - strides : tuple of int - Strides of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of channels_last (default, [batch, depth, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs. - name : None or str - A unique layer name. - - Returns - ------- - :class:`Layer` - A max pooling 3-D layer with a output rank as 5. - - """ - - def __init__( - self, #prev_layer, - filter_size=(3, 3, 3), - strides=(2, 2, 2), - padding='valid', - data_format='channels_last', - name=None, #'maxpool3d' - ): - # super(MaxPool3d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.data_format = data_format - - self.build() - self._built = True - - logging.info( - "MaxPool3d %s: filter_size: %s strides: %s padding: %s" % - (self.name, str(filter_size), str(strides), str(padding)) - ) - - def __repr__(self): - s = ('{classname}(filter_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - self.strides = [1, self.strides[0], self.strides[1], self.strides[2], 1] - if self.data_format == 'channels_last': - self.data_format = 'NDHWC' - elif self.data_format == 'channels_first': - self.data_format = 'NCDHW' - else: - raise Exception("unsupported data format") - - def forward(self, inputs): - """ - prev_layer : :class:`Layer` - The previous layer with a output rank as 5. 
- """ - # self.outputs = tf.layers.max_pooling3d( - # self.inputs, filter_size, strides, padding=padding, data_format=data_format, name=name - # ) - # self._add_layers(self.outputs) - outputs = tf.nn.max_pool3d( - input=inputs, - ksize=self.filter_size, - strides=self.strides, - padding=self.filter_size, - data_format=self.data_format, - name=self.name, - ) - return outputs - - -class MeanPool3d(Layer): - """Mean pooling for 3D volume. - - Parameters - ------------ - prev_layer : :class:`Layer` - The previous layer with a output rank as 5. - filter_size : tuple of int - Pooling window size. - strides : tuple of int - Strides of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of channels_last (default, [batch, depth, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs. - name : None or str - A unique layer name. - - Returns - ------- - :class:`Layer` - A mean pooling 3-D layer with a output rank as 5. - - """ - - def __init__( - self, #prev_layer, - filter_size=(3, 3, 3), - strides=(2, 2, 2), - padding='valid', - data_format='channels_last', - name=None, #'meanpool3d' - ): - - # super(MeanPool3d, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.filter_size = filter_size - self.strides = strides - self.padding = padding - self.data_format = data_format - - self.build() - self._built = True - - logging.info( - "MeanPool3d %s: filter_size: %s strides: %s padding: %s" % - (self.name, str(filter_size), str(strides), str(padding)) - ) - - def __repr__(self): - s = ('{classname}(filter_size={filter_size}' - ', strides={strides}, padding={padding}') - if self.name is not None: - s += ', name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - self.strides = [1, self.strides[0], self.strides[1], self.strides[2], 1] - if self.data_format == 'channels_last': - self.data_format = 'NDHWC' - elif self.data_format == 'channels_first': - self.data_format = 'NCDHW' - else: - raise Exception("unsupported data format") - - def forward(self, inputs): - """ - prev_layer : :class:`Layer` - The previous layer with a output rank as 5. - """ - # self.outputs = tf.layers.average_pooling3d( - # prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name - # ) - # self._add_layers(self.outputs) - outputs = tf.nn.avg_pool3d( - input=inputs, - ksize=self.filter_size, - strides=self.strides, - padding=self.filter_size, - data_format=self.data_format, - name=self.name, - ) - return outputs - - -class GlobalMaxPool1d(Layer): - """The :class:`GlobalMaxPool1d` class is a 1D Global Max Pooling layer. - - Parameters - ------------ - data_format : str - One of channels_last (default, [batch, length, channel]) or channels_first. The ordering of the dimensions in the inputs. - name : None or str - A unique layer name. 
-
-    Examples
-    ---------
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-    >>> x = tf.placeholder("float32", [None, 100, 30])
-    >>> n = tl.layers.Input(x, name='in')
-    >>> n = tl.layers.GlobalMaxPool1d(n)
-    [None, 30]
-    """
-
-    def __init__(self, data_format="channels_last", name=None):  #'globalmaxpool1d'):
-        super().__init__(name)
-        self.data_format = data_format
-
-        self.build()
-        self._built = True
-
-        logging.info("GlobalMaxPool1d %s" % self.name)
-
-    def __repr__(self):
-        s = '{classname}('
-        if self.name is not None:
-            s += 'name=\'{name}\''
-        s += ')'
-        return s.format(classname=self.__class__.__name__, **self.__dict__)
-
-    def build(self, inputs_shape=None):
-        pass
-
-    def forward(self, inputs):
-        """
-        inputs : :class:`tf.Tensor`
-            The input tensor with rank 3 [batch, length, channel] or [batch, channel, length].
-        """
-        if self.data_format == 'channels_last':
-            outputs = tf.reduce_max(input_tensor=inputs, axis=1, name=self.name)
-        elif self.data_format == 'channels_first':
-            outputs = tf.reduce_max(input_tensor=inputs, axis=2, name=self.name)
-        else:
-            raise ValueError(
-                "`data_format` should have one of the following values: [`channels_last`, `channels_first`]"
-            )
-        return outputs
-
-
-class GlobalMeanPool1d(Layer):
-    """The :class:`GlobalMeanPool1d` class is a 1D Global Mean Pooling layer.
-
-    Parameters
-    ------------
-    data_format : str
-        One of channels_last (default, [batch, length, channel]) or channels_first. The ordering of the dimensions in the inputs.
-    name : None or str
-        A unique layer name.
-
-    Examples
-    ---------
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-    >>> x = tf.placeholder("float32", [None, 100, 30])
-    >>> n = tl.layers.Input(x, name='in')
-    >>> n = tl.layers.GlobalMeanPool1d(n)
-    [None, 30]
-    """
-
-    def __init__(self, data_format='channels_last', name=None):  #'globalmeanpool1d'):
-        super().__init__(name)
-        self.data_format = data_format
-
-        self.build()
-        self._built = True
-
-        logging.info("GlobalMeanPool1d %s" % self.name)
-
-    def __repr__(self):
-        s = '{classname}('
-        if self.name is not None:
-            s += 'name=\'{name}\''
-        s += ')'
-        return s.format(classname=self.__class__.__name__, **self.__dict__)
-
-    def build(self, inputs_shape=None):
-        pass
-
-    def forward(self, inputs):
-        """
-        inputs : :class:`tf.Tensor`
-            The input tensor with rank 3 [batch, length, channel] or [batch, channel, length].
-        """
-        if self.data_format == 'channels_last':
-            outputs = tf.reduce_mean(input_tensor=inputs, axis=1, name=self.name)
-        elif self.data_format == 'channels_first':
-            outputs = tf.reduce_mean(input_tensor=inputs, axis=2, name=self.name)
-        else:
-            raise ValueError(
-                "`data_format` should have one of the following values: [`channels_last`, `channels_first`]"
-            )
-        return outputs
-
-
-class GlobalMaxPool2d(Layer):
-    """The :class:`GlobalMaxPool2d` class is a 2D Global Max Pooling layer.
-
-    Parameters
-    ------------
-    data_format : str
-        One of channels_last (default, [batch, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
-    name : None or str
-        A unique layer name.
-
-    Examples
-    ---------
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-    >>> x = tf.placeholder("float32", [None, 100, 100, 30])
-    >>> n = tl.layers.Input(x, name='in2')
-    >>> n = tl.layers.GlobalMaxPool2d(n)
-    [None, 30]
-    """
-
-    def __init__(self, data_format='channels_last', name=None):  #'globalmaxpool2d'):
-        super().__init__(name)
-        self.data_format = data_format
-
-        self.build()
-        self._built = True
-
-        logging.info("GlobalMaxPool2d %s" % self.name)
-
-    def __repr__(self):
-        s = '{classname}('
-        if self.name is not None:
-            s += 'name=\'{name}\''
-        s += ')'
-        return s.format(classname=self.__class__.__name__, **self.__dict__)
-
-    def build(self, inputs_shape=None):
-        pass
-
-    def forward(self, inputs):
-        """
-        inputs : :class:`tf.Tensor`
-            The input tensor with rank 4 [batch, height, width, channel] or [batch, channel, height, width].
-        """
-        if self.data_format == 'channels_last':
-            outputs = tf.reduce_max(input_tensor=inputs, axis=[1, 2], name=self.name)
-        elif self.data_format == 'channels_first':
-            outputs = tf.reduce_max(input_tensor=inputs, axis=[2, 3], name=self.name)
-        else:
-            raise ValueError(
-                "`data_format` should have one of the following values: [`channels_last`, `channels_first`]"
-            )
-        return outputs
-
-
-class GlobalMeanPool2d(Layer):
-    """The :class:`GlobalMeanPool2d` class is a 2D Global Mean Pooling layer.
-
-    Parameters
-    ------------
-    data_format : str
-        One of channels_last (default, [batch, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
-    name : None or str
-        A unique layer name.
-
-    Examples
-    ---------
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-    >>> x = tf.placeholder("float32", [None, 100, 100, 30])
-    >>> n = tl.layers.Input(x, name='in2')
-    >>> n = tl.layers.GlobalMeanPool2d(n)
-    [None, 30]
-    """
-
-    def __init__(self, data_format='channels_last', name=None):  #'globalmeanpool2d'):
-        super().__init__(name)
-        self.data_format = data_format
-
-        self.build()
-        self._built = True
-
-        logging.info("GlobalMeanPool2d %s" % self.name)
-
-    def __repr__(self):
-        s = '{classname}('
-        if self.name is not None:
-            s += 'name=\'{name}\''
-        s += ')'
-        return s.format(classname=self.__class__.__name__, **self.__dict__)
-
-    def build(self, inputs_shape=None):
-        pass
-
-    def forward(self, inputs):
-        """
-        inputs : :class:`tf.Tensor`
-            The input tensor with rank 4 [batch, height, width, channel] or [batch, channel, height, width].
-        """
-        if self.data_format == 'channels_last':
-            outputs = tf.reduce_mean(input_tensor=inputs, axis=[1, 2], name=self.name)
-        elif self.data_format == 'channels_first':
-            outputs = tf.reduce_mean(input_tensor=inputs, axis=[2, 3], name=self.name)
-        else:
-            raise ValueError(
-                "`data_format` should have one of the following values: [`channels_last`, `channels_first`]"
-            )
-        return outputs
-
-
-class GlobalMaxPool3d(Layer):
-    """The :class:`GlobalMaxPool3d` class is a 3D Global Max Pooling layer.
-
-    Parameters
-    ------------
-    data_format : str
-        One of channels_last (default, [batch, depth, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
-    name : None or str
-        A unique layer name.
-
-    Examples
-    ---------
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-    >>> x = tf.placeholder("float32", [None, 100, 100, 100, 30])
-    >>> n = tl.layers.Input(x, name='in')
-    >>> n = tl.layers.GlobalMaxPool3d(n)
-    [None, 30]
-    """
-
-    def __init__(self, data_format='channels_last', name=None):  #'globalmaxpool3d'):
-        super().__init__(name)
-        self.data_format = data_format
-
-        self.build()
-        self._built = True
-
-        logging.info("GlobalMaxPool3d %s" % self.name)
-
-    def __repr__(self):
-        s = '{classname}('
-        if self.name is not None:
-            s += 'name=\'{name}\''
-        s += ')'
-        return s.format(classname=self.__class__.__name__, **self.__dict__)
-
-    def build(self, inputs_shape=None):
-        pass
-
-    def forward(self, inputs):
-        """
-        inputs : :class:`tf.Tensor`
-            The input tensor with rank 5 [batch, depth, height, width, channel] or [batch, channel, depth, height, width].
-        """
-        if self.data_format == 'channels_last':
-            outputs = tf.reduce_max(input_tensor=inputs, axis=[1, 2, 3], name=self.name)
-        elif self.data_format == 'channels_first':
-            outputs = tf.reduce_max(input_tensor=inputs, axis=[2, 3, 4], name=self.name)
-        else:
-            raise ValueError(
-                "`data_format` should have one of the following values: [`channels_last`, `channels_first`]"
-            )
-        return outputs
-
-
-class GlobalMeanPool3d(Layer):
-    """The :class:`GlobalMeanPool3d` class is a 3D Global Mean Pooling layer.
-
-    Parameters
-    ------------
-    data_format : str
-        One of channels_last (default, [batch, depth, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
-    name : None or str
-        A unique layer name.
-
-    Examples
-    ---------
-    >>> import tensorflow as tf
-    >>> import tensorlayer as tl
-    >>> x = tf.placeholder("float32", [None, 100, 100, 100, 30])
-    >>> n = tl.layers.Input(x, name='in')
-    >>> n = tl.layers.GlobalMeanPool3d(n)
-    [None, 30]
-    """
-
-    def __init__(self, data_format='channels_last', name=None):  #'globalmeanpool3d'):
-        super().__init__(name)
-        self.data_format = data_format
-
-        self.build()
-        self._built = True
-
-        logging.info("GlobalMeanPool3d %s" % self.name)
-
-    def __repr__(self):
-        s = '{classname}('
-        if self.name is not None:
-            s += 'name=\'{name}\''
-        s += ')'
-        return s.format(classname=self.__class__.__name__, **self.__dict__)
-
-    def build(self, inputs_shape=None):
-        pass
-
-    def forward(self, inputs):
-        """
-        inputs : :class:`tf.Tensor`
-            The input tensor with rank 5 [batch, depth, height, width, channel] or [batch, channel, depth, height, width].
-        """
-        if self.data_format == 'channels_last':
-            outputs = tf.reduce_mean(input_tensor=inputs, axis=[1, 2, 3], name=self.name)
-        elif self.data_format == 'channels_first':
-            outputs = tf.reduce_mean(input_tensor=inputs, axis=[2, 3, 4], name=self.name)
-        else:
-            raise ValueError(
-                "`data_format` should have one of the following values: [`channels_last`, `channels_first`]"
-            )
-        return outputs
diff --git a/tensorlayer/layers/quantize.py b/tensorlayer/layers/quantize.py
deleted file mode 100644
index a012415..0000000
--- a/tensorlayer/layers/quantize.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import tensorflow as tf
-
-from tensorlayer.layers.core import Layer
-
-from tensorlayer.layers.utils import quantize
-
-from tensorlayer import logging
-
-from tensorlayer.decorators import deprecated_alias
-
-__all__ = [
-    'Sign',
-]
-
-
-class Sign(Layer):
-    """The :class:`Sign` class quantizes the layer outputs to -1 or 1 during inference.
-
-    Parameters
-    ----------
-    name : None or str
-        A unique layer name.
-
-    """
-
-    @deprecated_alias(layer='prev_layer', end_support_version=1.9)  # TODO remove this line for the 1.9 release
-    def __init__(
-        self,
-        name=None,  #'sign',
-    ):
-        super().__init__(name)
-        logging.info("Sign %s" % self.name)
-
-    def build(self, inputs_shape):
-        pass
-
-    def forward(self, inputs):
-        outputs = quantize(inputs)
-        return outputs
diff --git a/tensorlayer/layers/recurrent.py b/tensorlayer/layers/recurrent.py
deleted file mode 100644
index a00c71d..0000000
--- a/tensorlayer/layers/recurrent.py
+++ /dev/null
@@ -1,1714 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import tensorflow as tf
-import tensorlayer as tl
-
-# from tensorflow.python.ops import array_ops
-# from tensorflow.python.util.tf_inspect import getfullargspec
-# from tensorflow.contrib.rnn import stack_bidirectional_dynamic_rnn
-# from tensorflow.python.ops.rnn_cell import LSTMStateTuple
-
-from tensorlayer.layers.core import Layer
-# from tensorlayer.layers.core import LayersConfig
-# from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES
-
-from tensorlayer import logging
-
-from tensorlayer.decorators import deprecated_alias
-
-# TODO: uncomment
-__all__ = [
-    'RNN',
-    # 'BiRNN',
-    # 'ConvRNNCell',
-    # 'BasicConvLSTMCell',
-    # 'ConvLSTM',
-    # 'advanced_indexing_op',
-    # 'retrieve_seq_length_op',
-    # 'retrieve_seq_length_op2',
-    # 'retrieve_seq_length_op3',
-    # 'target_mask_op',
-    # 'DynamicRNN',
-    # 'BiDynamicRNN',
-    # 'Seq2Seq',
-]
-
-
-class RNN(Layer):
-    # TODO: documents
-    """
-    The :class:`RNN` class is a fixed length recurrent layer for implementing vanilla RNN,
-    LSTM, GRU, etc.
-
-    Parameters
-    ----------
-    cell_fn : TensorFlow cell function
-        A TensorFlow core RNN cell
-            - See `RNN Cells in TensorFlow `__
-            - Note TF1.0+ and TF1.0- are different
-    cell_init_args : dictionary
-        The arguments for the cell function.
-    n_hidden : int
-        The number of hidden units in the layer.
-    initializer : initializer
-        The initializer for initializing the model parameters.
-    n_steps : int or None
-        The fixed sequence length. If None, `n_steps` is automatically decided by inputs.
-        In dynamic eager mode, `n_steps` can be updated when it is called in customised forward().
-    initial_state : None or RNN State
-        If None, `initial_state` is zero state.
-        In dynamic eager mode, `initial_state` can be updated when it is called in customised forward().
-    return_last : boolean
-        Whether to return the last output or all outputs in each step.
-            - If True, return the last output, "Sequence input and single output"
-            - If False, return all outputs, "Synced sequence input and output"
-            - In other words, if you want to stack more RNNs on this layer, set to False.
-        In dynamic eager mode, `return_last` can be updated when it is called in customised forward().
- return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. - In dynamic eager mode, `return_seq_2d` can be updated when it is called in customised forward(). - name : str - A unique layer name. - - Attributes - ---------- - outputs : Tensor - The output of this layer. - - final_state : Tensor or StateTuple - The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. - - initial_state : Tensor or StateTuple - The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. - - batch_size : int or Tensor - It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. - - Examples - -------- - - For synced sequence input and output, see `PTB example `__ - - - For encoding see below. - - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> batch_size = 32 - >>> num_steps = 5 - >>> vocab_size = 3000 - >>> hidden_size = 256 - >>> keep_prob = 0.8 - >>> is_train = True - >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps]) - >>> net = tl.layers.EmbeddingInput(inputs=input_data, vocabulary_size=vocab_size, - ... embedding_size=hidden_size, name='embed') - >>> net = tl.layers.Dropout(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop1') - >>> net = tl.layers.RNN(net, cell_fn=tf.contrib.rnn.BasicLSTMCell, - ... n_hidden=hidden_size, n_steps=num_steps, return_last=False, name='lstm1') - >>> net = tl.layers.Dropout(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop2') - >>> net = tl.layers.RNN(net, cell_fn=tf.contrib.rnn.BasicLSTMCell, - ... n_hidden=hidden_size, n_steps=num_steps, return_last=True, name='lstm2') - >>> net = tl.layers.Dropout(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop3') - >>> net = tl.layers.Dense(net, n_units=vocab_size, name='output') - - - For CNN+LSTM - - >>> image_size = 100 - >>> batch_size = 10 - >>> num_steps = 5 - >>> x = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 1]) - >>> net = tl.layers.Input(x, name='in') - >>> net = tl.layers.Conv2d(net, 32, (5, 5), (2, 2), tf.nn.relu, name='cnn1') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), name='pool1') - >>> net = tl.layers.Conv2d(net, 10, (5, 5), (2, 2), tf.nn.relu, name='cnn2') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), name='pool2') - >>> net = tl.layers.Flatten(net, name='flatten') - >>> net = tl.layers.Reshape(net, shape=[-1, num_steps, int(net.outputs._shape[-1])]) - >>> rnn = tl.layers.RNN(net, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=200, n_steps=num_steps, return_last=False, return_seq_2d=True, name='rnn') - >>> net = tl.layers.Dense(rnn, 3, name='out') - - Notes - ----- - Input dimension should be rank 3 : [batch_size, n_steps, n_features], if no, please see :class:`ReshapeLayer`. 
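-
-    A minimal eager-mode sketch of the intended call pattern (the cell, the
-    shapes and the variable names below are illustrative assumptions; any cell
-    accepting `num_units` and providing `zero_state` should fit):
-
-    >>> rnn = tl.layers.RNN(cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=64, n_steps=10, return_last=True)
-    >>> y = rnn(x)  # x : [batch_size, 10, n_features] -> y : [batch_size, 64]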
-
-    References
-    ----------
-    - `Neural Network RNN Cells in TensorFlow `__
-    - `tensorflow/python/ops/rnn.py `__
-    - `tensorflow/python/ops/rnn_cell.py `__
-    - see TensorFlow tutorial ``ptb_word_lm.py``, TensorLayer tutorials ``tutorial_ptb_lstm*.py`` and ``tutorial_generate_text.py``
-
-    """
-
-    def __init__(
-        self,
-        cell_fn,
-        cell_init_args=None,
-        n_hidden=100,
-        initializer=tl.initializers.random_uniform(-0.1, 0.1),
-        n_steps=None,
-        initial_state=None,
-        return_last=False,
-        return_seq_2d=False,
-        name=None,  # 'rnn'
-    ):
-
-        if cell_fn is None:
-            raise Exception("Please provide `cell_fn`.")
-
-        super(RNN, self).__init__(name=name)
-
-        self.cell_fn = cell_fn
-        self.cell_init_args = cell_init_args if cell_init_args is not None else {}
-        self.n_hidden = n_hidden
-        self.initializer = initializer
-        self.n_steps = n_steps
-        self.initial_state = initial_state
-        self.return_last = return_last
-        self.return_seq_2d = return_seq_2d
-
-        if 'GRU' in cell_fn.__name__:
-            try:
-                self.cell_init_args.pop('state_is_tuple')
-            except Exception:
-                logging.warning('pop state_is_tuple fails.')
-
-        logging.info(
-            "RNN %s: n_hidden: %d, n_steps: %s, cell_fn: %s " %
-            (self.name, n_hidden, n_steps if n_steps is not None else 'not_specified', cell_fn.__name__)
-        )
-
-    def __repr__(self):
-        s = ('{classname}(cell={cellname}, n_hidden={n_hidden}, n_steps={n_steps}')
-        if self.name is not None:
-            s += ', name={name}'
-        s += ')'
-        return s.format(classname=self.__class__.__name__, cellname=self.cell_fn.__name__, **self.__dict__)
-
-    def build(self, inputs_shape):
-        """
-        Parameters
-        ----------
-        inputs_shape : tuple
-            The shape of the input tensor.
-        """
-        # Input dimension should be rank 3 [batch_size, n_steps(max), n_features]
-        if len(inputs_shape) != 3:
-            raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]")
-
-        self.cell = self.cell_fn(num_units=self.n_hidden, **self.cell_init_args)
-
-        if self._weights is None:
-            self._weights = list()
-        self._weights.extend(self.cell.weights)
-
-    def forward(self, inputs, **kwargs):
-        """
-        Parameters
-        ----------
-        inputs : input tensor
-            The input of the network.
-        **kwargs : dict
-            Some attributes can be updated during forwarding,
-            such as `initial_state`, `n_steps`, `return_last`, `return_seq_2d`.
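-
-        For example, a hypothetical per-call override (assuming `rnn` was built
-        as in the class docstring sketch):
-
-        >>> outputs = rnn(batch, return_last=False, return_seq_2d=True)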
- """ - - if kwargs: - for attr in kwargs: - setattr(self, attr, kwargs[attr]) - - if self.return_last: - outputs = [-1] - else: - outputs = list() - - state = self.cell.zero_state(self._inputs_shape[0], dtype=tf.float32) if self.initial_state is None \ - else self.initial_state - - total_steps = self.n_steps if self.n_steps is not None else self._inputs_shape[1] - - for time_step in range(total_steps): - (cell_output, state) = self.cell(self.inputs[:, time_step, :], state) - if self.return_last: - outputs[-1] = cell_output - else: - outputs.append(cell_output) - - if self.return_last: - outputs = outputs[-1] - else: - if self.return_seq_2d: - # PTB tutorial: stack dense layer after that, or compute the cost from the output - # 2D Tensor [batch_size * n_steps, n_hidden] - outputs = tf.reshape(tf.concat(outputs, 1), [-1, self.n_hidden]) - else: - # : stack more RNN layer after that - # 3D Tensor [batch_size, n_steps, n_hidden] - outputs = tf.reshape(tf.concat(outputs, 1), [-1, self.n_steps, self.n_hidden]) - - self.final_state = state - - ''' - with tf.compat.v1.variable_scope(name, initializer=initializer) as vs: - for time_step in range(n_steps): - if time_step > 0: tf.compat.v1.get_variable_scope().reuse_variables() - (cell_output, state) = cell(self.inputs[:, time_step, :], state) - outputs.append(cell_output) - - # Retrieve just the RNN variables. - # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)] - rnn_variables = tf.compat.v1.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - logging.info(" n_params : %d" % (len(rnn_variables))) - - if return_last: - # 2D Tensor [batch_size, n_hidden] - self.outputs = outputs[-1] - else: - if return_seq_2d: - # PTB tutorial: stack dense layer after that, or compute the cost from the output - # 2D Tensor [n_example, n_hidden] - - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden]) - - else: - # : stack more RNN layer after that - # 3D Tensor [n_example/n_steps, n_steps, n_hidden] - - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden]) - - self.final_state = state - ''' - return outputs - - - -class BiRNN(Layer): - """ - The :class:`BiRNN` class is a fixed length Bidirectional recurrent layer. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell. - - See `RNN Cells in TensorFlow `__. - - Note TF1.0+ and TF1.0- are different. - cell_init_args : dictionary or None - The arguments for the cell function. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for initializing the model parameters. - n_steps : int - The fixed sequence length. - fw_initial_state : None or forward RNN State - If None, `initial_state` is zero state. - bw_initial_state : None or backward RNN State - If None, `initial_state` is zero state. - dropout : tuple of float or int - The input and output keep probability (input_keep_prob, output_keep_prob). - If one int, input and output keep probability are the same. - n_layer : int - The number of RNN layers, default is 1. - return_last : boolean - Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. 
- return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. - name : str - A unique layer name. - - Attributes - ---------- - outputs : tensor - The output of this layer. - fw(bw)_final_state : tensor or StateTuple - The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. - fw(bw)_initial_state : tensor or StateTuple - The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. - batch_size : int or tensor - It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. - - Notes - ----- - Input dimension should be rank 3 : [batch_size, n_steps, n_features]. If not, please see :class:`ReshapeLayer`. - For predicting, the sequence length has to be the same with the sequence length of training, while, for normal - RNN, we can use sequence length of 1 for predicting. - - References - ---------- - `Source `__ - - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - cell_fn, - cell_init_args=None, - n_hidden=100, - initializer=tf.compat.v1.initializers.random_uniform(-0.1, 0.1), - n_steps=5, - fw_initial_state=None, - bw_initial_state=None, - dropout=None, - n_layer=1, - return_last=False, - return_seq_2d=False, - name='birnn', - ): - super(BiRNN, self).__init__(prev_layer=prev_layer, cell_init_args=cell_init_args, name=name) - - if self.cell_init_args: - self.cell_init_args['state_is_tuple'] = True # 'use_peepholes': True, - - if 'GRU' in cell_fn.__name__: - try: - self.cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - - if cell_fn is None: - raise Exception("Please put in cell_fn") - - logging.info( - "BiRNN %s: n_hidden: %d n_steps: %d in_dim: %d in_shape: %s cell_fn: %s dropout: %s n_layer: %d " % ( - self.name, n_hidden, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, - dropout, n_layer - ) - ) - - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - - if fixed_batch_size.value: - self.batch_size = fixed_batch_size.value - logging.info(" RNN batch_size (concurrent processes): %d" % self.batch_size) - - else: - self.batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - - # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] - try: - self.inputs.get_shape().with_rank(3) - except Exception: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]") - - with tf.compat.v1.variable_scope(name, initializer=initializer) as vs: - rnn_creator = lambda: cell_fn(num_units=n_hidden, **self.cell_init_args) - # Apply dropout - if dropout: - - if isinstance(dropout, (tuple, list)): # type(dropout) in [tuple, list]: - in_keep_prob = dropout[0] - out_keep_prob = dropout[1] - - elif isinstance(dropout, float): - in_keep_prob, out_keep_prob = 
dropout, dropout - - else: - raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") - - DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper - - cell_creator = lambda is_last=True: DropoutWrapper_fn( - rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=out_keep_prob if is_last else 1.0 - ) - - else: - cell_creator = rnn_creator - - self.fw_cell = cell_creator() - self.bw_cell = cell_creator() - - # Apply multiple layers - if n_layer > 1: - MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell - - if dropout: - try: - self.fw_cell = MultiRNNCell_fn( - [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True - ) - self.bw_cell = MultiRNNCell_fn( - [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True - ) - except Exception: - self.fw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) - self.bw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) - else: - try: - self.fw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) - self.bw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) - except Exception: - self.fw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) - self.bw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) - - # Initial state of RNN - if fw_initial_state is None: - self.fw_initial_state = self.fw_cell.zero_state( - self.batch_size, dtype=LayersConfig.tf_dtype - ) # dtype=tf.float32) - else: - self.fw_initial_state = fw_initial_state - if bw_initial_state is None: - self.bw_initial_state = self.bw_cell.zero_state( - self.batch_size, dtype=LayersConfig.tf_dtype - ) # dtype=tf.float32) - else: - self.bw_initial_state = bw_initial_state - # exit() - # Feedforward to MultiRNNCell - list_rnn_inputs = tf.unstack(self.inputs, axis=1) - - bidirectional_rnn_fn = tf.contrib.rnn.static_bidirectional_rnn - - outputs, fw_state, bw_state = bidirectional_rnn_fn( # outputs, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn( - cell_fw=self.fw_cell, - cell_bw=self.bw_cell, - inputs=list_rnn_inputs, - initial_state_fw=self.fw_initial_state, - initial_state_bw=self.bw_initial_state - ) - - if return_last: - raise Exception("Do not support return_last at the moment.") - # self.outputs = outputs[-1] - else: - self.outputs = outputs - if return_seq_2d: - # 2D Tensor [n_example, n_hidden] - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden * 2]) - - else: - # : stack more RNN layer after that - # 3D Tensor [n_example/n_steps, n_steps, n_hidden] - - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden * 2]) - - self.fw_final_state = fw_state - self.bw_final_state = bw_state - - # Retrieve just the RNN variables. 
-            rnn_variables = tf.compat.v1.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-            logging.info("  n_params : %d" % (len(rnn_variables)))
-
-        self._add_layers(self.outputs)
-        self._add_params(rnn_variables)
-
-
-class ConvRNNCell(object):
-    """Abstract object representing a Convolutional RNN Cell."""
-
-    def __call__(self, inputs, state, scope=None):
-        """Run this RNN cell on inputs, starting from the given state."""
-        raise NotImplementedError("Abstract method")
-
-    @property
-    def state_size(self):
-        """size(s) of state(s) used by this cell."""
-        raise NotImplementedError("Abstract method")
-
-    @property
-    def output_size(self):
-        """Integer or TensorShape: size of outputs produced by this cell."""
-        raise NotImplementedError("Abstract method")
-
-    def zero_state(self, batch_size):
-        """Return zero-filled state tensor(s).
-
-        Args:
-            batch_size: int, float, or unit Tensor representing the batch size.
-
-        Returns:
-            Tensor of shape `[batch_size, shape[0], shape[1], num_features * 2]` filled with zeros.
-        """
-        dtype = tf.float32  # LayersConfig.tf_dtype is unavailable here; its import is commented out above
-        shape = self.shape
-        num_features = self.num_features
-        # TODO : TypeError: 'NoneType' object is not subscriptable
-        zeros = tf.zeros([batch_size, shape[0], shape[1], num_features * 2], dtype=dtype)
-        return zeros
-
-
-class BasicConvLSTMCell(ConvRNNCell):
-    """Basic Conv LSTM recurrent network cell.
-
-    Parameters
-    -----------
-    shape : tuple of int
-        The height and width of the cell.
-    filter_size : tuple of int
-        The height and width of the filter.
-    num_features : int
-        The hidden size of the cell.
-    forget_bias : float
-        The bias added to forget gates (see above).
-    input_size : int
-        Deprecated and unused.
-    state_is_tuple : boolean
-        If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`.
-        If False, they are concatenated along the column axis. The latter behavior will soon be deprecated.
-    act : activation function
-        The activation function of this layer, tanh as default.
-
-    """
-
-    def __init__(
-        self, shape, filter_size, num_features, forget_bias=1.0, input_size=None, state_is_tuple=False,
-        act=tf.nn.tanh
-    ):
-        """Initialize the basic Conv LSTM cell."""
-        if input_size is not None:
-            logging.warn("%s: The input_size parameter is deprecated.", self)
-        self.shape = shape
-        self.filter_size = filter_size
-        self.num_features = num_features
-        self._forget_bias = forget_bias
-        self._state_is_tuple = state_is_tuple
-        self._activation = act
-
-    @property
-    def state_size(self):
-        """State size of the LSTMStateTuple."""
-        return (LSTMStateTuple(self.num_features, self.num_features) if self._state_is_tuple else 2 * self.num_features)
-
-    @property
-    def output_size(self):
-        """Number of units in outputs."""
-        return self.num_features
-
-    def __call__(self, inputs, state, scope=None):
-        """Long short-term memory cell (LSTM)."""
-        with tf.compat.v1.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
-            # Parameters of gates are concatenated into one multiply for efficiency.
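-            # With `state_is_tuple=False` the previous cell state `c` and hidden
-            # state `h` arrive concatenated along the channel axis and are split
-            # back apart below; a single convolution over [inputs, h] then yields
-            # all four gate pre-activations (i, j, f, o) at once.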
-    def __call__(self, inputs, state, scope=None):
-        """Long short-term memory cell (LSTM)."""
-        with tf.compat.v1.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
-            # Parameters of gates are concatenated into one multiply for efficiency.
-            if self._state_is_tuple:
-                c, h = state
-            else:
-                c, h = tf.split(state, 2, 3)
-            concat = _conv_linear([inputs, h], self.filter_size, self.num_features * 4, True)
-
-            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-            i, j, f, o = tf.split(concat, 4, 3)
-
-            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * self._activation(j))
-            new_h = self._activation(new_c) * tf.nn.sigmoid(o)
-
-            if self._state_is_tuple:
-                new_state = LSTMStateTuple(new_c, new_h)
-            else:
-                new_state = tf.concat([new_c, new_h], 3)
-            return new_h, new_state
-
-
-def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=None):
-    """Convolution helper for the ConvLSTM gates.
-
-    Parameters
-    ----------
-    args : tensor
-        A 4D Tensor or a list of 4D Tensors of shape [batch, h, w, c].
-    filter_size : tuple of int
-        Filter height and width.
-    num_features : int
-        Number of features.
-    bias : boolean
-        Whether to add a bias term.
-    bias_start : float
-        Starting value to initialize the bias; 0 by default.
-    scope : VariableScope
-        For the created subgraph; defaults to "Conv".
-
-    Returns
-    --------
-    A 4D Tensor with shape [batch, h, w, num_features].
-
-    Raises
-    -------
-    ValueError : if some of the arguments have unspecified or wrong shape.
-
-    """
-    # Calculate the total depth (channel) size of the arguments.
-    total_arg_size_depth = 0
-    shapes = [a.get_shape().as_list() for a in args]
-    for shape in shapes:
-        if len(shape) != 4:
-            raise ValueError("Linear is expecting 4D arguments: %s" % str(shapes))
-        if not shape[3]:
-            raise ValueError("Linear expects shape[3] (depth) of arguments: %s" % str(shapes))
-        else:
-            total_arg_size_depth += shape[3]
-
-    dtype = [a.dtype for a in args][0]
-
-    # Now the computation.
-    with tf.compat.v1.variable_scope(scope or "Conv"):
-        matrix = tf.compat.v1.get_variable(
-            "Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype=dtype
-        )
-        if len(args) == 1:
-            res = tf.nn.conv2d(args[0], matrix, strides=[1, 1, 1, 1], padding='SAME')
-        else:
-            res = tf.nn.conv2d(tf.concat(args, 3), matrix, strides=[1, 1, 1, 1], padding='SAME')
-        if not bias:
-            return res
-        bias_term = tf.compat.v1.get_variable(
-            "Bias", [num_features], dtype=dtype,
-            initializer=tf.compat.v1.initializers.constant(bias_start, dtype=dtype)
-        )
-        return res + bias_term
-
-
-class ConvLSTM(Layer):
-    """A fixed-length Convolutional LSTM layer.
-
-    See this `paper `__ .
-
-    Parameters
-    ----------
-    prev_layer : :class:`Layer`
-        Previous layer.
-    cell_shape : tuple of int
-        The shape of each cell (width * height).
-    filter_size : tuple of int
-        The size of the filter (width * height).
-    cell_fn : a convolutional RNN cell
-        Cell function like :class:`BasicConvLSTMCell`.
-    feature_map : int
-        The number of feature maps in the layer.
-    initializer : initializer
-        The initializer for initializing the parameters.
-    n_steps : int
-        The sequence length.
-    initial_state : None or ConvLSTM State
-        If None, `initial_state` is a zero state.
-    return_last : boolean
-        Whether to return the last output or all outputs at each step.
-            - If True, return the last output, "Sequence input and single output".
-            - If False, return all outputs, "Synced sequence input and output".
-            - In other words, if you want to stack more RNNs on this layer, set to False.
-    return_seq_2d : boolean
-        Only consider this argument when `return_last` is `False`.
-            - If True, return a 2D Tensor [n_example, h * w * c], for stacking a DenseLayer after it.
-            - If False, return a 5D Tensor [n_example/n_steps, n_steps, h, w, c], for stacking more RNNs after it.
-    name : str
-        A unique layer name.
-
-    Attributes
-    ----------
-    outputs : tensor
-        The output of this RNN. When return_last = False, outputs = all cell_output,
-        which is the hidden state, with cell_output.get_shape() = (?, h, w, c).
-
-    final_state : tensor or StateTuple
-        The final state of this layer.
-            - When state_is_tuple = False, it is the final hidden and cell states concatenated along the channel axis.
-            - When state_is_tuple = True, you can get the final state after each iteration during training, then feed it to the initial state of the next iteration.
-
-    initial_state : tensor or StateTuple
-        It is the initial state of this ConvLSTM layer; you can use it to initialize
-        your state at the beginning of each epoch or iteration according to your
-        training procedure.
-
-    batch_size : int or tensor
-        An int if the batch_size can be computed statically; otherwise, a tensor for ``?``.
-
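-    Examples
-    --------
-    A minimal sketch (the shapes and names below are illustrative placeholders):
-
-    >>> x = tf.placeholder(tf.float32, [None, 5, 32, 32, 3])  # [batch, n_steps, h, w, c]
-    >>> net = tl.layers.Input(x, name='in')
-    >>> net = tl.layers.ConvLSTM(net, cell_shape=(32, 32), feature_map=8,
-    ...                          filter_size=(3, 3), n_steps=5,
-    ...                          return_last=False, return_seq_2d=False, name='convlstm')
-    >>> print(net.outputs)  # (?, 5, 32, 32, 8)
-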
-    """
-
-    @deprecated_alias(layer='prev_layer', end_support_version=1.9)  # TODO remove this line for the 1.9 release
-    def __init__(
-            self,
-            prev_layer,
-            cell_shape=None,
-            feature_map=1,
-            filter_size=(3, 3),
-            cell_fn=BasicConvLSTMCell,
-            initializer=tf.compat.v1.initializers.random_uniform(-0.1, 0.1),
-            n_steps=5,
-            initial_state=None,
-            return_last=False,
-            return_seq_2d=False,
-            name='convlstm',
-    ):
-        super(ConvLSTM, self).__init__(prev_layer=prev_layer, name=name)
-
-        logging.info(
-            "ConvLSTM %s: feature_map: %d, n_steps: %d, "
-            "in_dim: %d %s, cell_fn: %s " %
-            (self.name, feature_map, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__)
-        )
-        # You can get the dimension by .get_shape() or ._shape, and check the
-        # dimension by .with_rank() as follows.
-        # self.inputs.get_shape().with_rank(2)
-        # self.inputs.get_shape().with_rank(3)
-
-        # Input dimension should be rank 5 [batch_size, n_steps(max), h, w, c]
-        try:
-            self.inputs.get_shape().with_rank(5)
-        except Exception:
-            raise Exception(
-                "RNN : Input dimension should be rank 5 : [batch_size, n_steps, input_x, "
-                "input_y, feature_map]"
-            )
-
-        fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
-
-        if fixed_batch_size.value:
-            batch_size = fixed_batch_size.value
-            logging.info("       RNN batch_size (concurrent processes): %d" % batch_size)
-
-        else:
-            batch_size = array_ops.shape(self.inputs)[0]
-            logging.info("       unspecified batch_size, using a tensor instead.")
-        self.batch_size = batch_size
-        outputs = []
-        self.cell = cell = cell_fn(shape=cell_shape, filter_size=filter_size, num_features=feature_map)
-
-        if initial_state is None:
-            # `ConvRNNCell.zero_state` takes only `batch_size`; its dtype is fixed to
-            # `LayersConfig.tf_dtype` internally, so no dtype argument is passed here.
-            self.initial_state = cell.zero_state(batch_size)
-        else:
-            self.initial_state = initial_state
-
-        state = self.initial_state
-
-        # with tf.variable_scope("model", reuse=None, initializer=initializer):
-        with tf.compat.v1.variable_scope(name, initializer=initializer) as vs:
-            for time_step in range(n_steps):
-                if time_step > 0: tf.compat.v1.get_variable_scope().reuse_variables()
-                (cell_output, state) = cell(self.inputs[:, time_step, :, :, :], state)
-                outputs.append(cell_output)
-
-            # Retrieve just the RNN variables.
- # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)] - rnn_variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.VARIABLES, scope=vs.name) - - logging.info(" n_params : %d" % (len(rnn_variables))) - - if return_last: - # 2D Tensor [batch_size, n_hidden] - self.outputs = outputs[-1] - else: - if return_seq_2d: - # PTB tutorial: stack dense layer after that, or compute the cost from the output - # 4D Tensor [n_example, h, w, c] - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, cell_shape[0] * cell_shape[1] * feature_map]) - else: - # : stack more RNN layer after that - # 5D Tensor [n_example/n_steps, n_steps, h, w, c] - self.outputs = tf.reshape( - tf.concat(outputs, 1), [-1, n_steps, cell_shape[0], cell_shape[1], feature_map] - ) - - self.final_state = state - - self._add_layers(self.outputs) - self._add_params(rnn_variables) - - -# Advanced Ops for Dynamic RNN -def advanced_indexing_op(inputs, index): - """Advanced Indexing for Sequences, returns the outputs by given sequence lengths. - When return the last output :class:`DynamicRNN` uses it to get the last outputs with the sequence lengths. - - Parameters - ----------- - inputs : tensor for data - With shape of [batch_size, n_step(max), n_features] - index : tensor for indexing - Sequence length in Dynamic RNN. [batch_size] - - Examples - --------- - >>> import numpy as np - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> batch_size, max_length, n_features = 3, 5, 2 - >>> z = np.random.uniform(low=-1, high=1, size=[batch_size, max_length, n_features]).astype(np.float32) - >>> b_z = tf.constant(z) - >>> sl = tf.placeholder(dtype=tf.int32, shape=[batch_size]) - >>> o = advanced_indexing_op(b_z, sl) - >>> - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> - >>> order = np.asarray([1,1,2]) - >>> print("real",z[0][order[0]-1], z[1][order[1]-1], z[2][order[2]-1]) - >>> y = sess.run([o], feed_dict={sl:order}) - >>> print("given",order) - >>> print("out", y) - real [-0.93021595 0.53820813] [-0.92548317 -0.77135968] [ 0.89952248 0.19149846] - given [1 1 2] - out [array([[-0.93021595, 0.53820813], - [-0.92548317, -0.77135968], - [ 0.89952248, 0.19149846]], dtype=float32)] - - References - ----------- - - Modified from TFlearn (the original code is used for fixed length rnn), `references `__. - - """ - batch_size = tf.shape(input=inputs)[0] - # max_length = int(inputs.get_shape()[1]) # for fixed length rnn, length is given - max_length = tf.shape(input=inputs)[1] # for dynamic_rnn, length is unknown - dim_size = int(inputs.get_shape()[2]) - index = tf.range(0, batch_size) * max_length + (index - 1) - flat = tf.reshape(inputs, [-1, dim_size]) - relevant = tf.gather(flat, index) - return relevant - - -def retrieve_seq_length_op(data): - """An op to compute the length of a sequence from input shape of [batch_size, n_step(max), n_features], - it can be used when the features of padding (on right hand side) are all zeros. - - Parameters - ----------- - data : tensor - [batch_size, n_step(max), n_features] with zero padding on right hand side. - - Examples - --------- - >>> data = [[[1],[2],[0],[0],[0]], - ... [[1],[2],[3],[0],[0]], - ... 
[[1],[2],[6],[1],[0]]] - >>> data = np.asarray(data) - >>> print(data.shape) - (3, 5, 1) - >>> data = tf.constant(data) - >>> sl = retrieve_seq_length_op(data) - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> y = sl.eval() - [2 3 4] - - Multiple features - >>> data = [[[1,2],[2,2],[1,2],[1,2],[0,0]], - ... [[2,3],[2,4],[3,2],[0,0],[0,0]], - ... [[3,3],[2,2],[5,3],[1,2],[0,0]]] - >>> print(sl) - [4 3 4] - - References - ------------ - Borrow from `TFlearn `__. - - """ - with tf.name_scope('GetLength'): - used = tf.sign(tf.reduce_max(input_tensor=tf.abs(data), axis=2)) - length = tf.reduce_sum(input_tensor=used, axis=1) - - return tf.cast(length, tf.int32) - - -def retrieve_seq_length_op2(data): - """An op to compute the length of a sequence, from input shape of [batch_size, n_step(max)], - it can be used when the features of padding (on right hand side) are all zeros. - - Parameters - ----------- - data : tensor - [batch_size, n_step(max)] with zero padding on right hand side. - - Examples - -------- - >>> data = [[1,2,0,0,0], - ... [1,2,3,0,0], - ... [1,2,6,1,0]] - >>> o = retrieve_seq_length_op2(data) - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> print(o.eval()) - [2 3 4] - - """ - return tf.reduce_sum(input_tensor=tf.cast(tf.greater(data, tf.zeros_like(data)), tf.int32), axis=1) - - -def retrieve_seq_length_op3(data, pad_val=0): # HangSheng: return tensor for sequence length, if input is tf.string - """Return tensor for sequence length, if input is ``tf.string``.""" - data_shape_size = data.get_shape().ndims - if data_shape_size == 3: - return tf.reduce_sum( - input_tensor=tf.cast(tf.reduce_any(input_tensor=tf.not_equal(data, pad_val), axis=2), dtype=tf.int32), - axis=1 - ) - elif data_shape_size == 2: - return tf.reduce_sum(input_tensor=tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32), axis=1) - elif data_shape_size == 1: - raise ValueError("retrieve_seq_length_op3: data has wrong shape!") - else: - raise ValueError( - "retrieve_seq_length_op3: handling data_shape_size %s hasn't been implemented!" % (data_shape_size) - ) - - -def target_mask_op(data, pad_val=0): # HangSheng: return tensor for mask,if input is tf.string - """Return tensor for mask, if input is ``tf.string``.""" - data_shape_size = data.get_shape().ndims - if data_shape_size == 3: - return tf.cast(tf.reduce_any(input_tensor=tf.not_equal(data, pad_val), axis=2), dtype=tf.int32) - elif data_shape_size == 2: - return tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32) - elif data_shape_size == 1: - raise ValueError("target_mask_op: data has wrong shape!") - else: - raise ValueError("target_mask_op: handling data_shape_size %s hasn't been implemented!" % (data_shape_size)) - - -class DynamicRNN(Layer): - """ - The :class:`DynamicRNN` class is a dynamic recurrent layer, see ``tf.nn.dynamic_rnn``. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell - - See `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different - cell_init_args : dictionary or None - The arguments for the cell function. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for initializing the parameters. - sequence_length : tensor, array or None - The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``. - - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. 
when the features of padding (on right hand side) are all zeros.
-            - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``.
-            - You can also input a numpy array.
-            - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__.
-    initial_state : None or RNN State
-        If None, `initial_state` is a zero state.
-    dropout : tuple of float or int
-        The input and output keep probability (input_keep_prob, output_keep_prob).
-            - If one int, input and output keep probability are the same.
-    n_layer : int
-        The number of RNN layers, default is 1.
-    return_last : boolean or None
-        Whether to return the last output or all outputs at each step.
-            - If True, return the last output, "Sequence input and single output".
-            - If False, return all outputs, "Synced sequence input and output".
-            - In other words, if you want to stack more RNNs on this layer, set to False.
-    return_seq_2d : boolean
-        Only consider this argument when `return_last` is `False`.
-            - If True, return a 2D Tensor [n_example, n_hidden], for stacking a DenseLayer after it.
-            - If False, return a 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking more RNNs after it.
-    dynamic_rnn_init_args : dictionary
-        The arguments for ``tf.nn.dynamic_rnn``.
-    name : str
-        A unique layer name.
-
-    Attributes
-    ------------
-    outputs : tensor
-        The output of this layer.
-
-    final_state : tensor or StateTuple
-        The final state of this layer.
-            - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`.
-            - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`.
-            - In practice, you can get the final state after each iteration during training, then feed it to the initial state of the next iteration.
-
-    initial_state : tensor or StateTuple
-        The initial state of this layer.
-            - In practice, you can set your state at the beginning of each epoch or iteration according to your training procedure.
-
-    batch_size : int or tensor
-        It is an integer, if it is able to compute the `batch_size`; otherwise, a tensor for dynamic batch size.
-
-    sequence_length : a tensor or array
-        The sequence lengths computed by Advanced Ops or the given sequence lengths, [batch_size].
-
-    Notes
-    -----
-    Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]; if not, please see :class:`ReshapeLayer`.
-
-    Examples
-    --------
-    Synced sequence input and output, for the loss function see ``tl.cost.cross_entropy_seq_with_mask``.
-
-    >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input")
-    >>> net = tl.layers.EmbeddingInput(
-    ...     inputs=input_seqs,
-    ...     vocabulary_size=vocab_size,
-    ...     embedding_size=embedding_size,
-    ...     name='embedding')
-    >>> net = tl.layers.DynamicRNN(net,
-    ...     cell_fn=tf.contrib.rnn.BasicLSTMCell,  # for TF0.2 use tf.nn.rnn_cell.BasicLSTMCell
-    ...     n_hidden=embedding_size,
-    ...     dropout=(0.7 if is_train else None),
-    ...     sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs),
-    ...     return_last=False,  # for an encoder, set to True
-    ...     return_seq_2d=True,  # stack a Dense layer or compute the cost after it
-    ...
name='dynamicrnn') - >>> net = tl.layers.Dense(net, n_units=vocab_size, name="output") - - References - ---------- - - `Wild-ML Blog `__ - - `dynamic_rnn.ipynb `__ - - `tf.nn.dynamic_rnn `__ - - `tflearn rnn `__ - - ``tutorial_dynamic_rnn.py`` - - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - cell_fn, #tf.nn.rnn_cell.LSTMCell, - cell_init_args=None, - n_hidden=256, - initializer=tf.compat.v1.initializers.random_uniform(-0.1, 0.1), - sequence_length=None, - initial_state=None, - dropout=None, - n_layer=1, - return_last=None, - return_seq_2d=False, - dynamic_rnn_init_args=None, - name='dyrnn', - ): - if cell_fn is None: - raise Exception("Please put in cell_fn") - - super(DynamicRNN, self).__init__( - prev_layer=prev_layer, cell_init_args=cell_init_args, dynamic_rnn_init_args=dynamic_rnn_init_args, name=name - ) - - if self.cell_init_args: - self.cell_init_args['state_is_tuple'] = True # 'use_peepholes': True - - if 'GRU' in cell_fn.__name__: - try: - self.cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - - if return_last is None: - return_last = True - - logging.info( - "DynamicRNNLayer %s: n_hidden: %d, in_dim: %d in_shape: %s cell_fn: %s dropout: %s n_layer: %d" % ( - self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, - n_layer - ) - ) - - # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] - try: - self.inputs.get_shape().with_rank(3) - except Exception: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") - - # Get the batch_size - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - logging.info(" batch_size (concurrent processes): %d" % batch_size) - - else: - batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - - self.batch_size = batch_size - - # Creats the cell function - # cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **self.cell_init_args) # HanSheng - rnn_creator = lambda: cell_fn(num_units=n_hidden, **self.cell_init_args) - - # Apply dropout - if dropout: - if isinstance(dropout, (tuple, list)): - in_keep_prob = dropout[0] - out_keep_prob = dropout[1] - - elif isinstance(dropout, float): - in_keep_prob, out_keep_prob = dropout, dropout - - else: - raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") - - DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper - - # cell_instance_fn1=cell_instance_fn # HanSheng - # cell_instance_fn=DropoutWrapper_fn( - # cell_instance_fn1(), - # input_keep_prob=in_keep_prob, - # output_keep_prob=out_keep_prob) - cell_creator = lambda is_last=True: DropoutWrapper_fn( - rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=out_keep_prob if is_last else 1.0 - ) - else: - cell_creator = rnn_creator - self.cell = cell_creator() - # Apply multiple layers - if n_layer > 1: - try: - MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell - except Exception: - MultiRNNCell_fn = tf.compat.v1.nn.rnn_cell.MultiRNNCell - - # cell_instance_fn2=cell_instance_fn # HanSheng - if dropout: - try: - # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)], state_is_tuple=True) # HanSheng - self.cell = MultiRNNCell_fn( - [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], 
state_is_tuple=True - ) - except Exception: # when GRU - # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)]) # HanSheng - self.cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) - else: - try: - self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) - except Exception: # when GRU - self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) - - # self.cell=cell_instance_fn() # HanSheng - - # Initialize initial_state - if initial_state is None: - self.initial_state = self.cell.zero_state(batch_size, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) - else: - self.initial_state = initial_state - - # Computes sequence_length - if sequence_length is None: - - sequence_length = retrieve_seq_length_op( - self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs) - ) - - # Main - Computes outputs and last_states - with tf.compat.v1.variable_scope(name, initializer=initializer) as vs: - outputs, last_states = tf.compat.v1.nn.dynamic_rnn( - cell=self.cell, - # inputs=X - inputs=self.inputs, - # dtype=tf.float64, - sequence_length=sequence_length, - initial_state=self.initial_state, - **self.dynamic_rnn_init_args - ) - rnn_variables = tf.compat.v1.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # logging.info(" n_params : %d" % (len(rnn_variables))) - # Manage the outputs - if return_last: - # [batch_size, n_hidden] - # outputs = tf.transpose(tf.pack(outputs), [1, 0, 2]) - self.outputs = advanced_indexing_op(outputs, sequence_length) - - else: - # [batch_size, n_step(max), n_hidden] - # self.outputs = result[0]["outputs"] - # self.outputs = outputs # it is 3d, but it is a list - if return_seq_2d: - # PTB tutorial: - # 2D Tensor [n_example, n_hidden] - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden]) - - else: - # : - # 3D Tensor [batch_size, n_steps(max), n_hidden] - max_length = tf.shape(input=outputs)[1] - batch_size = tf.shape(input=outputs)[0] - - self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, n_hidden]) - # self.outputs = tf.reshape(tf.concat(1, outputs), [-1, max_length, n_hidden]) - - # Final state - self.final_state = last_states - - self.sequence_length = sequence_length - - self._add_layers(self.outputs) - self._add_params(rnn_variables) - - -class BiDynamicRNN(Layer): - """ - The :class:`BiDynamicRNN` class is a RNN layer, you can implement vanilla RNN, - LSTM and GRU with it. - - Parameters - ---------- - prev_layer : :class:`Layer` - Previous layer. - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell - - See `RNN Cells in TensorFlow `__. - - Note TF1.0+ and TF1.0- are different. - cell_init_args : dictionary - The arguments for the cell initializer. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for initializing the parameters. - sequence_length : tensor, array or None - The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``. - - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the features of padding (on right hand side) are all zeros. - - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``. - - You can also input an numpy array. - - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__. 
-    fw_initial_state : None or forward RNN State
-        If None, `fw_initial_state` is a zero state.
-    bw_initial_state : None or backward RNN State
-        If None, `bw_initial_state` is a zero state.
-    dropout : tuple of float or int
-        The input and output keep probability (input_keep_prob, output_keep_prob).
-            - If one int, input and output keep probability are the same.
-    n_layer : int
-        The number of RNN layers, default is 1.
-    return_last : boolean
-        Whether to return the last output or all outputs at each step.
-            - If True, return the last output, "Sequence input and single output".
-            - If False, return all outputs, "Synced sequence input and output".
-            - In other words, if you want to stack more RNNs on this layer, set to False.
-    return_seq_2d : boolean
-        Only consider this argument when `return_last` is `False`.
-            - If True, return a 2D Tensor [n_example, 2 * n_hidden], for stacking a DenseLayer after it.
-            - If False, return a 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking more RNNs after it.
-    dynamic_rnn_init_args : dictionary
-        The arguments for ``tf.nn.bidirectional_dynamic_rnn``.
-    name : str
-        A unique layer name.
-
-    Attributes
-    ----------
-    outputs : tensor
-        The output of this layer. (?, 2 * n_hidden)
-
-    fw(bw)_final_state : tensor or StateTuple
-        The final state of this layer.
-            - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`.
-            - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`.
-            - In practice, you can get the final state after each iteration during training, then feed it to the initial state of the next iteration.
-
-    fw(bw)_initial_state : tensor or StateTuple
-        The initial state of this layer.
-            - In practice, you can set your state at the beginning of each epoch or iteration according to your training procedure.
-
-    batch_size : int or tensor
-        It is an integer, if it is able to compute the `batch_size`; otherwise, a tensor for dynamic batch size.
-
-    sequence_length : a tensor or array
-        The sequence lengths computed by Advanced Ops or the given sequence lengths, [batch_size].
-
-    Notes
-    -----
-    Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]; if not, please see :class:`ReshapeLayer`.
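-
-    Examples
-    --------
-    A minimal sketch, mirroring the :class:`DynamicRNN` example above
-    (`batch_size`, `vocab_size` and `embedding_size` are illustrative placeholders):
-
-    >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input")
-    >>> net = tl.layers.EmbeddingInput(
-    ...     inputs=input_seqs,
-    ...     vocabulary_size=vocab_size,
-    ...     embedding_size=embedding_size,
-    ...     name='embedding')
-    >>> net = tl.layers.BiDynamicRNN(net,
-    ...     cell_fn=tf.contrib.rnn.BasicLSTMCell,
-    ...     n_hidden=embedding_size,
-    ...     sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs),
-    ...     return_last=False,
-    ...     return_seq_2d=True,  # stack a Dense layer or compute the cost after it
-    ...     name='bi_dyrnn')
-    >>> net = tl.layers.Dense(net, n_units=vocab_size, name="output")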
- - References - ---------- - - `Wild-ML Blog `__ - - `bidirectional_rnn.ipynb `__ - - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - cell_fn, #tf.nn.rnn_cell.LSTMCell, - cell_init_args=None, - n_hidden=256, - initializer=tf.compat.v1.initializers.random_uniform(-0.1, 0.1), - sequence_length=None, - fw_initial_state=None, - bw_initial_state=None, - dropout=None, - n_layer=1, - return_last=False, - return_seq_2d=False, - dynamic_rnn_init_args=None, - name='bi_dyrnn', - ): - super(BiDynamicRNN, self).__init__( - prev_layer=prev_layer, cell_init_args=cell_init_args, dynamic_rnn_init_args=dynamic_rnn_init_args, name=name - ) - - if self.cell_init_args: - self.cell_init_args['state_is_tuple'] = True # 'use_peepholes': True, - - if 'GRU' in cell_fn.__name__: - try: - self.cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - - if cell_fn is None: - raise Exception("Please put in cell_fn") - - logging.info( - "BiDynamicRNNLayer %s: n_hidden: %d in_dim: %d in_shape: %s cell_fn: %s dropout: %s n_layer: %d" % ( - self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, - n_layer - ) - ) - - # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] - try: - self.inputs.get_shape().with_rank(3) - except Exception: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") - - # Get the batch_size - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - logging.info(" batch_size (concurrent processes): %d" % batch_size) - - else: - batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - - self.batch_size = batch_size - - with tf.compat.v1.variable_scope(name, initializer=initializer) as vs: - # Creats the cell function - # cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **self.cell_init_args) # HanSheng - rnn_creator = lambda: cell_fn(num_units=n_hidden, **self.cell_init_args) - - # Apply dropout - if dropout: - if isinstance(dropout, (tuple, list)): - in_keep_prob = dropout[0] - out_keep_prob = dropout[1] - elif isinstance(dropout, float): - in_keep_prob, out_keep_prob = dropout, dropout - else: - raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") - try: - DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper - except Exception: - DropoutWrapper_fn = tf.compat.v1.nn.rnn_cell.DropoutWrapper - - # cell_instance_fn1=cell_instance_fn # HanSheng - # cell_instance_fn=lambda: DropoutWrapper_fn( - # cell_instance_fn1(), - # input_keep_prob=in_keep_prob, - # output_keep_prob=out_keep_prob) - cell_creator = lambda is_last=True: DropoutWrapper_fn( - rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=out_keep_prob if is_last else 1.0 - ) - else: - cell_creator = rnn_creator - - # if dropout: - # self.fw_cell = DropoutWrapper_fn(self.fw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob) - # self.bw_cell = DropoutWrapper_fn(self.bw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob) - - # self.fw_cell=cell_instance_fn() - # self.bw_cell=cell_instance_fn() - # Initial state of RNN - - self.fw_initial_state = fw_initial_state - self.bw_initial_state = bw_initial_state - # Computes sequence_length - if sequence_length is None: - - sequence_length = 
retrieve_seq_length_op( - self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs) - ) - - if n_layer > 1: - if dropout: - self.fw_cell = [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)] - self.bw_cell = [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)] - - else: - self.fw_cell = [cell_creator() for _ in range(n_layer)] - self.bw_cell = [cell_creator() for _ in range(n_layer)] - - outputs, states_fw, states_bw = stack_bidirectional_dynamic_rnn( - cells_fw=self.fw_cell, cells_bw=self.bw_cell, inputs=self.inputs, sequence_length=sequence_length, - initial_states_fw=self.fw_initial_state, initial_states_bw=self.bw_initial_state, - dtype=LayersConfig.tf_dtype, **self.dynamic_rnn_init_args - ) - - else: - self.fw_cell = cell_creator() - self.bw_cell = cell_creator() - outputs, (states_fw, states_bw) = tf.compat.v1.nn.bidirectional_dynamic_rnn( - cell_fw=self.fw_cell, cell_bw=self.bw_cell, inputs=self.inputs, sequence_length=sequence_length, - initial_state_fw=self.fw_initial_state, initial_state_bw=self.bw_initial_state, - dtype=LayersConfig.tf_dtype, **self.dynamic_rnn_init_args - ) - - rnn_variables = tf.compat.v1.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - logging.info(" n_params : %d" % (len(rnn_variables))) - - # Manage the outputs - outputs = tf.concat(outputs, 2) - - if return_last: - # [batch_size, 2 * n_hidden] - raise NotImplementedError("Return last is not implemented yet.") - # self.outputs = advanced_indexing_op(outputs, sequence_length) - else: - # [batch_size, n_step(max), 2 * n_hidden] - if return_seq_2d: - # PTB tutorial: - # 2D Tensor [n_example, 2 * n_hidden] - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, 2 * n_hidden]) - - else: - # : - # 3D Tensor [batch_size, n_steps(max), 2 * n_hidden] - max_length = tf.shape(input=outputs)[1] - batch_size = tf.shape(input=outputs)[0] - - self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, 2 * n_hidden]) - - # Final state - self.fw_final_states = states_fw - self.bw_final_states = states_bw - - self.sequence_length = sequence_length - - self._add_layers(self.outputs) - self._add_params(rnn_variables) - - -class Seq2Seq(Layer): - """ - The :class:`Seq2Seq` class is a simple :class:`DynamicRNNLayer` based Seq2seq layer without using `tl.contrib.seq2seq `__. - See `Model `__ - and `Sequence to Sequence Learning with Neural Networks `__. - - - Please check this example `Chatbot in 200 lines of code `__. - - The Author recommends users to read the source code of :class:`DynamicRNNLayer` and :class:`Seq2Seq`. - - Parameters - ---------- - net_encode_in : :class:`Layer` - Encode sequences, [batch_size, None, n_features]. - net_decode_in : :class:`Layer` - Decode sequences, [batch_size, None, n_features]. - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell - - see `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different - cell_init_args : dictionary or None - The arguments for the cell initializer. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for the parameters. - encode_sequence_length : tensor - For encoder sequence length, see :class:`DynamicRNNLayer` . - decode_sequence_length : tensor - For decoder sequence length, see :class:`DynamicRNNLayer` . - initial_state_encode : None or RNN state - If None, `initial_state_encode` is zero state, it can be set by placeholder or other RNN. 
- initial_state_decode : None or RNN state - If None, `initial_state_decode` is the final state of the RNN encoder, it can be set by placeholder or other RNN. - dropout : tuple of float or int - The input and output keep probability (input_keep_prob, output_keep_prob). - - If one int, input and output keep probability are the same. - n_layer : int - The number of RNN layers, default is 1. - return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. - name : str - A unique layer name. - - Attributes - ------------ - outputs : tensor - The output of RNN decoder. - initial_state_encode : tensor or StateTuple - Initial state of RNN encoder. - initial_state_decode : tensor or StateTuple - Initial state of RNN decoder. - final_state_encode : tensor or StateTuple - Final state of RNN encoder. - final_state_decode : tensor or StateTuple - Final state of RNN decoder. - - Notes - -------- - - How to feed data: `Sequence to Sequence Learning with Neural Networks `__ - - input_seqs : ``['how', 'are', 'you', '']`` - - decode_seqs : ``['', 'I', 'am', 'fine', '']`` - - target_seqs : ``['I', 'am', 'fine', '', '']`` - - target_mask : ``[1, 1, 1, 1, 0]`` - - related functions : tl.prepro - - Examples - ---------- - >>> from tensorlayer.layers import * - >>> batch_size = 32 - >>> encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs") - >>> decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs") - >>> target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs") - >>> target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask") # tl.prepro.sequences_get_mask() - >>> with tf.variable_scope("model"): - >>> # for chatbot, you can use the same embedding layer, - >>> # for translation, you may want to use 2 seperated embedding layers - >>> with tf.variable_scope("embedding") as vs: - >>> net_encode = EmbeddingInput( - ... inputs = encode_seqs, - ... vocabulary_size = 10000, - ... embedding_size = 200, - ... name = 'seq_embedding') - >>> vs.reuse_variables() - >>> net_decode = EmbeddingInput( - ... inputs = decode_seqs, - ... vocabulary_size = 10000, - ... embedding_size = 200, - ... name = 'seq_embedding') - >>> net = Seq2Seq(net_encode, net_decode, - ... cell_fn = tf.contrib.rnn.BasicLSTMCell, - ... n_hidden = 200, - ... initializer = tf.random_uniform_initializer(-0.1, 0.1), - ... encode_sequence_length = retrieve_seq_length_op2(encode_seqs), - ... decode_sequence_length = retrieve_seq_length_op2(decode_seqs), - ... initial_state_encode = None, - ... dropout = None, - ... n_layer = 1, - ... return_seq_2d = True, - ... 
name = 'seq2seq') - >>> net_out = Dense(net, n_units=10000, act=None, name='output') - >>> e_loss = tl.cost.cross_entropy_seq_with_mask(logits=net_out.outputs, target_seqs=target_seqs, input_mask=target_mask, return_details=False, name='cost') - >>> y = tf.nn.softmax(net_out.outputs) - >>> net_out.print_params(False) - - """ - - def __init__( - self, - net_encode_in, - net_decode_in, - cell_fn, #tf.nn.rnn_cell.LSTMCell, - cell_init_args=None, - n_hidden=256, - initializer=tf.compat.v1.initializers.random_uniform(-0.1, 0.1), - encode_sequence_length=None, - decode_sequence_length=None, - initial_state_encode=None, - initial_state_decode=None, - dropout=None, - n_layer=1, - return_seq_2d=False, - name='seq2seq', - ): - super(Seq2Seq, - self).__init__(prev_layer=[net_encode_in, net_decode_in], cell_init_args=cell_init_args, name=name) - - if self.cell_init_args: - self.cell_init_args['state_is_tuple'] = True # 'use_peepholes': True, - - if cell_fn is None: - raise ValueError("cell_fn cannot be set to None") - - if 'GRU' in cell_fn.__name__: - try: - cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - - logging.info( - "[*] Seq2Seq %s: n_hidden: %d cell_fn: %s dropout: %s n_layer: %d" % - (self.name, n_hidden, cell_fn.__name__, dropout, n_layer) - ) - - with tf.compat.v1.variable_scope(name): - # tl.layers.set_name_reuse(reuse) - # network = InputLayer(self.inputs, name=name+'/input') - network_encode = DynamicRNN( - net_encode_in, cell_fn=cell_fn, cell_init_args=self.cell_init_args, n_hidden=n_hidden, - initializer=initializer, initial_state=initial_state_encode, dropout=dropout, n_layer=n_layer, - sequence_length=encode_sequence_length, return_last=False, return_seq_2d=True, name='encode' - ) - # vs.reuse_variables() - # tl.layers.set_name_reuse(True) - network_decode = DynamicRNN( - net_decode_in, cell_fn=cell_fn, cell_init_args=self.cell_init_args, n_hidden=n_hidden, - initializer=initializer, - initial_state=(network_encode.final_state if initial_state_decode is None else initial_state_decode), - dropout=dropout, n_layer=n_layer, sequence_length=decode_sequence_length, return_last=False, - return_seq_2d=return_seq_2d, name='decode' - ) - self.outputs = network_decode.outputs - - # rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # Initial state - self.initial_state_encode = network_encode.initial_state - self.initial_state_decode = network_decode.initial_state - - # Final state - self.final_state_encode = network_encode.final_state - self.final_state_decode = network_decode.final_state - - # self.sequence_length = sequence_length - self._add_layers(network_encode.all_layers) - self._add_params(network_encode.all_params) - self._add_dropout_layers(network_encode.all_drop) - - self._add_layers(network_decode.all_layers) - self._add_params(network_decode.all_params) - self._add_dropout_layers(network_decode.all_drop) - - self._add_layers(self.outputs) diff --git a/tensorlayer/layers/scale.py b/tensorlayer/layers/scale.py deleted file mode 100644 index 593d217..0000000 --- a/tensorlayer/layers/scale.py +++ /dev/null @@ -1,48 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Scale', -] - - -class Scale(Layer): - """The :class:`Scale` class is for multipling a trainble scale value to the layer outputs. 
Usually be used on the output of binary net. - - Parameters - ---------- - init_scale : float - The initial value for the scale factor. - name : a str - A unique layer name. - - """ - - def __init__( - self, - init_scale=0.05, - name='scale', - ): - # super(Scale, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.init_scale = init_scale - logging.info("Scale %s: init_scale: %f" % (self.name, self.init_scale)) - - def build(self, inputs_shape): - self.scale = self._get_weights( - "scale", shape=[1], init=tf.compat.v1.initializers.constant(value=self.init_scale) - ) #, init_args=self.W_init_args) - # self.scale = tf.compat.v1.get_variable("scale", shape=[1], initializer=tf.compat.v1.initializers.constant(value=self.init_scale)) - # self.add_weights(self.scale) - - def forward(self, inputs): - outputs = inputs * self.scale - return outputs diff --git a/tensorlayer/layers/shape.py b/tensorlayer/layers/shape.py deleted file mode 100644 index eb33c74..0000000 --- a/tensorlayer/layers/shape.py +++ /dev/null @@ -1,147 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer -from tensorlayer.layers.utils import flatten_reshape - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Flatten', - 'Reshape', - 'Transpose', -] - - -class Flatten(Layer): - """A layer that reshapes high-dimension input into a vector. - - Then we often apply Dense, RNN, Concat and etc on the top of a flatten layer. - [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask] - - Parameters - ---------- - name : None or str - A unique layer name. - - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.Flatten(net, name='flatten') - [?, 784] - - """ - - def __init__(self, name=None): #'flatten'): - # super(Flatten, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - - self.build() - self._built = True - - logging.info("Flatten %s:" % (self.name)) - - def __repr__(self): - s = '{classname}(' - if self.name is not None: - s += 'name=\'{name}\'' - s += ')' - return s.format(classname=self.__class__.__name__, **self.__dict__) - - def build(self, inputs_shape=None): - pass - - def forward(self, inputs): - outputs = flatten_reshape(inputs, name=self.name) - return outputs - - -class Reshape(Layer): - """A layer that reshapes a given tensor. - - Parameters - ---------- - shape : tuple of int - The output shape, see ``tf.reshape``. - name : str - A unique layer name. 
- - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, shape=(None, 784)) - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.Reshape(net, [-1, 28, 28, 1], name='reshape') - >>> print(net.outputs) - (?, 28, 28, 1) - - """ - - def __init__(self, shape, name=None): #'reshape'): - # super(Reshape, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.shape = shape - logging.info("Reshape %s" % (self.name)) - if not self.shape: - raise ValueError("Shape list can not be empty") - - self.build() - self._built = True - - def build(self, inputs_shape=None): - pass - - def forward(self, inputs): - outputs = tf.reshape(inputs, shape=self.shape, name=self.name) - return outputs - - -class Transpose(Layer): - """A layer that transposes the dimension of a tensor. - - See `tf.transpose() `__ . - - Parameters - ---------- - perm: list of int - The permutation of the dimensions, similar with ``numpy.transpose``. - name : str - A unique layer name. - - Examples - ---------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) - >>> net = tl.layers.Input(x, name='input') - >>> net = tl.layers.Transpose(net, perm=[0, 1, 3, 2], name='trans') - [None, 28, 1, 28] - - """ - - def __init__(self, perm, name=None): #'transpose'): - # super(Transpose, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.perm = perm - - logging.info("Transpose %s: perm: %s" % (self.name, self.perm)) - if self.perm is None: - raise AssertionError("The `perm` argument cannot be None") - - self.build() - self._built = None - - def build(self, inputs_shape=None): - pass - - def forward(self, inputs): - outputs = tf.transpose(a=inputs, perm=self.perm, name=self.name) - return outputs diff --git a/tensorlayer/layers/spatial_transformer.py b/tensorlayer/layers/spatial_transformer.py deleted file mode 100644 index 2e2c558..0000000 --- a/tensorlayer/layers/spatial_transformer.py +++ /dev/null @@ -1,313 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -from six.moves import xrange - -import numpy as np - -import tensorflow as tf -from tensorflow.python.ops import array_ops - -from tensorlayer.layers.core import Layer -# from tensorlayer.layers.core import LayersConfig -# from tensorlayer.layers.core import TF_GRAPHKEYS_VARIABLES - -from tensorlayer.layers.utils import flatten_reshape - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'transformer', - 'batch_transformer', - 'SpatialTransformer2dAffine', -] - - -def transformer(U, theta, out_size, name='SpatialTransformer2dAffine'): - """Spatial Transformer Layer for `2D Affine Transformation `__ - , see :class:`SpatialTransformer2dAffine` class. - - Parameters - ---------- - U : list of float - The output of a convolutional net should have the - shape [num_batch, height, width, num_channels]. - theta: float - The output of the localisation network should be [num_batch, 6], value range should be [0, 1] (via tanh). - out_size: tuple of int - The size of the output of the network (height, width) - name: str - Optional function name - - Returns - ------- - Tensor - The transformed tensor. - - References - ---------- - - `Spatial Transformer Networks `__ - - `TensorFlow/Models `__ - - Notes - ----- - To initialize the network to the identity transform init. 
- - >>> import tensorflow as tf - >>> # ``theta`` to - >>> identity = np.array([[1., 0., 0.], [0., 1., 0.]]) - >>> identity = identity.flatten() - >>> theta = tf.Variable(initial_value=identity) - - """ - - def _repeat(x, n_repeats): - with tf.compat.v1.variable_scope('_repeat'): - rep = tf.transpose(a=tf.expand_dims(tf.ones(shape=tf.stack([ - n_repeats, - ])), 1), perm=[1, 0]) - rep = tf.cast(rep, 'int32') - x = tf.matmul(tf.reshape(x, (-1, 1)), rep) - return tf.reshape(x, [-1]) - - def _interpolate(im, x, y, out_size): - with tf.compat.v1.variable_scope('_interpolate'): - # constants - num_batch = tf.shape(input=im)[0] - height = tf.shape(input=im)[1] - width = tf.shape(input=im)[2] - channels = tf.shape(input=im)[3] - - x = tf.cast(x, 'float32') - y = tf.cast(y, 'float32') - height_f = tf.cast(height, 'float32') - width_f = tf.cast(width, 'float32') - out_height = out_size[0] - out_width = out_size[1] - zero = tf.zeros([], dtype='int32') - max_y = tf.cast(tf.shape(input=im)[1] - 1, 'int32') - max_x = tf.cast(tf.shape(input=im)[2] - 1, 'int32') - - # scale indices from [-1, 1] to [0, width/height] - x = (x + 1.0) * (width_f) / 2.0 - y = (y + 1.0) * (height_f) / 2.0 - - # do sampling - x0 = tf.cast(tf.floor(x), 'int32') - x1 = x0 + 1 - y0 = tf.cast(tf.floor(y), 'int32') - y1 = y0 + 1 - - x0 = tf.clip_by_value(x0, zero, max_x) - x1 = tf.clip_by_value(x1, zero, max_x) - y0 = tf.clip_by_value(y0, zero, max_y) - y1 = tf.clip_by_value(y1, zero, max_y) - dim2 = width - dim1 = width * height - base = _repeat(tf.range(num_batch) * dim1, out_height * out_width) - base_y0 = base + y0 * dim2 - base_y1 = base + y1 * dim2 - idx_a = base_y0 + x0 - idx_b = base_y1 + x0 - idx_c = base_y0 + x1 - idx_d = base_y1 + x1 - - # use indices to lookup pixels in the flat image and restore - # channels dim - im_flat = tf.reshape(im, tf.stack([-1, channels])) - im_flat = tf.cast(im_flat, 'float32') - Ia = tf.gather(im_flat, idx_a) - Ib = tf.gather(im_flat, idx_b) - Ic = tf.gather(im_flat, idx_c) - Id = tf.gather(im_flat, idx_d) - - # and finally calculate interpolated values - x0_f = tf.cast(x0, 'float32') - x1_f = tf.cast(x1, 'float32') - y0_f = tf.cast(y0, 'float32') - y1_f = tf.cast(y1, 'float32') - wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) - wb = tf.expand_dims(((x1_f - x) * (y - y0_f)), 1) - wc = tf.expand_dims(((x - x0_f) * (y1_f - y)), 1) - wd = tf.expand_dims(((x - x0_f) * (y - y0_f)), 1) - output = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) - return output - - def _meshgrid(height, width): - with tf.compat.v1.variable_scope('_meshgrid'): - # This should be equivalent to: - # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), - # np.linspace(-1, 1, height)) - # ones = np.ones(np.prod(x_t.shape)) - # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) - x_t = tf.matmul( - tf.ones(shape=tf.stack([height, 1])), - tf.transpose(a=tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), perm=[1, 0]) - ) - y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), tf.ones(shape=tf.stack([1, width]))) - - x_t_flat = tf.reshape(x_t, (1, -1)) - y_t_flat = tf.reshape(y_t, (1, -1)) - - ones = tf.ones_like(x_t_flat) - grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) - return grid - - def _transform(theta, input_dim, out_size): - with tf.compat.v1.variable_scope('_transform'): - num_batch = tf.shape(input=input_dim)[0] - num_channels = tf.shape(input=input_dim)[3] - theta = tf.reshape(theta, (-1, 2, 3)) - theta = tf.cast(theta, 'float32') - - # grid of (x_t, y_t, 1), eq (1) in ref [1] - 
out_height = out_size[0] - out_width = out_size[1] - grid = _meshgrid(out_height, out_width) - grid = tf.expand_dims(grid, 0) - grid = tf.reshape(grid, [-1]) - grid = tf.tile(grid, tf.stack([num_batch])) - grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) - - # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) - T_g = tf.matmul(theta, grid) - x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) - y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) - x_s_flat = tf.reshape(x_s, [-1]) - y_s_flat = tf.reshape(y_s, [-1]) - - input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, out_size) - - output = tf.reshape(input_transformed, tf.stack([num_batch, out_height, out_width, num_channels])) - return output - - with tf.compat.v1.variable_scope(name): - output = _transform(theta, U, out_size) - return output - - -def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer2dAffine'): - """Batch Spatial Transformer function for `2D Affine Transformation `__. - - Parameters - ---------- - U : list of float - tensor of inputs [batch, height, width, num_channels] - thetas : list of float - a set of transformations for each input [batch, num_transforms, 6] - out_size : list of int - the size of the output [out_height, out_width] - name : str - optional function name - - Returns - ------ - float - Tensor of size [batch * num_transforms, out_height, out_width, num_channels] - - """ - with tf.compat.v1.variable_scope(name): - num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) - indices = [[i] * num_transforms for i in xrange(num_batch)] - input_repeated = tf.gather(U, tf.reshape(indices, [-1])) - return transformer(input_repeated, thetas, out_size) - - -class SpatialTransformer2dAffine(Layer): - """The :class:`SpatialTransformer2dAffine` class is a 2D `Spatial Transformer Layer `__ for - `2D Affine Transformation `__. - - Parameters - ----------- - prev_layer : :class:`Layer` - Previous layer. - theta_layer : :class:`Layer` - The localisation network. - - We will use a :class:`Dense` to make the theta size to [batch, 6], value range to [0, 1] (via tanh). - out_size : tuple of int or None - The size of the output of the network (height, width), the feature maps will be resized by this. - name : str - A unique layer name. - - References - ----------- - - `Spatial Transformer Networks `__ - - `TensorFlow/Models `__ - - """ - - @deprecated_alias(layer='prev_layer', end_support_version=1.9) # TODO remove this line for the 1.9 release - def __init__( - self, - prev_layer, - theta_layer, - out_size=None, - name='spatial_trans_2d_affine', - ): - - super(SpatialTransformer2dAffine, self).__init__(prev_layer=[prev_layer, theta_layer], name=name) - - self.inputs = prev_layer.outputs # Do not remove - self.theta_layer = theta_layer - - if out_size is None: - out_size = [40, 40] - - logging.info( - "SpatialTransformer2dAffine %s: in_size: %s out_size: %s" % - (self.name, self.inputs.get_shape().as_list(), out_size) - ) - - with tf.compat.v1.variable_scope(name) as vs: - - # 1. make the localisation network to [batch, 6] via Flatten and Dense. - if self.theta_layer.outputs.get_shape().ndims > 2: - self.theta_layer.outputs = flatten_reshape(self.theta_layer.outputs, 'flatten') - - # 2. To initialize the network to the identity transform init. 
- # 2.1 W - n_in = int(self.theta_layer.outputs.get_shape()[-1]) - shape = (n_in, 6) - - W = tf.compat.v1.get_variable(name='W', initializer=tf.zeros(shape), dtype=LayersConfig.tf_dtype) - # 2.2 b - - identity = tf.constant(np.array([[1., 0, 0], [0, 1., 0]]).astype('float32').flatten()) - - b = tf.compat.v1.get_variable(name='b', initializer=identity, dtype=LayersConfig.tf_dtype) - # 2.3 transformation matrix - - self.theta = tf.nn.tanh(tf.matmul(self.theta_layer.outputs, W) + b) - # 3. Spatial Transformer Sampling - # 3.1 transformation - - self.outputs = transformer(self.inputs, self.theta, out_size=out_size) - - # 3.2 automatically set batch_size and channels - # e.g. [?, 40, 40, ?] --> [64, 40, 40, 1] or [64, 20, 20, 4]/ Hao Dong - # - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - - else: - batch_size = array_ops.shape(self.inputs)[0] - - n_channels = self.inputs.get_shape().as_list()[-1] - # logging.info(self.outputs) - self.outputs = tf.reshape(self.outputs, shape=[batch_size, out_size[0], out_size[1], n_channels]) - # logging.info(self.outputs) - # exit() - # 4. Get all parameters - variables = tf.compat.v1.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # # theta_layer - # self._add_layers(theta_layer.all_layers) - # self._add_params(theta_layer.all_params) - # self.all_drop.update(theta_layer.all_drop) - - self._add_layers(self.outputs) - self._add_params(variables) diff --git a/tensorlayer/layers/stack.py b/tensorlayer/layers/stack.py deleted file mode 100644 index 9ebdda2..0000000 --- a/tensorlayer/layers/stack.py +++ /dev/null @@ -1,93 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf - -from tensorlayer.layers.core import Layer - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'Stack', - 'UnStack', -] - - -class Stack(Layer): - """ - The :class:`Stack` class is a layer for stacking a list of rank-R tensors into one rank-(R+1) tensor, see `tf.stack() `__. - - Parameters - ---------- - axis : int - Dimension along which to concatenate. - name : str - A unique layer name. - - Examples - --------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, shape=[None, 30]) - >>> net = tl.layers.Input(x, name='input') - >>> net1 = tl.layers.Dense(net, 10, name='dense1') - >>> net2 = tl.layers.Dense(net, 10, name='dense2') - >>> net3 = tl.layers.Dense(net, 10, name='dense3') - >>> net = tl.layers.Stack([net1, net2, net3], axis=1, name='stack') - (?, 3, 10) - - """ - - def __init__( - self, - axis=1, - name=None, #'stack', - ): - # super(Stack, self).__init__(prev_layer=layers, name=name) - super().__init__(name) - self.axis = axis - logging.info("Stack %s: axis: %d" % (self.name, self.axis)) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - outputs = tf.stack(inputs, axis=self.axis, name=self.name) - return outputs - - -class UnStack(Layer): - """ - The :class:`UnStack` class is a layer for unstacking the given dimension of a rank-R tensor into rank-(R-1) tensors., see `tf.unstack() `__. - - Parameters - ---------- - num : int or None - The length of the dimension axis. Automatically inferred if None (the default). - axis : int - Dimension along which axis to concatenate. - name : str - A unique layer name. - - Returns - ------- - list of :class:`Layer` - The list of layer objects unstacked from the input. 
- - """ - - def __init__(self, num=None, axis=0, name=None): #'unstack'): - # super(UnStack, self).__init__(prev_layer=prev_layer, name=name) - super().__init__(name) - self.num = num - self.axis = axis - logging.info("UnStack %s: num: %s axis: %d" % (self.name, self.num, self.axis)) - - def build(self, inputs_shape): - pass - - def forward(self, inputs): - outputs = tf.unstack(inputs, num=self.num, axis=self.axis, name=self.name) - return outputs diff --git a/tensorlayer/layers/utils.py b/tensorlayer/layers/utils.py deleted file mode 100644 index 4048105..0000000 --- a/tensorlayer/layers/utils.py +++ /dev/null @@ -1,442 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -import tensorflow as tf - -from tensorflow.python.ops.rnn_cell import LSTMStateTuple - -from tensorlayer import logging - -from tensorlayer.decorators import deprecated -from tensorlayer.decorators import deprecated_alias - -__all__ = [ - 'cabs', - 'compute_alpha', - 'flatten_reshape', - 'get_collection_trainable', - 'get_layers_with_name', - 'get_variables_with_name', - 'initialize_global_variables', - 'initialize_rnn_state', - 'list_remove_repeat', - 'merge_networks', - 'print_all_variables', - 'quantize', - 'quantize_active', - 'quantize_weight', - 'quantize_active_overflow', - 'quantize_weight_overflow', - 'set_name_reuse', - 'ternary_operation', -] - -########## Module Public Functions ########## - - -def cabs(x): - return tf.minimum(1.0, tf.abs(x), name='cabs') - - -def compute_alpha(x): - """Computing the scale parameter.""" - threshold = _compute_threshold(x) - alpha1_temp1 = tf.where(tf.greater(x, threshold), x, tf.zeros_like(x, tf.float32)) - alpha1_temp2 = tf.where(tf.less(x, -threshold), x, tf.zeros_like(x, tf.float32)) - alpha_array = tf.add(alpha1_temp1, alpha1_temp2, name=None) - alpha_array_abs = tf.abs(alpha_array) - alpha_array_abs1 = tf.where( - tf.greater(alpha_array_abs, 0), tf.ones_like(alpha_array_abs, tf.float32), - tf.zeros_like(alpha_array_abs, tf.float32) - ) - alpha_sum = tf.reduce_sum(input_tensor=alpha_array_abs) - n = tf.reduce_sum(input_tensor=alpha_array_abs1) - alpha = tf.compat.v1.div(alpha_sum, n) - return alpha - - -def flatten_reshape(variable, name='flatten'): - """Reshapes a high-dimension vector input. - - [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row x mask_col x n_mask] - - Parameters - ---------- - variable : TensorFlow variable or tensor - The variable or tensor to be flatten. - name : str - A unique layer name. 
- - -def flatten_reshape(variable, name='flatten'): - """Reshapes a high-dimensional input into a vector, keeping the batch dimension. - - [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row x mask_col x n_mask] - - Parameters - ---------- - variable : TensorFlow variable or tensor - The variable or tensor to be flattened. - name : str - A unique layer name. - - Returns - ------- - Tensor - Flattened tensor. - - Examples - -------- - >>> import tensorflow as tf - >>> import tensorlayer as tl - >>> x = tf.placeholder(tf.float32, [None, 128, 128, 3]) - >>> # Convolution Layer with 32 filters and a kernel size of 5 - >>> network = tf.layers.conv2d(x, 32, 5, activation=tf.nn.relu) - >>> # Max Pooling (down-sampling) with strides of 2 and kernel size of 2 - >>> network = tf.layers.max_pooling2d(network, 2, 2) - >>> print(network.get_shape()[:].as_list()) - >>> [None, 62, 62, 32] - >>> network = tl.layers.flatten_reshape(network) - >>> print(network.get_shape()[:].as_list()) - >>> [None, 123008] - """ - dim = 1 - for d in variable.get_shape()[1:].as_list(): - dim *= d - return tf.reshape(variable, shape=[-1, dim], name=name)
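flatten_reshape() above is just a batch-preserving reshape; a quick TF2 check on the docstring's shape (the batch size 4 is invented):

import tensorflow as tf

x = tf.zeros([4, 62, 62, 32])
dim = 1
for d in x.get_shape()[1:].as_list():  # same loop as above: 62 * 62 * 32
    dim *= d
y = tf.reshape(x, shape=[-1, dim])
print(y.shape)  # (4, 123008)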
- - -def get_collection_trainable(name=''): - variables = [] - for p in tf.compat.v1.trainable_variables(): - # print(p.name.rpartition('/')[0], self.name) - if p.name.rpartition('/')[0] == name: - variables.append(p) - return variables - - -@deprecated_alias(printable='verbose', end_support_version=1.9) # TODO remove this line for the 1.9 release -def get_layers_with_name(net, name="", verbose=False): - """Get a list of layers' output in a network by a given name scope. - - Parameters - ----------- - net : :class:`Layer` - The last layer of the network. - name : str - Get the layers' output that contain this name. - verbose : boolean - If True, print information of all the layers' outputs. - - Returns - -------- - list of Tensor - A list of layers' output (TensorFlow tensor) - - Examples - --------- - >>> import tensorlayer as tl - >>> layers = tl.layers.get_layers_with_name(net, "CNN", True) - - """ - logging.info(" [*] getting layers with %s" % name) - - layers = [] - i = 0 - - for layer in net.all_layers: - # logging.info(type(layer.name)) - if name in layer.name: - layers.append(layer) - - if verbose: - logging.info(" got {:3}: {:15} {}".format(i, layer.name, str(layer.get_shape()))) - i = i + 1 - - return layers - - -def get_variable_with_initializer(scope_name, var_name, shape, init=tf.compat.v1.initializers.random_normal()): - # FIXME: documentation needed - # if tf.executing_eagerly(): - var_name = scope_name + "/" + var_name - # if init_args is not None and len(init_args) != 0: - # initial_value = init(**init_args)(shape=shape) - # else: - # initial_value = init()(shape=shape) - # var = tf.Variable(initial_value=initial_value, name=var_name) - # FIXME: not sure whether this is correct? - initial_value = init(shape=shape) - var = tf.Variable(initial_value=initial_value, name=var_name)#, **init_args) - - # else: - # with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE): - # var = tf.get_variable(name=var_name, initializer=tf.zeros(shape), trainable=train) - return var - - -@deprecated_alias(printable='verbose', end_support_version=1.9) # TODO remove this line for the 1.9 release -def get_variables_with_name(name=None, train_only=True, verbose=False): - """Get a list of TensorFlow variables by a given name scope. - - Parameters - ---------- - name : str - Get the variables that contain this name. - train_only : boolean - If True, only get the trainable variables. - verbose : boolean - If True, print the information of all variables. - - Returns - ------- - list of Tensor - A list of TensorFlow variables - - Examples - -------- - >>> import tensorlayer as tl - >>> dense_vars = tl.layers.get_variables_with_name('dense', True, True) - - """ - if name is None: - raise Exception("please input a name") - - logging.info(" [*] getting variables with %s" % name) - - # tvar = tf.trainable_variables() if train_only else tf.all_variables() - if train_only: - t_vars = tf.compat.v1.trainable_variables() - - else: - t_vars = tf.compat.v1.global_variables() - - d_vars = [var for var in t_vars if name in var.name] - - if verbose: - for idx, v in enumerate(d_vars): - logging.info(" got {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) - - return d_vars - - -@deprecated( - date="2018-09-30", instructions="This API is deprecated in favor of `sess.run(tf.global_variables_initializer())`" -) -def initialize_global_variables(sess): - """Initialize the global variables of TensorFlow. - - Run ``sess.run(tf.global_variables_initializer())`` for TF 0.12+ or - ``sess.run(tf.initialize_all_variables())`` for TF 0.11. - - Parameters - ---------- - sess : Session - TensorFlow session. - - """ - if sess is None: - raise AssertionError('The session must be defined') - - sess.run(tf.compat.v1.global_variables_initializer()) - - -def initialize_rnn_state(state, feed_dict=None): - """Returns the initialized RNN state. - The inputs are `LSTMStateTuple` or `State` of `RNNCells`, and an optional `feed_dict`. - - Parameters - ---------- - state : RNN state. - The TensorFlow's RNN state. - feed_dict : dictionary - Initial RNN state; if None, returns zero state. - - Returns - ------- - RNN state - The TensorFlow's RNN state. - - """ - if isinstance(state, LSTMStateTuple): - c = state.c.eval(feed_dict=feed_dict) - h = state.h.eval(feed_dict=feed_dict) - return c, h - else: - new_state = state.eval(feed_dict=feed_dict) - return new_state - - -def list_remove_repeat(x): - """Remove the repeated items in a list, and return the processed list. - You may need it when creating merged layers like Concat and Elementwise. - - Parameters - ---------- - x : list - Input - - Returns - ------- - list - The list with its repeated items removed. - - Examples - ------- - >>> l = [2, 3, 4, 2, 3] - >>> l = list_remove_repeat(l) - [2, 3, 4] - - """ - y = [] - for i in x: - if i not in y: - y.append(i) - - return y - - -def merge_networks(layers=None): - """Merge all parameters, layers and dropout probabilities to a :class:`Layer`. - The returned network is the first network in the list. - - Parameters - ---------- - layers : list of :class:`Layer` - Merge all parameters, layers and dropout probabilities to the first layer in the list. - - Returns - -------- - :class:`Layer` - The network after merging all parameters, layers and dropout probabilities to the first network in the list. - - Examples - --------- - >>> import tensorlayer as tl - >>> n1 = ... - >>> n2 = ...
- >>> n1 = tl.layers.merge_networks([n1, n2]) - - """ - if layers is None: - raise Exception("layers should be a list of TensorLayer's Layers.") - layer = layers[0] - - all_params = [] - all_layers = [] - all_drop = {} - - for l in layers: - all_params.extend(l.all_params) - all_layers.extend(l.all_layers) - all_drop.update(l.all_drop) - - layer.all_params = list(all_params) - layer.all_layers = list(all_layers) - layer.all_drop = dict(all_drop) - - layer.all_layers = list_remove_repeat(layer.all_layers) - layer.all_params = list_remove_repeat(layer.all_params) - - return layer - - -def print_all_variables(train_only=False): - """Print information of trainable or all variables, - without ``tl.layers.initialize_global_variables(sess)``. - - Parameters - ---------- - train_only : boolean - Whether print trainable variables only. - - If True, print the trainable variables. - - If False, print all variables. - - """ - # tvar = tf.trainable_variables() if train_only else tf.all_variables() - if train_only: - t_vars = tf.compat.v1.trainable_variables() - logging.info(" [*] printing trainable variables") - - else: - t_vars = tf.compat.v1.global_variables() - logging.info(" [*] printing global variables") - - for idx, v in enumerate(t_vars): - logging.info(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) - - -def quantize(x): - # ref: https://github.com/AngusG/tensorflow-xnor-bnn/blob/master/models/binary_net.py#L70 - # https://github.com/itayhubara/BinaryNet.tf/blob/master/nnUtils.py - with tf.compat.v1.get_default_graph().gradient_override_map({"Sign": "TL_Sign_QuantizeGrad"}): - return tf.sign(x) - - -def quantize_active(x, bitA): - if bitA == 32: - return x - return _quantize_dorefa(x, bitA) - - -def quantize_weight(x, bitW, force_quantization=False): - G = tf.compat.v1.get_default_graph() - if bitW == 32 and not force_quantization: - return x - if bitW == 1: # BWN - with G.gradient_override_map({"Sign": "Identity"}): - E = tf.stop_gradient(tf.reduce_mean(input_tensor=tf.abs(x))) - return tf.sign(x / E) * E - x = tf.clip_by_value(x * 0.5 + 0.5, 0.0, 1.0) # it seems as though most weights are within -1 to 1 region anyways - return 2 * _quantize_dorefa(x, bitW) - 1 - - -def quantize_active_overflow(x, bitA): - if bitA == 32: - return x - return _quantize_overflow(x, bitA) - - -def quantize_weight_overflow(x, bitW): - if bitW == 32: - return x - return _quantize_overflow(x, bitW) - - -@deprecated(date="2018-06-30", instructions="TensorLayer relies on TensorFlow to check name reusing") -def set_name_reuse(enable=True): - logging.warning('this method is DEPRECATED and has no effect, please remove it from your code.') - - -def ternary_operation(x): - """Ternary operation use threshold computed with weights.""" - g = tf.compat.v1.get_default_graph() - with g.gradient_override_map({"Sign": "Identity"}): - threshold = _compute_threshold(x) - x = tf.sign(tf.add(tf.sign(tf.add(x, threshold)), tf.sign(tf.add(x, -threshold)))) - return x - - -########## Module Private Functions ########## - - -@tf.RegisterGradient("TL_Sign_QuantizeGrad") -def _quantize_grad(op, grad): - """Clip and binarize tensor using the straight through estimator (STE) for the gradient.""" - return tf.clip_by_value(grad, -1, 1) - - -def _quantize_dorefa(x, k): - G = tf.compat.v1.get_default_graph() - n = float(2**k - 1) - with G.gradient_override_map({"Round": "Identity"}): - return tf.round(x * n) / n - - -def _quantize_overflow(x, k): - G = tf.compat.v1.get_default_graph() - n = float(2**k - 1) - max_value = 
tf.reduce_max(input_tensor=x) - min_value = tf.reduce_min(input_tensor=x) - with G.gradient_override_map({"Round": "Identity"}): - step = tf.stop_gradient((max_value - min_value) / n) - return tf.round((tf.maximum(tf.minimum(x, max_value), min_value) - min_value) / step) * step + min_value - - -def _compute_threshold(x): - """ - ref: https://github.com/XJTUWYD/TWN - Computing the threshold. - """ - x_sum = tf.reduce_sum(input_tensor=tf.abs(x), axis=None, keepdims=False, name=None) - threshold = tf.compat.v1.div(x_sum, tf.cast(tf.size(input=x), tf.float32), name=None) - threshold = tf.multiply(0.7, threshold, name=None) - return threshold
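The forward arithmetic of the k-bit quantizers above is easy to check numerically; the gradient_override_map lines only alter the backward pass (Round acts as identity for gradients, the straight-through estimator). A hedged NumPy rendering of _quantize_dorefa's rounding:

import numpy as np

def dorefa_forward(x, k):
    n = float(2 ** k - 1)        # number of uniform steps in [0, 1]
    return np.round(x * n) / n   # same expression as _quantize_dorefa, minus the STE

x = np.array([0.0, 0.2, 0.5, 0.9, 1.0])
print(dorefa_forward(x, 2))      # 2-bit levels {0, 1/3, 2/3, 1}: [0. 0.333 0.667 1. 1.]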
diff --git a/tensorlayer/lazy_imports.py b/tensorlayer/lazy_imports.py deleted file mode 100644 index 4a079a4..0000000 --- a/tensorlayer/lazy_imports.py +++ /dev/null @@ -1,99 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -"""This module provides lazy import functionality to improve the import -performance of nitime. For example, some parts of nitime leverage and import -matplotlib, which is quite a big package, yet most of the nitime code does not -depend on matplotlib. By lazily-loading a module, we defer the overhead of -importing it until the first time it is actually used, thereby speeding up -nitime imports. - -A generic :class:`LazyImport` class is implemented which takes the module name -as a parameter, and acts as a proxy for that module, importing it only when -the module is used, but effectively acting as the module in every other way -(including inside IPython with respect to introspection and tab completion) -with the *exception* of reload() - reloading a :class:`LazyImport` raises an -:class:`ImportError`. - -Commonly used nitime lazy imports are also defined in :mod:`nitime.lazy`, so -they can be reused throughout nitime. -""" -import os -import sys -import types - - -class LazyImport(types.ModuleType): - """ - This class takes the module name as a parameter, and acts as a proxy for - that module, importing it only when the module is used, but effectively - acting as the module in every other way (including inside IPython with - respect to introspection and tab completion) with the *exception* of - reload()- reloading a :class:`LazyImport` raises an :class:`ImportError`. - - >>> mlab = LazyImport('matplotlib.mlab') - - No import happens on the above line, until we do something like call an - ``mlab`` method or try to do tab completion or introspection on ``mlab`` - in IPython. - - >>> mlab - <module 'matplotlib.mlab' will be lazily loaded> - - Now the :class:`LazyImport` will do an actual import, and call the dist - function of the imported module. - - >>> mlab.dist(1969,2011) - 42.0 - """ - - def __getattribute__(self, x): - # This method will be called only once, since we'll change - # self.__class__ to LoadedLazyImport, and __getattribute__ will point - # to module.__getattribute__ - - name = object.__getattribute__(self, '__name__') - __import__(name) - - # if name above is 'package.foo.bar', package is returned, the docs - # recommend that in order to get back the full thing, that we import - # and then lookup the full name is sys.modules, see: - # http://docs.python.org/library/functions.html#__import__ - - module = sys.modules[name] - - # Now that we've done the import, cutout the middleman and make self - # act as the imported module - - class LoadedLazyImport(types.ModuleType): - __getattribute__ = module.__getattribute__ - __repr__ = module.__repr__ - - object.__setattr__(self, '__class__', LoadedLazyImport) - - # The next line will make "reload(l)" a silent no-op - return module.__getattribute__(x) - - def __repr__(self): - return "<module '%s' will be lazily loaded>" % object.__getattribute__(self, '__name__') - - -if 'READTHEDOCS' in os.environ: - lazy_doc = """ - WARNING: To get Sphinx documentation to build we disable - LazyImports, which makes Sphinx incorrectly report this - class as having a base class of object. In reality, - :class:`LazyImport`'s base class is - :class:`types.ModuleType`. - """ - - lazy_doc += LazyImport.__doc__ - - class LazyImport(object): - __doc__ = lazy_doc - - def __init__(self, x): - __import__(x) - self.module = sys.modules[x] - - def __getattr__(self, x): - return self.module.__getattribute__(x) diff --git a/tensorlayer/logging/__init__.py b/tensorlayer/logging/__init__.py deleted file mode 100644 index 274eef0..0000000 --- a/tensorlayer/logging/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -TensorLayer provides rich layer implementations tailored for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow users to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API <https://www.tensorflow.org/api_docs>`__. -""" - -from tensorlayer.lazy_imports import LazyImport - -from .tl_logging import * - -# Lazy Imports -contrib = LazyImport("tensorlayer.logging.contrib") - -__all__ = [ - # tl_logging - 'DEBUG', - 'debug', - 'ERROR', - 'error', - 'FATAL', - 'fatal', - 'INFO', - 'info', - 'WARN', - 'warn', - 'warning', - 'set_verbosity', - 'get_verbosity' -] diff --git a/tensorlayer/logging/contrib/__init__.py b/tensorlayer/logging/contrib/__init__.py deleted file mode 100644 index dfb2f18..0000000 --- a/tensorlayer/logging/contrib/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -TensorLayer provides rich layer implementations tailored for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow users to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API <https://www.tensorflow.org/api_docs>`__. -""" - -from .hyperdash import *
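A self-contained miniature of the idea behind LazyImport, using importlib; hedged: TinyLazyImport is an invented name, and the real class above is subtler (it subclasses types.ModuleType and swaps __class__ after the first attribute access so introspection keeps working):

import importlib

class TinyLazyImport:
    def __init__(self, name):
        self._name, self._module = name, None

    def __getattr__(self, attr):
        if self._module is None:  # the real import is deferred until first use
            self._module = importlib.import_module(self._name)
        return getattr(self._module, attr)

lazy_json = TinyLazyImport('json')      # nothing imported yet
print(lazy_json.dumps({'lazy': True}))  # first attribute access triggers the import

diff --git a/tensorlayer/logging/contrib/hyperdash.py b/tensorlayer/logging/contrib/hyperdash.py deleted file mode 100644 index 4ff3298..0000000 --- a/tensorlayer/logging/contrib/hyperdash.py +++ /dev/null @@ -1,64 +0,0 @@ -#!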
/usr/bin/python -# -*- coding: utf-8 -*- - -from __future__ import absolute_import - -import tensorlayer as tl - -import hyperdash as hd - -__all__ = ["HyperDashHandler", "monitor", "Experiment", "IPythonMagicsWrapper"] - - -class HyperDashHandler(object): - apikey = None - - @classmethod - def reset_apikey(cls): - cls.apikey = None - - @classmethod - def set_apikey(cls, apikey): - cls.apikey = apikey - - @classmethod - def get_apikey(cls): - - if cls.apikey is None: - raise ValueError( - "Hyperdash API is not set.\n" - "You can obtain your API Key using: `hyperdash login --email` or `hyperdash login --github`\n" - "You should first call `HyperDashHandler.set_apikey('my_api_key')` in order to use `hyperdash`" - ) - - tl.logging.debug("Hyperdash API Key: %s" % cls.apikey) - - return cls.apikey - - @classmethod - def monitor(cls, model_name, api_key=None, capture_io=True): - - if api_key is not None: - cls.set_apikey(api_key) - - return hd.monitor(model_name, api_key_getter=cls.get_apikey, capture_io=capture_io) - - -class Experiment(hd.Experiment): - - def __init__( - self, - model_name, - api_key=None, - capture_io=True, - ): - - if api_key is not None: - HyperDashHandler.set_apikey(api_key) - - super(Experiment, - self).__init__(model_name=model_name, api_key_getter=HyperDashHandler.get_apikey, capture_io=capture_io) - - -monitor = HyperDashHandler.monitor -IPythonMagicsWrapper = hd.IPythonMagicsWrapper diff --git a/tensorlayer/logging/tl_logging.py b/tensorlayer/logging/tl_logging.py deleted file mode 100644 index 95c07ff..0000000 --- a/tensorlayer/logging/tl_logging.py +++ /dev/null @@ -1,269 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import os as _os -import sys as _sys -import time as _time - -import logging as _logging - -from logging import DEBUG -from logging import ERROR -from logging import FATAL -from logging import INFO -from logging import WARN - -import threading - -import six - -from tensorlayer.decorators import deprecated - -__all__ = [ - 'DEBUG', - 'debug', - 'ERROR', - 'error', - 'FATAL', - 'fatal', - 'INFO', - 'info', - 'WARN', - 'warning', - 'warn', # Deprecated - 'set_verbosity', - 'get_verbosity' -] - -# Don't use this directly. Use _get_logger() instead. -_logger = None -_logger_lock = threading.Lock() - -_level_names = { - FATAL: 'FATAL', - ERROR: 'ERROR', - WARN: 'WARN', - INFO: 'INFO', - DEBUG: 'DEBUG', -} - - -def _get_logger(): - global _logger - - # Use double-checked locking to avoid taking lock unnecessarily. - if _logger is not None: - return _logger - - _logger_lock.acquire() - - try: - if _logger: - return _logger - - # Scope the TensorFlow logger to not conflict with users' loggers. - logger = _logging.getLogger('tensorlayer') - - # Don't further configure the TensorFlow logger if the root logger is - # already configured. This prevents double logging in those cases. - if not _logging.getLogger().handlers: - # Determine whether we are in an interactive environment - # This is only defined in interactive shells. - if hasattr(_sys, "ps1"): - _interactive = True - else: - _interactive = _sys.flags.interactive - - # If we are in an interactive environment (like Jupyter), set loglevel - # to INFO and pipe the output to stdout. - if _interactive: - logger.setLevel(INFO) - _logging_target = _sys.stdout - else: - _logging_target = _sys.stderr - - # Add the output handler. 
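- # The three statements below are the whole configuration: one StreamHandler on the
- # chosen target with a terse '[TL] %(message)s' format. A hedged standalone equivalent:
- #   h = _logging.StreamHandler(_sys.stderr); h.setFormatter(_logging.Formatter('[TL] %(message)s'));
- #   then addHandler(h) on the 'tensorlayer' logger.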
- _handler = _logging.StreamHandler(_logging_target) - _handler.setFormatter(_logging.Formatter('[TL] %(message)s')) - logger.addHandler(_handler) - - _logger = logger - return _logger - - finally: - _logger_lock.release() - - -def log(level, msg, *args, **kwargs): - _get_logger().log(level, msg, *args, **kwargs) - - -def debug(msg, *args, **kwargs): - _get_logger().debug(msg, *args, **kwargs) - - -def info(msg, *args, **kwargs): - _get_logger().info(msg, *args, **kwargs) - - -def error(msg, *args, **kwargs): - _get_logger().error("ERROR: %s" % msg, *args, **kwargs) - - -def fatal(msg, *args, **kwargs): - _get_logger().fatal("FATAL: %s" % msg, *args, **kwargs) - - -@deprecated(date="2018-09-30", instructions="This API is deprecated. Please use as `tl.logging.warning`") -def warn(msg, *args, **kwargs): - warning(msg, *args, **kwargs) - - -def warning(msg, *args, **kwargs): - _get_logger().warning("WARNING: %s" % msg, *args, **kwargs) - - -# Mask to convert integer thread ids to unsigned quantities for logging -# purposes -_THREAD_ID_MASK = 2 * _sys.maxsize + 1 - -_log_prefix = None # later set to google2_log_prefix - -# Counter to keep track of number of log entries per token. -_log_counter_per_token = {} - - -def TaskLevelStatusMessage(msg): - error(msg) - - -def flush(): - raise NotImplementedError() - - -def vlog(level, msg, *args, **kwargs): - _get_logger().log(level, msg, *args, **kwargs) - - -def _GetNextLogCountPerToken(token): - """Wrapper for _log_counter_per_token. - - Args: - token: The token for which to look up the count. - - Returns: - The number of times this function has been called with - *token* as an argument (starting at 0) - """ - global _log_counter_per_token # pylint: disable=global-variable-not-assigned - _log_counter_per_token[token] = 1 + _log_counter_per_token.get(token, -1) - return _log_counter_per_token[token] - - -def log_every_n(level, msg, n, *args): - """Log 'msg % args' at level 'level' once per 'n' times. - - Logs the 1st call, (N+1)st call, (2N+1)st call, etc. - Not threadsafe. - - Args: - level: The level at which to log. - msg: The message to be logged. - n: The number of times this should be called before it is logged. - *args: The args to be substituted into the msg. - """ - count = _GetNextLogCountPerToken(_GetFileAndLine()) - log_if(level, msg, not (count % n), *args) - - -def log_first_n(level, msg, n, *args): # pylint: disable=g-bad-name - """Log 'msg % args' at level 'level' only first 'n' times. - - Not threadsafe. - - Args: - level: The level at which to log. - msg: The message to be logged. - n: The number of times this should be called before it is logged. - *args: The args to be substituted into the msg. - """ - count = _GetNextLogCountPerToken(_GetFileAndLine()) - log_if(level, msg, count < n, *args) - - -def log_if(level, msg, condition, *args): - """Log 'msg % args' at level 'level' only if condition is fulfilled.""" - if condition: - vlog(level, msg, *args) - - -def _GetFileAndLine(): - """Returns (filename, linenumber) for the stack frame.""" - # Use sys._getframe(). This avoids creating a traceback object. 
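- # The loop below walks f_back up the call stack until it leaves this file, so each
- # record is attributed to the user's call site rather than to these helpers; the
- # (filename, line) pair it returns is what google2_log_prefix() stamps on every line.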
- # pylint: disable=protected-access - f = _sys._getframe() - # pylint: enable=protected-access - our_file = f.f_code.co_filename - f = f.f_back - while f: - code = f.f_code - if code.co_filename != our_file: - return (code.co_filename, f.f_lineno) - f = f.f_back - return ('', 0) - - -def google2_log_prefix(level, timestamp=None, file_and_line=None): - """Assemble a logline prefix using the google2 format.""" - # pylint: disable=global-variable-not-assigned - global _level_names - # pylint: enable=global-variable-not-assigned - - # Record current time - now = timestamp or _time.time() - now_tuple = _time.localtime(now) - now_microsecond = int(1e6 * (now % 1.0)) - - (filename, line) = file_and_line or _GetFileAndLine() - basename = _os.path.basename(filename) - - # Severity string - severity = 'I' - if level in _level_names: - severity = _level_names[level][0] - - s = '%c%02d%02d %02d:%02d:%02d.%06d %5d %s:%d] ' % ( - severity, - now_tuple[1], # month - now_tuple[2], # day - now_tuple[3], # hour - now_tuple[4], # min - now_tuple[5], # sec - now_microsecond, - _get_thread_id(), - basename, - line - ) - - return s - - -def get_verbosity(): - """Return how much logging output will be produced.""" - return _get_logger().getEffectiveLevel() - - -def set_verbosity(v): - """Sets the threshold for what messages will be logged.""" - _get_logger().setLevel(v) - - -def _get_thread_id(): - """Get id of current thread, suitable for logging as an unsigned quantity.""" - # pylint: disable=protected-access - thread_id = six.moves._thread.get_ident() - # pylint:enable=protected-access - return thread_id & _THREAD_ID_MASK - - -_log_prefix = google2_log_prefix diff --git a/tensorlayer/models/__init__.py b/tensorlayer/models/__init__.py deleted file mode 100644 index f5c0402..0000000 --- a/tensorlayer/models/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -# """A collection of pre-defined well-known models.""" - -from .core import * -from .vgg16 import VGG16 -from .squeezenetv1 import SqueezeNetV1 -from .mobilenetv1 import MobileNetV1 -from .vgg19 import VGG19 -from .vgg import * diff --git a/tensorlayer/models/core.py b/tensorlayer/models/core.py deleted file mode 100644 index 41bba37..0000000 --- a/tensorlayer/models/core.py +++ /dev/null @@ -1,660 +0,0 @@ -import numpy as np -from abc import ABCMeta, abstractmethod -import tensorflow as tf -from tensorlayer.layers import Layer, ModelLayer -from tensorlayer import logging -from queue import Queue -from tensorlayer.files import utils -import os - -__all__ = [ - 'Model', -] - - -def _addindent(s_, numSpaces): - s = s_.split('\n') - # don't do anything for single-line stuff - if len(s) == 1: - return s_ - first = s.pop(0) - s = [(numSpaces * ' ') + line for line in s] - s = '\n'.join(s) - s = first + '\n' + s - return s - - -class Model(): - """The :class:`Model` class represents a neural network. - - Parameters - ----------- - inputs : a Layer or list of Layer - The input(s) to the model. - outputs : a Layer or list of Layer - The output(s) of the model. - name : None or str - The name of the model.
- """ - - @property - def inputs(self): - return self._inputs - - @property - def outputs(self): - return self._outputs - - def __init__(self, inputs=None, outputs=None, name=None): - ''' - - :param inputs: Layer or list of Layer - :param outputs: Layer or list of Layer - :param name: str - ''' - # Model properties - # TODO: model auto naming - self.name = name - - # Model state: train or test - self.is_train = None - - # Model weights - self._weights = None - - # Model inputs and outputs - # TODO: note that in dynamic network, inputs and outputs are both None, may cause problem, test needed - self._inputs = inputs - self._outputs = outputs - - # Model converted into a Layer - self._model_layer = None - - if inputs is None and outputs is None: - pass - - else: - # check type of inputs and outputs - check_order = ['inputs', 'outputs'] - for co, check_argu in enumerate([inputs, outputs]): - if isinstance(check_argu, Layer): - pass - elif isinstance(check_argu, list): - if len(check_argu) == 0: - raise ValueError( - "The argument `%s` is detected as an empty list. " % check_order[co] + - "It should be either Layer or a list of Layer." - ) - for idx in range(len(check_argu)): - if not isinstance(check_argu[idx], Layer): - raise TypeError( - "The argument `%s` should be either Layer or a list of Layer " - % (check_order[co]) + - "but the %s[%d] is detected as %s" - % (check_order[co], idx, type(check_argu[idx])) - ) - else: - raise TypeError("The argument `%s` should be either Layer or a list of Layer but received %s" % - (check_order[co], type(check_argu))) - - # build network graph - self.layer_dict, edges, self.layer_by_depth = self._construct_graph() - - # automatically connecting layers - # outputs_list = self._outputs if isinstance(self._outputs, list) else [self._outputs] - # self._stacked_layers = list() - # - # for out in outputs_list: - # stacked_layers = list() - # current = out - # while current is not None: - # stacked_layers.append(current) - # # FIXME: assume each layer has only one prev layer - # current = current._input_layer - # - # if isinstance(self._inputs, list): - # # check if the input_layer is in self._inputs - # idx_of_input = self._find_idx_of_inputs(stacked_layers[-1]) - # flag_input_not_found = True if idx_of_input == -1 else False - # else: - # flag_input_not_found = True if self._inputs is not stacked_layers[-1] else False - # if flag_input_not_found: - # raise ValueError( - # "The layer named `%s` not found in the inputs of the model. " % stacked_layers[-1].name + - # "Please check the argument `inputs` when the model is created." - # ) - # - # self._stacked_layers.append(stacked_layers) - - def __call__(self, inputs, is_train=None, **kwargs): - """ - - :param inputs: Tensor or list of Tensor, numpy.ndarray of list of numpy.ndarray (if in eager mode) - :param is_train: boolean - :return: - """ - - self._check_mode(is_train) - - # set training / inference mode if necessary - if is_train is not None: - self._set_mode_for_layers(is_train) - - # if self._input is a list, then it must be a static network - if isinstance(self._inputs, list): - if not isinstance(inputs, list): - raise ValueError("The argument `inputs` should be a list of values but detected as %s." % type(inputs)) - elif len(inputs) != len(self._inputs): - raise ValueError("The argument `inputs` should be a list with len=%d but detected as len=%d." 
- % (len(self._inputs), len(inputs))) - - # convert inputs to tensor if it is originally not - # FIXME: not sure convert_to_tensor here or ask user to do it - if isinstance(inputs, list): - for idx in range(len(inputs)): - inputs[idx] = tf.convert_to_tensor(inputs[idx]) - else: - inputs = tf.convert_to_tensor(inputs) - - return self.forward(inputs, **kwargs) - - @abstractmethod - def forward(self, *inputs): - # FIXME: currently using self._outputs to judge static network or dynamic network - if self._outputs is None: - raise ValueError("Outputs not defined. Please define inputs and outputs when the model is created. Or overwrite forward() function.") - - # results = list() - # TODO: clear memory when necessary - memory = dict() - - # get each layer's output by going through the graph in depth order - for depth, layers in enumerate(self.layer_by_depth): - if depth == 0: - # the first level of layers should contains all Input layers - if isinstance(self._inputs, list): - assert len(inputs[0]) == len(layers) - for idx, layer in enumerate(layers): - memory[layer.name] = layer(inputs[0][idx]) - else: - memory[layers[0].name] = layers[0](inputs[0]) - else: - for layer in layers: - prev_layer = layer._input_layer - if isinstance(prev_layer, list): - layer_input = [memory[player.name] for player in prev_layer] - else: - layer_input = memory[prev_layer.name] - memory[layer.name] = layer(layer_input) - - # for stacked_layers in self._stacked_layers: - # # TODO: how to reuse model - # - # # idx_of_input should not be -1 as it has been checked in __init__ - # if isinstance(self._inputs, list): - # idx_of_input = self._find_idx_of_inputs(stacked_layers[-1]) - # z = inputs[0][idx_of_input] - # else: - # z = inputs[0] - # - # for layer in stacked_layers[::-1]: - # # if layer.name in memory: - # # z = memory[layer.name] - # # else: - # # FIXME: assume each layer has only one prev layer - # z = layer(z) - # # memory[layer.name] = z - # results.append(z) - - if not isinstance(self._outputs, list): - return memory[self._outputs.name].outputs - # return results[0] - else: - return [memory[layer.name].outputs for layer in self._outputs] - # return results - - @property - def weights(self): - if self._weights is not None and len(self._weights) > 0: - # self._weights already extracted, so do nothing - pass - # FIXME: currently using self._outputs to judge static network or dynamic network - elif self._outputs is not None: - # self._inputs and self._outputs are defined when self is created - self._weights = list() - for depth_layers in self.layer_by_depth: - for layer in depth_layers: - if layer.weights is not None: - self._weights.extend(layer.weights) - - # # TODO: weights order compatible with TL1.0 - # outputs_list = self._outputs if isinstance(self._outputs, list) else [self._outputs] - # for out in outputs_list: - # current = out - # while current is not None: - # if current.weights is not None: - # self._weights.extend(current.weights) - # # FIXME: assume each layer has only one prev layer - # current = current._input_layer - else: - # self._inputs and self._outputs are NOT defined when self is created (eager mode) - self._weights = list() - attr_list = [attr for attr in dir(self) if attr[:2] != "__"] - attr_list.remove("weights") - for idx, attr in enumerate(attr_list): - try: - if isinstance(getattr(self, attr), Layer): - nowlayer = getattr(self, attr) - if not nowlayer._built: - raise AttributeError( - "Layer %s not built yet." 
% repr(nowlayer) - ) - if (nowlayer.weights != None): - self._weights.extend(getattr(self, attr).weights) - # if isinstance(getattr(self, attr), list): - # nowlist = getattr(self, attr) - # for itms in nowlist: - # if isinstance(itms, Layer): - # if (itms.weights == None): - # continue - # self._weights.extend(itms.weights) - except Exception: - pass - - return self._weights - - def train(self): - if self.is_train != True: - self.is_train = True - self._set_mode_for_layers(True) - - def eval(self): - if self.is_train != False: - self.is_train = False - self._set_mode_for_layers(False) - - def test(self): - self.eval() - - def infer(self): - self.eval() - - def as_layer(self): - - if self._outputs is None: - raise AttributeError( - "Dynamic network cannot be converted to Layer." - ) - - if self._model_layer is None: - self._model_layer = ModelLayer(self) - - return self._model_layer - - def _check_mode(self, is_train): - # contradiction test - if is_train is None and self.is_train is None: - raise ValueError("Training / inference mode not defined. Argument `is_train` should be set as True / False. Otherwise please use `Model.train()` / `Model.eval()` to switch the mode.") - elif is_train is not None and self.is_train is not None: - if is_train == self.is_train: - logging.warning("Training / inference mode redefined redundantly. Please EITHER use the argument `is_train` OR `Model.train()` / `Model.eval()` to define the mode.") - else: - raise AttributeError("Training / inference mode mismatch. The argument `is_train` is set as %s, " % is_train + - "but the mode is currently set as %s. " % ('Training by Model.train()' if self.is_train else 'Inference by Model.eval()') + - "Please EITHER use the argument `is_train` OR `Model.train()` / `Model.eval()` to define the mode.") - - def _set_mode_for_layers(self, is_train): - # FIXME: currently using self._outputs to judge static network or dynamic network - if self._outputs is not None: - for depth_layers in self.layer_by_depth: - for layer in depth_layers: - layer._set_mode_for_layers(is_train) - else: - attr_list = [attr for attr in dir(self) if attr[:2] != "__"] - attr_list.remove("weights") - for idx, attr in enumerate(attr_list): - try: - if isinstance(getattr(self, attr), Layer): - getattr(self, attr)._set_mode_for_layers(is_train) - except Exception: - pass - - def _find_idx_of_inputs(self, target_input): - """ - Return the index of the target_input in self._inputs. - Return -1 if not found. 
- - :param target_input: the input layer needs to be located - :return: - """ - if isinstance(self._inputs, list): - for idx, input in enumerate(self._inputs): - if input is target_input: - return idx - return -1 - - def __repr__(self): - tmpstr = self.__class__.__name__ + '(\n' - attr_list = [attr for attr in dir(self) if attr[:2] != "__"] - attr_list.remove("weights") - attr_list.remove("_set_mode_for_layers") - attr_list.remove("release_memory") - attr_list.remove("_inputs") - attr_list.remove("_outputs") - for idx, attr in enumerate(attr_list): - try: - if isinstance(getattr(self, attr), Layer) or isinstance(getattr(self, attr), Model): - nowlayer = getattr(self, attr) - modstr = nowlayer.__repr__() - modstr = _addindent(modstr, 2) - tmpstr = tmpstr + ' (' + attr + '): ' + modstr + '\n' - elif isinstance(getattr(self, attr), list) and (isinstance(getattr(self, attr)[0], Layer) or - isinstance(getattr(self, attr)[0], Model)): - for idx, element in enumerate(getattr(self, attr)): - modstr = element.__repr__() - modstr = _addindent(modstr, 2) - tmpstr = tmpstr + ' (' + attr + '[%d]): ' % idx + modstr + '\n' - - except Exception: - pass - tmpstr = tmpstr + ')' - return tmpstr - - # def __str__(self): - # return " {} ({}) outputs_shape: {}".format( - # self.__class__.__name__, self.name, [tuple(['batch_size'] + o._outputs_shape[1:]) for o in self.outputs] - # ) #_outputs_shape)#outputs.get_shape().as_list()) - - def print_all_layers(self): - nowoutputs = self._outputs - if (isinstance(nowoutputs, list) == False): - nowoutputs = [nowoutputs] - for out in nowoutputs: - stacked_layers = list() - current = out - while current is not None: - print(current.name, current == self._inputs) - stacked_layers.append(current) - current = current._input_layer - pass - - ## raise Exceptions for old version codes - def count_params(self, **kwargs): - raise Exception("please change count_params --> count_weights") - - def print_params(self, **kwargs): - raise Exception("please change print_params --> print_weights") - - @property - def all_params(self): - raise Exception("please change all_params --> weights") - - @property - def all_drop(self): - raise Exception("all_drop is deprecated") - - def _construct_graph(self): - layer_dict = {} # {'layer_name': layer, ...} - edges = {} # {'father_layer_name': 'child_layer_name', ...} - layer_by_depth = [] # [ [layer0, layer1, ...], [layer2, layer3, ...], ... 
] - - queue_layer = Queue() - indegrees = {} - - # BFS to visit all layers - outputs_list = self.outputs if isinstance(self.outputs, list) else [self.outputs] - for output_layer in outputs_list: - queue_layer.put(output_layer) - - while not queue_layer.empty(): - cur_layer = queue_layer.get() - - prev_layers = cur_layer._input_layer - - if not cur_layer.name in layer_dict.keys(): - layer_dict[cur_layer.name] = cur_layer - - if prev_layers is None: - # find input layer - indegrees[cur_layer.name] = 0 - continue - - if not isinstance(prev_layers, list): - prev_layers = [prev_layers] - - indegrees[cur_layer.name] = len(prev_layers) - - for layer in prev_layers: - if layer.name not in edges.keys(): - queue_layer.put(layer) - edges[layer.name] = [cur_layer.name] - else: - edges[layer.name].append(cur_layer.name) - - cur_depth = [] - next_depth = [] - - # find input layers, whose indegrees should be zero - # TODO : judge whether they are input layers - for k, v in indegrees.items(): - if v == 0: - cur_depth.append(layer_dict[k]) - - # top-sort style to decide each layer's depth - while not len(cur_depth) == 0: - layer_by_depth.append(cur_depth) - for layer in cur_depth: - if layer.name in edges.keys(): - for child_layer_name in edges[layer.name]: - indegrees[child_layer_name] -= 1 - if indegrees[child_layer_name] == 0: - next_depth.append(layer_dict[child_layer_name]) - - cur_depth = next_depth - next_depth = [] - - return layer_dict, edges, layer_by_depth - - def release_memory(self): - ''' - WARNING: This function should be called with great caution. - - Release objects that MAY NOT be necessary such as layer.outputs (if in a tf.GradientTape() scope). - For each layer in the model, layer.inputs and layer.outputs will be set as None but not deleted. - - A void function. - ''' - - if self._outputs is not None: - for depth_layers in self.layer_by_depth: - for layer in depth_layers: - layer._release_memory() - else: - attr_list = [attr for attr in dir(self) if attr[:2] != "__"] - attr_list.remove("release_memory") - for idx, attr in enumerate(attr_list): - try: - if isinstance(getattr(self, attr), Layer): - getattr(self, attr)._release_memory() - except Exception: - pass - - def save_weights(self, filepath, sess=None, format='hdf5'): - # TODO: Documentation pending - """Input filepath and the session(optional), save model weights into a file of given format. - Use self.load_weights() to restore. - - Parameters - ---------- - filepath : str - Filename to which the model weights will be saved. - sess : None or a tensorflow session - In eager mode, this should be left as None. In graph mode, must specify it with a tensorflow session. - format : Save file format - Value should be 'hdf5', 'npz', 'npz_dict' or 'ckpt'. Other format is not supported now. - 'hdf5' will save model weights name in a list and each layer has its weights stored in a group of - the hdf5 file. - 'npz' will save model weights sequentially into a npz file. - 'npz_dict' will save model weights along with its name as a dict into a npz file. - 'ckpt' will save model weights into a tensorflow ckpt file. 
- - Examples - -------- - 1) Save model to hdf5 in eager mode - >>> net = tl.models.vgg.vgg16() - >>> net.save_weights('./model.h5') - - 2) Save model to npz in graph mode - >>> sess = tf.Session() - >>> sess.run(tf.global_variables_initializer()) - >>> net.save_weights('./model.npz', sess=sess, format='npz') - - Returns - ------- - - """ - if self.weights is None: - logging.warning("Model contains no weights or layers haven't been built, nothing will be saved") - return - - if format == 'hdf5': - utils.save_weights_to_hdf5(filepath, self.weights, sess) - elif format == 'npz': - utils.save_npz(self.weights, filepath, sess) - elif format == 'npz_dict': - utils.save_npz_dict(self.weights, filepath, sess) - elif format == 'ckpt': - # TODO: enable this when tf save ckpt is enabled - raise NotImplementedError("ckpt load/save is not supported now.") - else: - raise ValueError("Save format must be 'hdf5', 'npz', 'npz_dict' or 'ckpt'." - "Other format is not supported now.") - - def load_weights(self, filepath, sess=None, format='hdf5', in_order=True, skip=False): - # TODO: Documentation pending - """Load model weights from a given file, which should be previously saved by self.save_weights(). - - Parameters - ---------- - filepath : str - Filename from which the model weights will be loaded. - sess : None or a tensorflow session - In eager mode, this should be left as None. In graph mode, must specify it with a tensorflow session. - Default is 'None'. - format : Loaded file format - Value should be 'hdf5', 'npz', 'npz_dict' or 'ckpt'. Other format is not supported now. - In addition, it should be the same format when you saved the file using self.save_weights(). - Default is 'hdf5'. - in_order : bool - Allow loading weights into model in a sequential way or by name. Only useful when 'format' is 'hdf5'. - If 'in_order' is True, weights from the file will be loaded into model in a sequential way. - If 'in_order' is False, weights from the file will be loaded into model by matching the name - with the weights of the model, particularly useful when trying to restore model in eager(graph) mode from - a weights file which is saved in graph(eager) mode. - Default is True. - skip : bool - Allow skipping weights whose name is mismatched between the file and model. Only useful when 'format' is - 'hdf5' or 'npz_dict'. If 'skip' is True, 'in_order' argument will be ignored and those loaded weights - whose name is not found in model weights (self.weights) will be skipped. If 'skip' is False, error will - occur when mismatch is found. - Default is False. - - Examples - -------- - 1) load model from a hdf5 file in eager mode. - >>> net = tl.models.vgg.vgg16() - >>> net.load_weights('./model_graph.h5', in_order=False, skip=True) # load weights by name, skipping mismatch - >>> net.load_weights('./model_eager.h5') # load sequentially - - 2) load model from a npz file in graph mode - >>> sess = tf.Session() - >>> sess.run(tf.global_variables_initializer()) - >>> net.load_weights('./model.npz', sess=sess, format='npz') - - Notes - ------- - 1) 'in_order' is only useful when 'format' is 'hdf5'. If you are trying to load a weights file which is - saved in a different mode, it is recommended to set 'in_order' be True. - 2) 'skip' is useful when 'format' is 'hdf5' or 'npz_dict'. If 'skip' is True, - 'in_order' argument will be ignored. 
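Round-tripping weights with the two methods documented above, following the docstrings' own eager-mode examples (hedged: assumes TL2 with the bundled VGG model and h5py installed):

import tensorlayer as tl

net = tl.models.vgg.vgg16()   # example model taken from the docstrings above
net.save_weights('./model.h5')                             # hdf5 is the default format
net.load_weights('./model.h5')                             # sequential restore (in_order=True)
net.load_weights('./model.h5', in_order=False, skip=True)  # by-name restore, skipping mismatches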
- - Returns - ------- - - """ - if not os.path.exists(filepath): - raise FileNotFoundError("file {} doesn't exist.".format(filepath)) - - if format == 'hdf5': - if skip == True or in_order == False: - # load by weights name - utils.load_hdf5_to_weights(filepath, self.weights, sess, skip) - else: - # load in order - utils.load_hdf5_to_weights_in_order(filepath, self.weights, sess) - elif format == 'npz': - utils.load_and_assign_npz(sess, filepath, self) - elif format == 'npz_dict': - utils.load_and_assign_npz_dict(sess, filepath, self, skip) - elif format == 'ckpt': - # TODO: enable this when tf save ckpt is enabled - raise NotImplementedError("ckpt load/save is not supported now.") - else: - raise ValueError("File format must be 'hdf5', 'npz', 'npz_dict' or 'ckpt'. " - "Other format is not supported now.") - - def save_ckpt(self, sess=None, mode_name='model.ckpt', save_dir='checkpoint', global_step=None, printable=False): - # TODO: Documentation pending - """""" - if not os.path.exists(save_dir): - raise FileNotFoundError("Save directory {} doesn't exist.".format(save_dir)) - utils.save_ckpt(sess, mode_name, save_dir, self.weights, global_step, printable) - - def load_ckpt(self, sess=None, mode_name='model.ckpt', save_dir='checkpoint', is_latest=True, printable=False): - # TODO: Documentation pending - """""" - utils.load_ckpt(sess, mode_name, save_dir, self.weights, is_latest, printable) - - -if __name__ == '__main__': - import tensorlayer as tl - from tensorlayer.layers import Input, Conv2d, BatchNorm, MaxPool2d, Flatten, Dense, LocalResponseNorm, Concat - from tensorlayer.models import Model - from tensorlayer.layers import LayerList, Dropout - - # def get_model(inputs_shape): - # ni = Input(inputs_shape) - # nn1 = Dense(384, act=tf.nn.relu, name='dense1relu')(ni) - # nn2 = Dense(192, act=tf.nn.relu, name='dense2relu')(ni) - # nn2 = Dense(64, act=tf.nn.relu, name='dense3relu')(nn2) - # nn = Concat(name='concat')([nn1, nn2]) - # - # M = Model(inputs=ni, outputs=nn, name='cnn') - # return M - def get_model(inputs_shape): - ni = Input(inputs_shape) - nn = Dropout(keep=0.8)(ni) - nn = Dense(n_units=800, act=tf.nn.relu)(nn) - nn = Dropout(keep=0.8)(nn) - nn = Dense(n_units=800, act=tf.nn.relu)(nn) - - # FIXME: currently assume the inputs and outputs are both Layer. They can be lists. 
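- # `M_hidden` below wraps the trunk built so far (Dropout, Dense, Dropout, Dense) into its
- # own sub-Model; calling .as_layer() afterwards turns that sub-model back into a single
- # composable Layer, so the outer "mlp" Model can keep stacking layers on top of it.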
- M_hidden = Model(inputs=ni, outputs=nn, name="mlp_hidden") - - nn = Dropout(keep=0.8)(M_hidden.as_layer()) - nn = Dense(n_units=10, act=tf.nn.relu)(nn) - return Model(inputs=ni, outputs=nn, name="mlp") - - net = get_model((None, 784)) - for x in net.weights: - print(x) - # for i, l in enumerate(net.layer_by_depth): - # print(i, l) - # - # x = tf.placeholder(tf.float32, shape=[None, 784], name='inputs') - # y_ = tf.placeholder(tf.int64, shape=[None], name='targets') - # - # ## get output tensors for training and testing - # # 1) use ``is_train'' - # y1 = net(x, is_train=True).outputs - # ce = tl.cost.cross_entropy(y1, y_, name='cost') \ No newline at end of file diff --git a/tensorlayer/models/imagenet_class_index.json b/tensorlayer/models/imagenet_class_index.json deleted file mode 100644 index 5fe0dfe..0000000 --- a/tensorlayer/models/imagenet_class_index.json +++ /dev/null @@ -1 +0,0 @@ -{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": 
["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": 
["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": 
["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", 
"three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": 
["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": 
["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", 
"motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", 
"safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", 
"trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], 
"978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} \ No newline at end of file diff --git a/tensorlayer/models/imagenet_classes.py b/tensorlayer/models/imagenet_classes.py deleted file mode 100644 index d13cfda..0000000 --- a/tensorlayer/models/imagenet_classes.py +++ /dev/null @@ -1,1003 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -class_names = '''tench, Tinca tinca -goldfish, Carassius auratus -great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias -tiger shark, Galeocerdo cuvieri -hammerhead, hammerhead shark -electric ray, crampfish, numbfish, torpedo -stingray -cock -hen -ostrich, Struthio camelus -brambling, Fringilla montifringilla -goldfinch, Carduelis carduelis -house finch, linnet, Carpodacus mexicanus -junco, snowbird -indigo bunting, indigo finch, indigo bird, Passerina cyanea -robin, American robin, Turdus migratorius -bulbul -jay -magpie -chickadee -water ouzel, dipper -kite -bald eagle, American eagle, Haliaeetus leucocephalus -vulture -great grey owl, great gray owl, Strix nebulosa -European fire salamander, Salamandra salamandra -common newt, Triturus vulgaris -eft -spotted salamander, Ambystoma maculatum -axolotl, mud puppy, Ambystoma mexicanum -bullfrog, Rana catesbeiana -tree frog, tree-frog -tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui -loggerhead, loggerhead turtle, Caretta caretta -leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea -mud turtle -terrapin -box turtle, box tortoise -banded gecko -common iguana, iguana, Iguana iguana -American chameleon, anole, Anolis carolinensis -whiptail, whiptail lizard -agama -frilled lizard, Chlamydosaurus kingi -alligator lizard -Gila monster, Heloderma suspectum -green lizard, Lacerta viridis -African chameleon, Chamaeleo chamaeleon -Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis -African crocodile, Nile crocodile, Crocodylus niloticus -American alligator, Alligator mississipiensis -triceratops -thunder snake, worm snake, Carphophis amoenus -ringneck snake, ring-necked snake, ring snake -hognose snake, puff adder, sand viper -green snake, grass snake -king snake, kingsnake -garter snake, grass snake -water snake -vine snake -night snake, Hypsiglena torquata -boa constrictor, Constrictor constrictor -rock python, rock snake, Python sebae -Indian cobra, Naja naja -green mamba -sea snake -horned viper, cerastes, sand viper, horned asp, Cerastes cornutus -diamondback, diamondback rattlesnake, Crotalus adamanteus -sidewinder, horned rattlesnake, Crotalus cerastes -trilobite -harvestman, daddy longlegs, Phalangium opilio -scorpion -black and gold garden spider, Argiope aurantia -barn spider, Araneus cavaticus -garden spider, Aranea diademata -black widow, Latrodectus mactans -tarantula -wolf spider, hunting spider -tick -centipede 
-black grouse -ptarmigan -ruffed grouse, partridge, Bonasa umbellus -prairie chicken, prairie grouse, prairie fowl -peacock -quail -partridge -African grey, African gray, Psittacus erithacus -macaw -sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita -lorikeet -coucal -bee eater -hornbill -hummingbird -jacamar -toucan -drake -red-breasted merganser, Mergus serrator -goose -black swan, Cygnus atratus -tusker -echidna, spiny anteater, anteater -platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus -wallaby, brush kangaroo -koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus -wombat -jellyfish -sea anemone, anemone -brain coral -flatworm, platyhelminth -nematode, nematode worm, roundworm -conch -snail -slug -sea slug, nudibranch -chiton, coat-of-mail shell, sea cradle, polyplacophore -chambered nautilus, pearly nautilus, nautilus -Dungeness crab, Cancer magister -rock crab, Cancer irroratus -fiddler crab -king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica -American lobster, Northern lobster, Maine lobster, Homarus americanus -spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish -crayfish, crawfish, crawdad, crawdaddy -hermit crab -isopod -white stork, Ciconia ciconia -black stork, Ciconia nigra -spoonbill -flamingo -little blue heron, Egretta caerulea -American egret, great white heron, Egretta albus -bittern -crane -limpkin, Aramus pictus -European gallinule, Porphyrio porphyrio -American coot, marsh hen, mud hen, water hen, Fulica americana -bustard -ruddy turnstone, Arenaria interpres -red-backed sandpiper, dunlin, Erolia alpina -redshank, Tringa totanus -dowitcher -oystercatcher, oyster catcher -pelican -king penguin, Aptenodytes patagonica -albatross, mollymawk -grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus -killer whale, killer, orca, grampus, sea wolf, Orcinus orca -dugong, Dugong dugon -sea lion -Chihuahua -Japanese spaniel -Maltese dog, Maltese terrier, Maltese -Pekinese, Pekingese, Peke -Shih-Tzu -Blenheim spaniel -papillon -toy terrier -Rhodesian ridgeback -Afghan hound, Afghan -basset, basset hound -beagle -bloodhound, sleuthhound -bluetick -black-and-tan coonhound -Walker hound, Walker foxhound -English foxhound -redbone -borzoi, Russian wolfhound -Irish wolfhound -Italian greyhound -whippet -Ibizan hound, Ibizan Podenco -Norwegian elkhound, elkhound -otterhound, otter hound -Saluki, gazelle hound -Scottish deerhound, deerhound -Weimaraner -Staffordshire bullterrier, Staffordshire bull terrier -American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier -Bedlington terrier -Border terrier -Kerry blue terrier -Irish terrier -Norfolk terrier -Norwich terrier -Yorkshire terrier -wire-haired fox terrier -Lakeland terrier -Sealyham terrier, Sealyham -Airedale, Airedale terrier -cairn, cairn terrier -Australian terrier -Dandie Dinmont, Dandie Dinmont terrier -Boston bull, Boston terrier -miniature schnauzer -giant schnauzer -standard schnauzer -Scotch terrier, Scottish terrier, Scottie -Tibetan terrier, chrysanthemum dog -silky terrier, Sydney silky -soft-coated wheaten terrier -West Highland white terrier -Lhasa, Lhasa apso -flat-coated retriever -curly-coated retriever -golden retriever -Labrador retriever -Chesapeake Bay retriever -German short-haired pointer -vizsla, Hungarian pointer -English setter -Irish setter, red setter -Gordon setter -Brittany spaniel -clumber, clumber spaniel -English 
springer, English springer spaniel -Welsh springer spaniel -cocker spaniel, English cocker spaniel, cocker -Sussex spaniel -Irish water spaniel -kuvasz -schipperke -groenendael -malinois -briard -kelpie -komondor -Old English sheepdog, bobtail -Shetland sheepdog, Shetland sheep dog, Shetland -collie -Border collie -Bouvier des Flandres, Bouviers des Flandres -Rottweiler -German shepherd, German shepherd dog, German police dog, alsatian -Doberman, Doberman pinscher -miniature pinscher -Greater Swiss Mountain dog -Bernese mountain dog -Appenzeller -EntleBucher -boxer -bull mastiff -Tibetan mastiff -French bulldog -Great Dane -Saint Bernard, St Bernard -Eskimo dog, husky -malamute, malemute, Alaskan malamute -Siberian husky -dalmatian, coach dog, carriage dog -affenpinscher, monkey pinscher, monkey dog -basenji -pug, pug-dog -Leonberg -Newfoundland, Newfoundland dog -Great Pyrenees -Samoyed, Samoyede -Pomeranian -chow, chow chow -keeshond -Brabancon griffon -Pembroke, Pembroke Welsh corgi -Cardigan, Cardigan Welsh corgi -toy poodle -miniature poodle -standard poodle -Mexican hairless -timber wolf, grey wolf, gray wolf, Canis lupus -white wolf, Arctic wolf, Canis lupus tundrarum -red wolf, maned wolf, Canis rufus, Canis niger -coyote, prairie wolf, brush wolf, Canis latrans -dingo, warrigal, warragal, Canis dingo -dhole, Cuon alpinus -African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus -hyena, hyaena -red fox, Vulpes vulpes -kit fox, Vulpes macrotis -Arctic fox, white fox, Alopex lagopus -grey fox, gray fox, Urocyon cinereoargenteus -tabby, tabby cat -tiger cat -Persian cat -Siamese cat, Siamese -Egyptian cat -cougar, puma, catamount, mountain lion, painter, panther, Felis concolor -lynx, catamount -leopard, Panthera pardus -snow leopard, ounce, Panthera uncia -jaguar, panther, Panthera onca, Felis onca -lion, king of beasts, Panthera leo -tiger, Panthera tigris -cheetah, chetah, Acinonyx jubatus -brown bear, bruin, Ursus arctos -American black bear, black bear, Ursus americanus, Euarctos americanus -ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus -sloth bear, Melursus ursinus, Ursus ursinus -mongoose -meerkat, mierkat -tiger beetle -ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle -ground beetle, carabid beetle -long-horned beetle, longicorn, longicorn beetle -leaf beetle, chrysomelid -dung beetle -rhinoceros beetle -weevil -fly -bee -ant, emmet, pismire -grasshopper, hopper -cricket -walking stick, walkingstick, stick insect -cockroach, roach -mantis, mantid -cicada, cicala -leafhopper -lacewing, lacewing fly -dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk -damselfly -admiral -ringlet, ringlet butterfly -monarch, monarch butterfly, milkweed butterfly, Danaus plexippus -cabbage butterfly -sulphur butterfly, sulfur butterfly -lycaenid, lycaenid butterfly -starfish, sea star -sea urchin -sea cucumber, holothurian -wood rabbit, cottontail, cottontail rabbit -hare -Angora, Angora rabbit -hamster -porcupine, hedgehog -fox squirrel, eastern fox squirrel, Sciurus niger -marmot -beaver -guinea pig, Cavia cobaya -sorrel -zebra -hog, pig, grunter, squealer, Sus scrofa -wild boar, boar, Sus scrofa -warthog -hippopotamus, hippo, river horse, Hippopotamus amphibius -ox -water buffalo, water ox, Asiatic buffalo, Bubalus bubalis -bison -ram, tup -bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis -ibex, Capra ibex -hartebeest -impala, Aepyceros melampus 
-gazelle -Arabian camel, dromedary, Camelus dromedarius -llama -weasel -mink -polecat, fitch, foulmart, foumart, Mustela putorius -black-footed ferret, ferret, Mustela nigripes -otter -skunk, polecat, wood pussy -badger -armadillo -three-toed sloth, ai, Bradypus tridactylus -orangutan, orang, orangutang, Pongo pygmaeus -gorilla, Gorilla gorilla -chimpanzee, chimp, Pan troglodytes -gibbon, Hylobates lar -siamang, Hylobates syndactylus, Symphalangus syndactylus -guenon, guenon monkey -patas, hussar monkey, Erythrocebus patas -baboon -macaque -langur -colobus, colobus monkey -proboscis monkey, Nasalis larvatus -marmoset -capuchin, ringtail, Cebus capucinus -howler monkey, howler -titi, titi monkey -spider monkey, Ateles geoffroyi -squirrel monkey, Saimiri sciureus -Madagascar cat, ring-tailed lemur, Lemur catta -indri, indris, Indri indri, Indri brevicaudatus -Indian elephant, Elephas maximus -African elephant, Loxodonta africana -lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens -giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca -barracouta, snoek -eel -coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch -rock beauty, Holocanthus tricolor -anemone fish -sturgeon -gar, garfish, garpike, billfish, Lepisosteus osseus -lionfish -puffer, pufferfish, blowfish, globefish -abacus -abaya -academic gown, academic robe, judge's robe -accordion, piano accordion, squeeze box -acoustic guitar -aircraft carrier, carrier, flattop, attack aircraft carrier -airliner -airship, dirigible -altar -ambulance -amphibian, amphibious vehicle -analog clock -apiary, bee house -apron -ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin -assault rifle, assault gun -backpack, back pack, knapsack, packsack, rucksack, haversack -bakery, bakeshop, bakehouse -balance beam, beam -balloon -ballpoint, ballpoint pen, ballpen, Biro -Band Aid -banjo -bannister, banister, balustrade, balusters, handrail -barbell -barber chair -barbershop -barn -barometer -barrel, cask -barrow, garden cart, lawn cart, wheelbarrow -baseball -basketball -bassinet -bassoon -bathing cap, swimming cap -bath towel -bathtub, bathing tub, bath, tub -beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon -beacon, lighthouse, beacon light, pharos -beaker -bearskin, busby, shako -beer bottle -beer glass -bell cote, bell cot -bib -bicycle-built-for-two, tandem bicycle, tandem -bikini, two-piece -binder, ring-binder -binoculars, field glasses, opera glasses -birdhouse -boathouse -bobsled, bobsleigh, bob -bolo tie, bolo, bola tie, bola -bonnet, poke bonnet -bookcase -bookshop, bookstore, bookstall -bottlecap -bow -bow tie, bow-tie, bowtie -brass, memorial tablet, plaque -brassiere, bra, bandeau -breakwater, groin, groyne, mole, bulwark, seawall, jetty -breastplate, aegis, egis -broom -bucket, pail -buckle -bulletproof vest -bullet train, bullet -butcher shop, meat market -cab, hack, taxi, taxicab -caldron, cauldron -candle, taper, wax light -cannon -canoe -can opener, tin opener -cardigan -car mirror -carousel, carrousel, merry-go-round, roundabout, whirligig -carpenter's kit, tool kit -carton -car wheel -cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM -cassette -cassette player -castle -catamaran -CD player -cello, violoncello -cellular telephone, cellular phone, cellphone, cell, mobile phone -chain -chainlink fence -chain mail, ring mail, mail, chain armor, 
chain armour, ring armor, ring armour -chain saw, chainsaw -chest -chiffonier, commode -chime, bell, gong -china cabinet, china closet -Christmas stocking -church, church building -cinema, movie theater, movie theatre, movie house, picture palace -cleaver, meat cleaver, chopper -cliff dwelling -cloak -clog, geta, patten, sabot -cocktail shaker -coffee mug -coffeepot -coil, spiral, volute, whorl, helix -combination lock -computer keyboard, keypad -confectionery, confectionary, candy store -container ship, containership, container vessel -convertible -corkscrew, bottle screw -cornet, horn, trumpet, trump -cowboy boot -cowboy hat, ten-gallon hat -cradle -crane -crash helmet -crate -crib, cot -Crock Pot -croquet ball -crutch -cuirass -dam, dike, dyke -desk -desktop computer -dial telephone, dial phone -diaper, nappy, napkin -digital clock -digital watch -dining table, board -dishrag, dishcloth -dishwasher, dish washer, dishwashing machine -disk brake, disc brake -dock, dockage, docking facility -dogsled, dog sled, dog sleigh -dome -doormat, welcome mat -drilling platform, offshore rig -drum, membranophone, tympan -drumstick -dumbbell -Dutch oven -electric fan, blower -electric guitar -electric locomotive -entertainment center -envelope -espresso maker -face powder -feather boa, boa -file, file cabinet, filing cabinet -fireboat -fire engine, fire truck -fire screen, fireguard -flagpole, flagstaff -flute, transverse flute -folding chair -football helmet -forklift -fountain -fountain pen -four-poster -freight car -French horn, horn -frying pan, frypan, skillet -fur coat -garbage truck, dustcart -gasmask, respirator, gas helmet -gas pump, gasoline pump, petrol pump, island dispenser -goblet -go-kart -golf ball -golfcart, golf cart -gondola -gong, tam-tam -gown -grand piano, grand -greenhouse, nursery, glasshouse -grille, radiator grille -grocery store, grocery, food market, market -guillotine -hair slide -hair spray -half track -hammer -hamper -hand blower, blow dryer, blow drier, hair dryer, hair drier -hand-held computer, hand-held microcomputer -handkerchief, hankie, hanky, hankey -hard disc, hard disk, fixed disk -harmonica, mouth organ, harp, mouth harp -harp -harvester, reaper -hatchet -holster -home theater, home theatre -honeycomb -hook, claw -hoopskirt, crinoline -horizontal bar, high bar -horse cart, horse-cart -hourglass -iPod -iron, smoothing iron -jack-o'-lantern -jean, blue jean, denim -jeep, landrover -jersey, T-shirt, tee shirt -jigsaw puzzle -jinrikisha, ricksha, rickshaw -joystick -kimono -knee pad -knot -lab coat, laboratory coat -ladle -lampshade, lamp shade -laptop, laptop computer -lawn mower, mower -lens cap, lens cover -letter opener, paper knife, paperknife -library -lifeboat -lighter, light, igniter, ignitor -limousine, limo -liner, ocean liner -lipstick, lip rouge -Loafer -lotion -loudspeaker, speaker, speaker unit, loudspeaker system, speaker system -loupe, jeweler's loupe -lumbermill, sawmill -magnetic compass -mailbag, postbag -mailbox, letter box -maillot -maillot, tank suit -manhole cover -maraca -marimba, xylophone -mask -matchstick -maypole -maze, labyrinth -measuring cup -medicine chest, medicine cabinet -megalith, megalithic structure -microphone, mike -microwave, microwave oven -military uniform -milk can -minibus -miniskirt, mini -minivan -missile -mitten -mixing bowl -mobile home, manufactured home -Model T -modem -monastery -monitor -moped -mortar -mortarboard -mosque -mosquito net -motor scooter, scooter -mountain bike, all-terrain bike, off-roader 
-mountain tent -mouse, computer mouse -mousetrap -moving van -muzzle -nail -neck brace -necklace -nipple -notebook, notebook computer -obelisk -oboe, hautboy, hautbois -ocarina, sweet potato -odometer, hodometer, mileometer, milometer -oil filter -organ, pipe organ -oscilloscope, scope, cathode-ray oscilloscope, CRO -overskirt -oxcart -oxygen mask -packet -paddle, boat paddle -paddlewheel, paddle wheel -padlock -paintbrush -pajama, pyjama, pj's, jammies -palace -panpipe, pandean pipe, syrinx -paper towel -parachute, chute -parallel bars, bars -park bench -parking meter -passenger car, coach, carriage -patio, terrace -pay-phone, pay-station -pedestal, plinth, footstall -pencil box, pencil case -pencil sharpener -perfume, essence -Petri dish -photocopier -pick, plectrum, plectron -pickelhaube -picket fence, paling -pickup, pickup truck -pier -piggy bank, penny bank -pill bottle -pillow -ping-pong ball -pinwheel -pirate, pirate ship -pitcher, ewer -plane, carpenter's plane, woodworking plane -planetarium -plastic bag -plate rack -plow, plough -plunger, plumber's helper -Polaroid camera, Polaroid Land camera -pole -police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria -poncho -pool table, billiard table, snooker table -pop bottle, soda bottle -pot, flowerpot -potter's wheel -power drill -prayer rug, prayer mat -printer -prison, prison house -projectile, missile -projector -puck, hockey puck -punching bag, punch bag, punching ball, punchball -purse -quill, quill pen -quilt, comforter, comfort, puff -racer, race car, racing car -racket, racquet -radiator -radio, wireless -radio telescope, radio reflector -rain barrel -recreational vehicle, RV, R.V. -reel -reflex camera -refrigerator, icebox -remote control, remote -restaurant, eating house, eating place, eatery -revolver, six-gun, six-shooter -rifle -rocking chair, rocker -rotisserie -rubber eraser, rubber, pencil eraser -rugby ball -rule, ruler -running shoe -safe -safety pin -saltshaker, salt shaker -sandal -sarong -sax, saxophone -scabbard -scale, weighing machine -school bus -schooner -scoreboard -screen, CRT screen -screw -screwdriver -seat belt, seatbelt -sewing machine -shield, buckler -shoe shop, shoe-shop, shoe store -shoji -shopping basket -shopping cart -shovel -shower cap -shower curtain -ski -ski mask -sleeping bag -slide rule, slipstick -sliding door -slot, one-armed bandit -snorkel -snowmobile -snowplow, snowplough -soap dispenser -soccer ball -sock -solar dish, solar collector, solar furnace -sombrero -soup bowl -space bar -space heater -space shuttle -spatula -speedboat -spider web, spider's web -spindle -sports car, sport car -spotlight, spot -stage -steam locomotive -steel arch bridge -steel drum -stethoscope -stole -stone wall -stopwatch, stop watch -stove -strainer -streetcar, tram, tramcar, trolley, trolley car -stretcher -studio couch, day bed -stupa, tope -submarine, pigboat, sub, U-boat -suit, suit of clothes -sundial -sunglass -sunglasses, dark glasses, shades -sunscreen, sunblock, sun blocker -suspension bridge -swab, swob, mop -sweatshirt -swimming trunks, bathing trunks -swing -switch, electric switch, electrical switch -syringe -table lamp -tank, army tank, armored combat vehicle, armoured combat vehicle -tape player -teapot -teddy, teddy bear -television, television system -tennis ball -thatch, thatched roof -theater curtain, theatre curtain -thimble -thresher, thrasher, threshing machine -throne -tile roof -toaster -tobacco shop, tobacconist shop, tobacconist -toilet seat -torch -totem pole -tow 
truck, tow car, wrecker -toyshop -tractor -trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi -tray -trench coat -tricycle, trike, velocipede -trimaran -tripod -triumphal arch -trolleybus, trolley coach, trackless trolley -trombone -tub, vat -turnstile -typewriter keyboard -umbrella -unicycle, monocycle -upright, upright piano -vacuum, vacuum cleaner -vase -vault -velvet -vending machine -vestment -viaduct -violin, fiddle -volleyball -waffle iron -wall clock -wallet, billfold, notecase, pocketbook -wardrobe, closet, press -warplane, military plane -washbasin, handbasin, washbowl, lavabo, wash-hand basin -washer, automatic washer, washing machine -water bottle -water jug -water tower -whiskey jug -whistle -wig -window screen -window shade -Windsor tie -wine bottle -wing -wok -wooden spoon -wool, woolen, woollen -worm fence, snake fence, snake-rail fence, Virginia fence -wreck -yawl -yurt -web site, website, internet site, site -comic book -crossword puzzle, crossword -street sign -traffic light, traffic signal, stoplight -book jacket, dust cover, dust jacket, dust wrapper -menu -plate -guacamole -consomme -hot pot, hotpot -trifle -ice cream, icecream -ice lolly, lolly, lollipop, popsicle -French loaf -bagel, beigel -pretzel -cheeseburger -hotdog, hot dog, red hot -mashed potato -head cabbage -broccoli -cauliflower -zucchini, courgette -spaghetti squash -acorn squash -butternut squash -cucumber, cuke -artichoke, globe artichoke -bell pepper -cardoon -mushroom -Granny Smith -strawberry -orange -lemon -fig -pineapple, ananas -banana -jackfruit, jak, jack -custard apple -pomegranate -hay -carbonara -chocolate sauce, chocolate syrup -dough -meat loaf, meatloaf -pizza, pizza pie -potpie -burrito -red wine -espresso -cup -eggnog -alp -bubble -cliff, drop, drop-off -coral reef -geyser -lakeside, lakeshore -promontory, headland, head, foreland -sandbar, sand bar -seashore, coast, seacoast, sea-coast -valley, vale -volcano -ballplayer, baseball player -groom, bridegroom -scuba diver -rapeseed -daisy -yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum -corn -acorn -hip, rose hip, rosehip -buckeye, horse chestnut, conker -coral fungus -agaric -gyromitra -stinkhorn, carrion fungus -earthstar -hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa -bolete -ear, spike, capitulum -toilet tissue, toilet paper, bathroom tissue'''.split("\n") diff --git a/tensorlayer/models/mobilenetv1.py b/tensorlayer/models/mobilenetv1.py deleted file mode 100644 index 3cb8601..0000000 --- a/tensorlayer/models/mobilenetv1.py +++ /dev/null @@ -1,187 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -"""MobileNet for ImageNet.""" - -import os - -import tensorflow as tf - -from tensorlayer import logging - -from tensorlayer.layers import Layer -from tensorlayer.layers import BatchNormLayer -from tensorlayer.layers import Conv2d -from tensorlayer.layers import DepthwiseConv2d -from tensorlayer.layers import FlattenLayer -from tensorlayer.layers import GlobalMeanPool2d -from tensorlayer.layers import InputLayer -from tensorlayer.layers import ReshapeLayer - -from tensorlayer.files import maybe_download_and_extract, assign_weights, load_npz - -__all__ = [ - 'MobileNetV1', -] - - -class MobileNetV1(Layer): - """Pre-trained MobileNetV1 model. - - Parameters - ------------ - x : placeholder - shape [None, 224, 224, 3], value range [0, 1]. - end_with : str - The end point of the model [conv, depth1, depth2 ... depth13, globalmeanpool, out]. 
Default ``out`` i.e. the whole model. - is_train : boolean - Whether the model is used for training i.e. enable dropout. - reuse : boolean - Whether to reuse the model. - - Examples - --------- - Classify ImageNet classes, see `tutorial_models_mobilenetv1.py `__ - - >>> x = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> # get the whole model - >>> net = tl.models.MobileNetV1(x) - >>> # restore pre-trained parameters - >>> sess = tf.InteractiveSession() - >>> net.restore_params(sess) - >>> # use for inferencing - >>> probs = tf.nn.softmax(net.outputs) - - Extract features and Train a classifier with 100 classes - - >>> x = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> # get model without the last layer - >>> cnn = tl.models.MobileNetV1(x, end_with='reshape') - >>> # add one more layer - >>> net = Conv2d(cnn, 100, (1, 1), (1, 1), name='out') - >>> net = FlattenLayer(net, name='flatten') - >>> # initialize all parameters - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> # restore pre-trained parameters - >>> cnn.restore_params(sess) - >>> # train your own classifier (only update the last layer) - >>> train_params = tl.layers.get_variables_with_name('out') - - Reuse model - - >>> x1 = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> x2 = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> # get model without the last layer - >>> net1 = tl.models.MobileNetV1(x1, end_with='reshape') - >>> # reuse the parameters with different input - >>> net2 = tl.models.MobileNetV1(x2, end_with='reshape', reuse=True) - >>> # restore pre-trained parameters (as they share parameters, we don’t need to restore net2) - >>> sess = tf.InteractiveSession() - >>> net1.restore_params(sess) - - """ - - def __init__(self, x, end_with='out', is_train=False, reuse=None): - - self.net = self.mobilenetv1(x, end_with, is_train, reuse) - - self.outputs = self.net.outputs - - self.all_params = list(self.net.all_params) - self.all_layers = list(self.net.all_layers) - self.all_drop = dict(self.net.all_drop) - self.print_layers = self.net.print_layers - self.print_params = self.net.print_params - - # @classmethod - def mobilenetv1(self, x, end_with='out', is_train=False, reuse=None): - with tf.compat.v1.variable_scope("mobilenetv1", reuse=reuse): - n = InputLayer(x) - n = self.conv_block(n, 32, strides=(2, 2), is_train=is_train, name="conv") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 64, is_train=is_train, name="depth1") - if end_with in n.outputs.name: - return n - - n = self.depthwise_conv_block(n, 128, strides=(2, 2), is_train=is_train, name="depth2") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 128, is_train=is_train, name="depth3") - if end_with in n.outputs.name: - return n - - n = self.depthwise_conv_block(n, 256, strides=(2, 2), is_train=is_train, name="depth4") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 256, is_train=is_train, name="depth5") - if end_with in n.outputs.name: - return n - - n = self.depthwise_conv_block(n, 512, strides=(2, 2), is_train=is_train, name="depth6") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 512, is_train=is_train, name="depth7") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 512, is_train=is_train, name="depth8") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 512, is_train=is_train, name="depth9") - if end_with in n.outputs.name: - 
return n - n = self.depthwise_conv_block(n, 512, is_train=is_train, name="depth10") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 512, is_train=is_train, name="depth11") - if end_with in n.outputs.name: - return n - - n = self.depthwise_conv_block(n, 1024, strides=(2, 2), is_train=is_train, name="depth12") - if end_with in n.outputs.name: - return n - n = self.depthwise_conv_block(n, 1024, is_train=is_train, name="depth13") - if end_with in n.outputs.name: - return n - - n = GlobalMeanPool2d(n, name='globalmeanpool') - if end_with in n.outputs.name: - return n - # n = DropoutLayer(n, 1-1e-3, True, is_train, name='drop') - # n = DenseLayer(n, 1000, name='output') # equal - n = ReshapeLayer(n, [-1, 1, 1, 1024], name='reshape') - if end_with in n.outputs.name: - return n - n = Conv2d(n, 1000, (1, 1), (1, 1), name='out') - n = FlattenLayer(n, name='flatten') - if end_with == 'out': - return n - - raise Exception("end_with : conv, depth1, depth2 ... depth13, globalmeanpool, out") - - @classmethod - def conv_block(cls, n, n_filter, filter_size=(3, 3), strides=(1, 1), is_train=False, name='conv_block'): - # ref: https://github.com/keras-team/keras/blob/master/keras/applications/mobilenet.py - with tf.compat.v1.variable_scope(name): - n = Conv2d(n, n_filter, filter_size, strides, b_init=None, name='conv') - n = BatchNormLayer(n, decay=0.99, act=tf.nn.relu6, is_train=is_train, name='batchnorm') - return n - - @classmethod - def depthwise_conv_block(cls, n, n_filter, strides=(1, 1), is_train=False, name="depth_block"): - with tf.compat.v1.variable_scope(name): - n = DepthwiseConv2d(n, (3, 3), strides, b_init=None, name='depthwise') - n = BatchNormLayer(n, decay=0.99, act=tf.nn.relu6, is_train=is_train, name='batchnorm1') - n = Conv2d(n, n_filter, (1, 1), (1, 1), b_init=None, name='conv') - n = BatchNormLayer(n, decay=0.99, act=tf.nn.relu6, is_train=is_train, name='batchnorm2') - return n - - def restore_params(self, sess, path='models'): - logging.info("Restore pre-trained parameters") - maybe_download_and_extract( - 'mobilenet.npz', path, 'https://github.com/tensorlayer/pretrained-models/raw/master/models/', - expected_bytes=25600116 - ) # ls -al - params = load_npz(name=os.path.join(path, 'mobilenet.npz')) - assign_weights(sess, params[:len(self.net.all_params)], self.net) - del params diff --git a/tensorlayer/models/squeezenetv1.py b/tensorlayer/models/squeezenetv1.py deleted file mode 100644 index 437749e..0000000 --- a/tensorlayer/models/squeezenetv1.py +++ /dev/null @@ -1,191 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -"""SqueezeNet for ImageNet.""" - -import os - -import tensorflow as tf - -from tensorlayer import logging - -from tensorlayer.layers import Layer -from tensorlayer.layers import Conv2d -from tensorlayer.layers import InputLayer -from tensorlayer.layers import MaxPool2d -from tensorlayer.layers import ConcatLayer -from tensorlayer.layers import DropoutLayer -from tensorlayer.layers import GlobalMeanPool2d - -from tensorlayer.files import maybe_download_and_extract -from tensorlayer.files import assign_weights -from tensorlayer.files import load_npz - -__all__ = [ - 'SqueezeNetV1', -] - - -class SqueezeNetV1(Layer): - """Pre-trained SqueezeNetV1 model. - - Parameters - ------------ - x : placeholder - shape [None, 224, 224, 3], value range [0, 255]. - end_with : str - The end point of the model [input, fire2, fire3 ... fire9, output]. Default ``output`` i.e. the whole model. 
- is_train : boolean - Whether the model is used for training i.e. enable dropout. - reuse : boolean - Whether to reuse the model. - - Examples - --------- - Classify ImageNet classes, see `tutorial_models_squeezenetv1.py `__ - - >>> x = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> # get the whole model - >>> net = tl.models.SqueezeNetV1(x) - >>> # restore pre-trained parameters - >>> sess = tf.InteractiveSession() - >>> net.restore_params(sess) - >>> # use for inferencing - >>> probs = tf.nn.softmax(net.outputs) - - Extract features and Train a classifier with 100 classes - - >>> x = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> # get model without the last layer - >>> cnn = tl.models.SqueezeNetV1(x, end_with='fire9') - >>> # add one more layer - >>> net = Conv2d(cnn, 100, (1, 1), (1, 1), padding='VALID', name='output') - >>> net = GlobalMeanPool2d(net) - >>> # initialize all parameters - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> # restore pre-trained parameters - >>> cnn.restore_params(sess) - >>> # train your own classifier (only update the last layer) - >>> train_params = tl.layers.get_variables_with_name('output') - - Reuse model - - >>> x1 = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> x2 = tf.placeholder(tf.float32, [None, 224, 224, 3]) - >>> # get model without the last layer - >>> net1 = tl.models.SqueezeNetV1(x1, end_with='fire9') - >>> # reuse the parameters with different input - >>> net2 = tl.models.SqueezeNetV1(x2, end_with='fire9', reuse=True) - >>> # restore pre-trained parameters (as they share parameters, we don’t need to restore net2) - >>> sess = tf.InteractiveSession() - >>> net1.restore_params(sess) - - """ - - def __init__(self, x, end_with='output', is_train=False, reuse=None): - - self.net = self.squeezenetv1(x, end_with, is_train, reuse) - - self.outputs = self.net.outputs - - self.all_params = list(self.net.all_params) - self.all_layers = list(self.net.all_layers) - self.all_drop = dict(self.net.all_drop) - self.print_layers = self.net.print_layers - self.print_params = self.net.print_params - - @classmethod - def squeezenetv1(cls, x, end_with='output', is_train=False, reuse=None): - with tf.compat.v1.variable_scope("squeezenetv1", reuse=reuse): - with tf.compat.v1.variable_scope("input"): - n = InputLayer(x) - # n = Conv2d(n, 96, (7,7),(2,2),tf.nn.relu,'SAME',name='conv1') - n = Conv2d(n, 64, (3, 3), (2, 2), tf.nn.relu, 'SAME', name='conv1') - n = MaxPool2d(n, (3, 3), (2, 2), 'VALID', name='max') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire2"): - n = Conv2d(n, 16, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 64, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='expand1x1') - n2 = Conv2d(n, 64, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire3"): - n = Conv2d(n, 16, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 64, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='expand1x1') - n2 = Conv2d(n, 64, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - n = MaxPool2d(n, (3, 3), (2, 2), 'VALID', name='max') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire4"): - n = Conv2d(n, 32, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 128, (1, 1), (1, 1), tf.nn.relu, 'SAME', 
name='expand1x1') - n2 = Conv2d(n, 128, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire5"): - n = Conv2d(n, 32, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 128, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='expand1x1') - n2 = Conv2d(n, 128, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - n = MaxPool2d(n, (3, 3), (2, 2), 'VALID', name='max') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire6"): - n = Conv2d(n, 48, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 192, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='expand1x1') - n2 = Conv2d(n, 192, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire7"): - n = Conv2d(n, 48, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 192, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='expand1x1') - n2 = Conv2d(n, 192, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire8"): - n = Conv2d(n, 64, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 256, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='expand1x1') - n2 = Conv2d(n, 256, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("fire9"): - n = Conv2d(n, 64, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='squeeze1x1') - n1 = Conv2d(n, 256, (1, 1), (1, 1), tf.nn.relu, 'SAME', name='expand1x1') - n2 = Conv2d(n, 256, (3, 3), (1, 1), tf.nn.relu, 'SAME', name='expand3x3') - n = ConcatLayer([n1, n2], -1, name='concat') - if end_with in n.outputs.name: - return n - - with tf.compat.v1.variable_scope("output"): - n = DropoutLayer(n, keep=0.5, is_fix=True, is_train=is_train, name='drop1') - n = Conv2d(n, 1000, (1, 1), (1, 1), padding='VALID', name='conv10') # 13, 13, 1000 - n = GlobalMeanPool2d(n) - if end_with in n.outputs.name: - return n - - raise Exception("end_with : input, fire2, fire3 ... fire9, output") - - def restore_params(self, sess, path='models'): - logging.info("Restore pre-trained parameters") - maybe_download_and_extract( - 'squeezenet.npz', path, 'https://github.com/tensorlayer/pretrained-models/raw/master/models/', - expected_bytes=7405613 - ) # ls -al - params = load_npz(name=os.path.join(path, 'squeezenet.npz')) - assign_weights(sess, params[:len(self.net.all_params)], self.net) - del params diff --git a/tensorlayer/models/vgg.py b/tensorlayer/models/vgg.py deleted file mode 100644 index 23ac521..0000000 --- a/tensorlayer/models/vgg.py +++ /dev/null @@ -1,305 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -VGG for ImageNet. - -Introduction ----------------- -VGG is a convolutional neural network model proposed by K. Simonyan and A. Zisserman -from the University of Oxford in the paper “Very Deep Convolutional Networks for -Large-Scale Image Recognition” . The model achieves 92.7% top-5 test accuracy in ImageNet, -which is a dataset of over 14 million images belonging to 1000 classes. 
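The intro above quotes VGG's 92.7% top-5 accuracy on ImageNet; since this patch deletes imagenet_classes.py, the JSON class index added earlier in the patch is what now maps class ids to human-readable labels. A minimal sketch of reading the top-5 labels off a softmax output, assuming the index is saved as imagenet_class_index.json and that probs is a 1-D array of 1000 softmax values (both names are illustrative, not part of this patch):

    import json
    import numpy as np

    # Load the {"class_id": [wnid, label], ...} index added in this patch.
    # The file name/path is an assumption; point it at wherever the JSON lives.
    with open("imagenet_class_index.json") as f:
        class_index = json.load(f)

    def top5_labels(probs):
        # Indices of the five largest probabilities, most likely first.
        top = np.argsort(probs)[::-1][:5]
        return [(class_index[str(i)][0], class_index[str(i)][1], float(probs[i]))
                for i in top]

    # Dummy usage: a uniform distribution over the 1000 classes.
    print(top5_labels(np.full(1000, 1.0 / 1000)))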
- -Download Pre-trained Model ----------------------------- -- Model weights in this example - vgg16_weights.npz : http://www.cs.toronto.edu/~frossard/post/vgg16/ -- Model weights in this example - vgg19.npy : https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/ -- Caffe VGG 16 model : https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md -- Tool to convert the Caffe models to TensorFlow's : https://github.com/ethereon/caffe-tensorflow - -Note ------- -- For simplified CNN layer see "Convolutional layer (Simplified)" -in read the docs website. -- When feeding other images to the model be sure to properly resize or crop them -beforehand. Distorted images might end up being misclassified. One way of safely -feeding images of multiple sizes is by doing center cropping. -""" - -import sys - -import os -import numpy as np -import tensorflow as tf -from tensorflow.python.eager import context - -from tensorlayer import logging - -from tensorlayer.layers import Conv2d -from tensorlayer.layers import Dense -from tensorlayer.layers import Flatten -from tensorlayer.layers import Input -from tensorlayer.layers import MaxPool2d -from tensorlayer.layers import LayerList -from tensorlayer.layers import BatchNorm -from tensorlayer.models import Model - -from tensorlayer.files import maybe_download_and_extract -from tensorlayer.files import assign_weights - - -__all__ = [ - 'VGG', 'vgg16', 'vgg19', -# 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', -# 'vgg19_bn', 'vgg19', -] - -layer_names = [ - ['conv1_1', 'conv1_2'], - 'pool1', - ['conv2_1', 'conv2_2'], - 'pool2', - ['conv3_1', 'conv3_2', 'conv3_3', 'conv3_4'], - 'pool3', - ['conv4_1', 'conv4_2', 'conv4_3', 'conv4_4'], - 'pool4', - ['conv5_1', 'conv5_2', 'conv5_3', 'conv5_4'], - 'pool5', - 'flatten', 'fc1_relu', 'fc2_relu', 'outputs' -] - -cfg = { - 'A': [[64], 'M', [128], 'M', [256, 256], 'M', [512, 512], 'M', [512, 512], 'M', 'F', 'fc1', 'fc2', 'O'], - 'B': [[64, 64], 'M', [128, 128], 'M', [256, 256], 'M', [512, 512], 'M', [512, 512], 'M', 'F', 'fc1', 'fc2', 'O'], - 'D': [[64, 64], 'M', [128, 128], 'M', [256, 256, 256], 'M', [512, 512, 512], 'M', [512, 512, 512], 'M', 'F', 'fc1', 'fc2', 'O'], - 'E': [[64, 64], 'M', [128, 128], 'M', [256, 256, 256, 256], 'M', [512, 512, 512, 512], 'M', [512, 512, 512, 512], 'M', 'F', 'fc1', 'fc2', 'O'], -} - -mapped_cfg = { - 'vgg11': 'A', 'vgg11_bn': 'A', - 'vgg13': 'B', 'vgg13_bn': 'B', - 'vgg16': 'D', 'vgg16_bn': 'D', - 'vgg19': 'E', 'vgg19_bn': 'E' -} - -model_urls = { - 'vgg16': 'http://www.cs.toronto.edu/~frossard/vgg16/', - 'vgg19': 'https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/' -} - -model_saved_name = { - 'vgg16': 'vgg16_weights.npz', - 'vgg19': 'vgg19.npy' -} - -class VGG(Model): - """Pre-trained VGG model. - - Parameters - ------------ - end_with : str - The end point of the model. Default ``fc3_relu`` i.e. the whole model. 
- - Examples - --------- - Classify ImageNet classes with VGG16, see `tutorial_models_vgg.py `__ - - - >>> # get the whole model - >>> vgg = tl.models.vgg.vgg16() - >>> # restore pre-trained VGG parameters - >>> vgg.restore_weights() - >>> # use for inferencing - >>> probs = tf.nn.softmax(vgg.outputs) - - Extract features with VGG16 and Train a classifier with 100 classes - - >>> # get VGG without the last layer - >>> vgg = tl.models.vgg.vgg16(end_with='fc2_relu') - >>> # add one more layer - >>> net = tl.layers.DenseLayer(vgg, 100, name='out') - >>> # restore pre-trained VGG parameters - >>> vgg.restore_weights() - >>> # train your own classifier (only update the last layer) - >>> train_params = tl.layers.get_variables_with_name('out') - - Reuse model - - >>> # get VGG without the last layer - >>> vgg1 = tl.models.vgg.vgg16(end_with='fc2_relu') - >>> # reuse the parameters of vgg1 with different input - >>> vgg2 = tl.models.vgg.vgg16(end_with='fc2_relu', reuse=True) - >>> # restore pre-trained VGG parameters (as they share parameters, we don’t need to restore vgg2) - >>> vgg1.restore_weights() - - """ - - def __init__(self, layer_type, batch_norm=False, end_with='outputs', name=None): - super(VGG, self).__init__() - self.end_with = end_with - - self.innet = Input([None, 224, 224, 3]) - - config = cfg[mapped_cfg[layer_type]] - self.layers = make_layers(config, batch_norm, end_with) - - def forward(self, inputs): - """ - inputs : tensor - Shape [None, 224, 224, 3], value range [0, 1]. - """ - outputs = inputs * 255.0 - mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean') - outputs = outputs - mean - - out = self.innet(outputs) - out = self.layers(out) - return out.outputs - - -def make_layers(config, batch_norm=False, end_with='outputs'): - layer_list = [] - is_end = False - for layer_group_idx, layer_group in enumerate(config): - if isinstance(layer_group, list): - for idx, layer in enumerate(layer_group): - layer_name = layer_names[layer_group_idx][idx] - n_filter = layer - if idx == 0: - if layer_group_idx > 0: - in_channels = config[layer_group_idx - 2][-1] - else: - in_channels = 3 - else: - in_channels = layer - layer_list.append(Conv2d(n_filter=n_filter, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, - padding='SAME', in_channels=in_channels, name=layer_name)) - if batch_norm: - layer_list.append(BatchNorm()) - if layer_name == end_with: - is_end = True - break - else: - layer_name = layer_names[layer_group_idx] - if layer_group == 'M': - layer_list.append(MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name=layer_name)) - elif layer_group == 'O': - layer_list.append(Dense(n_units=1000, in_channels=4096, name=layer_name)) - elif layer_group == 'F': - layer_list.append(Flatten(name='flatten')) - elif layer_group == 'fc1': - layer_list.append(Dense(n_units=4096, act=tf.nn.relu, in_channels=512 * 7 * 7, name=layer_name)) - elif layer_group == 'fc2': - layer_list.append(Dense(n_units=4096, act=tf.nn.relu, in_channels=4096, name=layer_name)) - if layer_name == end_with: - is_end = True - if is_end: - break - return LayerList(layer_list) - - -def restore_model(model, layer_type, sess=None): - logging.info("Restore pre-trained weights") - # download weights - maybe_download_and_extract( - model_saved_name[layer_type], 'models', model_urls[layer_type] - ) - weights = [] - if layer_type == 'vgg16': - npz = np.load(os.path.join('models', model_saved_name[layer_type])) - # get weight list - for val in sorted(npz.items()): - 
logging.info(" Loading weights %s in %s" % (str(val[1].shape), val[0])) - weights.append(val[1]) - if len(model.weights) == len(weights): - break - elif layer_type == 'vgg19': - npz = np.load(os.path.join('models', model_saved_name[layer_type]), encoding='latin1').item() - # get weight list - for val in sorted(npz.items()): - logging.info(" Loading %s in %s" % (str(val[1][0].shape), val[0])) - logging.info(" Loading %s in %s" % (str(val[1][1].shape), val[0])) - weights.extend(val[1]) - if len(model.weights) == len(weights): - break - # assign weight values - assign_weights(sess, weights, model) - del weights - - -def VGG_static(layer_type, batch_norm=False, end_with='outputs', name=None): - ni = Input([None, 224, 224, 3]) - - config = cfg[mapped_cfg[layer_type]] - layers = make_layers(config, batch_norm, end_with) - - nn = layers(ni) - - M = Model(inputs=ni, outputs=nn, name=name) - return M - - -def vgg16(pretrained=False, end_with='outputs', sess=None): - if context.default_execution_mode == context.EAGER_MODE: - model = VGG(layer_type='vgg16', batch_norm=False, end_with=end_with) - else: - model = VGG_static(layer_type='vgg16', batch_norm=False, end_with=end_with) - if pretrained: - # model.restore_weights() - restore_model(model, layer_type='vgg16', sess=sess) - return model - - -def vgg19(pretrained=False, end_with='outputs', sess=None): - if context.default_execution_mode == context.EAGER_MODE: - model = VGG(layer_type='vgg19', batch_norm=False, end_with=end_with) - else: - model = VGG_static(layer_type='vgg19', batch_norm=False, end_with=end_with) - if pretrained: - # model.restore_weights() - restore_model(model, layer_type='vgg19', sess=sess) - return model - -# models without pretrained parameters -'''def vgg11(pretrained=False, end_with='outputs'): - model = VGG(layer_type='vgg11', batch_norm=False, end_with=end_with) - if pretrained: - model.restore_weights() - return model - - -def vgg11_bn(pretrained=False, end_with='outputs'): - model = VGG(layer_type='vgg11_bn', batch_norm=True, end_with=end_with) - if pretrained: - model.restore_weights() - return model - - -def vgg13(pretrained=False, end_with='outputs'): - model = VGG(layer_type='vgg13', batch_norm=False, end_with=end_with) - if pretrained: - model.restore_weights() - return model - - -def vgg13_bn(pretrained=False, end_with='outputs'): - model = VGG(layer_type='vgg13_bn', batch_norm=True, end_with=end_with) - if pretrained: - model.restore_weights() - return model - - -def vgg16_bn(pretrained=False, end_with='outputs'): - model = VGG(layer_type='vgg16_bn', batch_norm=True, end_with=end_with) - if pretrained: - model.restore_weights() - return model - - -def vgg19_bn(pretrained=False, end_with='outputs'): - model = VGG(layer_type='vgg19_bn', batch_norm=True, end_with=end_with) - if pretrained: - model.restore_weights() - return model -''' - diff --git a/tensorlayer/models/vgg16.py b/tensorlayer/models/vgg16.py deleted file mode 100644 index b83520a..0000000 --- a/tensorlayer/models/vgg16.py +++ /dev/null @@ -1,174 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -VGG-16 for ImageNet. - -Introduction ----------------- -VGG is a convolutional neural network model proposed by K. Simonyan and A. Zisserman -from the University of Oxford in the paper “Very Deep Convolutional Networks for -Large-Scale Image Recognition” . The model achieves 92.7% top-5 test accuracy in ImageNet, -which is a dataset of over 14 million images belonging to 1000 classes. 
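-
-A sketch for inspecting the pretrained weight file (a hypothetical snippet; the
-key names shown follow the Frossard ``vgg16_weights.npz`` release, and
-``restore_weights`` below assigns weights in this sorted-key order):
-
->>> npz = np.load('models/vgg16_weights.npz')
->>> sorted(npz.keys())[:4]
-['conv1_1_W', 'conv1_1_b', 'conv1_2_W', 'conv1_2_b']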
- -Download Pre-trained Model ----------------------------- -- Model weights in this example - vgg16_weights.npz : http://www.cs.toronto.edu/~frossard/post/vgg16/ -- Caffe VGG 16 model : https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md -- Tool to convert the Caffe models to TensorFlow's : https://github.com/ethereon/caffe-tensorflow - -Note ------- -- For simplified CNN layer see "Convolutional layer (Simplified)" -in read the docs website. -- When feeding other images to the model be sure to properly resize or crop them -beforehand. Distorted images might end up being misclassified. One way of safely -feeding images of multiple sizes is by doing center cropping. -""" - -import os -import numpy as np -import tensorflow as tf - -from tensorlayer import logging - -from tensorlayer.layers import Conv2d -from tensorlayer.layers import Dense -from tensorlayer.layers import Flatten -from tensorlayer.layers import Input -from tensorlayer.layers import MaxPool2d -from tensorlayer.layers import LayerList -from tensorlayer.models import Model - -from tensorlayer.files import maybe_download_and_extract -from tensorlayer.files import assign_weights - -__all__ = [ - 'VGG16', -] - - -class VGG16(Model): - """Pre-trained VGG-16 model. - - Parameters - ------------ - end_with : str - The end point of the model. Default ``fc3_relu`` i.e. the whole model. - - Examples - --------- - Classify ImageNet classes with VGG16, see `tutorial_models_vgg16.py `__ - - >>> # get the whole model - >>> vgg = tl.models.VGG16() - >>> # restore pre-trained VGG parameters - >>> vgg.restore_weights() - >>> # use for inferencing - >>> probs = tf.nn.softmax(vgg.outputs) - - Extract features with VGG16 and Train a classifier with 100 classes - - >>> # get VGG without the last layer - >>> vgg = tl.models.VGG16(end_with='fc2_relu') - >>> # add one more layer - >>> net = tl.layers.DenseLayer(vgg, 100, name='out') - >>> # restore pre-trained VGG parameters - >>> vgg.restore_weights() - >>> # train your own classifier (only update the last layer) - >>> train_params = tl.layers.get_variables_with_name('out') - - Reuse model - - >>> # get VGG without the last layer - >>> vgg1 = tl.models.VGG16(end_with='fc2_relu') - >>> # reuse the parameters of vgg1 with different input - >>> vgg2 = tl.models.VGG16(end_with='fc2_relu', reuse=True) - >>> # restore pre-trained VGG parameters (as they share parameters, we don’t need to restore vgg2) - >>> vgg1.restore_weights() - - """ - - def __init__(self, end_with='outputs', name=None): - super(VGG16, self).__init__() - self.end_with = end_with - - self.layer_names = ['conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', - 'conv3_1', 'conv3_2', 'conv3_3', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'pool4', - 'conv5_1', 'conv5_2', 'conv5_3', 'pool5', - 'flatten', 'fc1_relu', 'fc2_relu', 'outputs'] - self.innet = Input([None, 224, 224, 3]) - self.layers = LayerList([ - # conv1 - Conv2d(n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=3, name='conv1_1'), - Conv2d(n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=64, name='conv1_2'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1'), - - # conv2 - Conv2d(n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=64, name='conv2_1'), - Conv2d(n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=128, name='conv2_2'), - 
MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2'), - - # conv3 - Conv2d(n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=128, name='conv3_1'), - Conv2d(n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=256, name='conv3_2'), - Conv2d(n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=256, name='conv3_3'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3'), - - # conv4 - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=256, name='conv4_1'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv4_2'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv4_3'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4'), - - # conv5 - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv5_1'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv5_2'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv5_3'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5'), - Flatten(name='flatten'), - Dense(n_units=4096, act=tf.nn.relu, in_channels=512*7*7, name='fc1_relu'), - Dense(n_units=4096, act=tf.nn.relu, in_channels=4096, name='fc2_relu'), - Dense(n_units=1000, in_channels=4096, name='outputs'), - ][:self.layer_names.index(self.end_with) + 1]) - - def forward(self, inputs): - """ - inputs : tensor - Shape [None, 224, 224, 3], value range [0, 1]. - """ - outputs = inputs * 255.0 - mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean') - outputs = outputs - mean - # outputs = inputs - - out = self.innet(outputs) - out = self.layers(out) - # for layer in self.layers: - # outputs = layer(outputs) - # if layer.name == self.end_with: - # break - return out.outputs - - def restore_params(self, **kwargs): - raise Exception("please change restore_params --> restore_weights") - - def restore_weights(self, sess=None): - logging.info("Restore pre-trained weights") - ## download weights - maybe_download_and_extract( - 'vgg16_weights.npz', 'models', 'http://www.cs.toronto.edu/~frossard/vgg16/', expected_bytes=553436134 - ) - npz = np.load(os.path.join('models', 'vgg16_weights.npz')) - ## get weight list - weights = [] - for val in sorted(npz.items()): - logging.info(" Loading weights %s in %s" % (str(val[1].shape), val[0])) - weights.append(val[1]) - if len(self.weights) == len(weights): - break - ## assign weight values - print(self.weights) - assign_weights(sess, weights, self) - del weights - - diff --git a/tensorlayer/models/vgg19.py b/tensorlayer/models/vgg19.py deleted file mode 100644 index 2e6aaa8..0000000 --- a/tensorlayer/models/vgg19.py +++ /dev/null @@ -1,175 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -VGG-19 for ImageNet. - -Introduction ----------------- -VGG is a convolutional neural network model proposed by K. Simonyan and A. Zisserman -from the University of Oxford in the paper “Very Deep Convolutional Networks for -Large-Scale Image Recognition” . 
The model achieves 92.7% top-5 test accuracy in ImageNet, -which is a dataset of over 14 million images belonging to 1000 classes. - -Download Pre-trained Model ----------------------------- -- Model weights in this example - vgg19.npy : https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/ - -Note ------- -- For simplified CNN layer see "Convolutional layer (Simplified)" -in read the docs website. -- When feeding other images to the model be sure to properly resize or crop them -beforehand. Distorted images might end up being misclassified. One way of safely -feeding images of multiple sizes is by doing center cropping. -""" - -import os -import numpy as np -import tensorflow as tf - -from tensorlayer import logging - -from tensorlayer.layers import Conv2d -from tensorlayer.layers import Dense -from tensorlayer.layers import Flatten -from tensorlayer.layers import Input -from tensorlayer.layers import MaxPool2d -from tensorlayer.layers import LayerList -from tensorlayer.models import Model - -from tensorlayer.files import maybe_download_and_extract -from tensorlayer.files import assign_weights - -__all__ = [ - 'VGG19', -] - - -class VGG19(Model): - """Pre-trained VGG-19 model. - - Parameters - ------------ - end_with : str - The end point of the model. Default ``fc3_relu`` i.e. the whole model. - - Examples - --------- - Classify ImageNet classes with VGG19, see `tutorial_models_vgg19.py `__ - - >>> # get the whole model - >>> vgg = tl.models.VGG19() - >>> # restore pre-trained VGG parameters - >>> vgg.restore_weights() - >>> # use for inferencing - >>> probs = tf.nn.softmax(vgg.outputs) - - Extract features with VGG19 and Train a classifier with 100 classes - - >>> # get VGG without the last layer - >>> vgg = tl.models.VGG19(end_with='fc2_relu') - >>> # add one more layer - >>> net = tl.layers.DenseLayer(vgg, 100, name='out') - >>> # restore pre-trained VGG parameters - >>> vgg.restore_weights() - >>> # train your own classifier (only update the last layer) - >>> train_params = tl.layers.get_variables_with_name('out') - - Reuse model - - >>> # get VGG without the last layer - >>> vgg1 = tl.models.VGG19(end_with='fc2_relu') - >>> # reuse the parameters of vgg1 with different input - >>> vgg2 = tl.models.VGG19(end_with='fc2_relu', reuse=True) - >>> # restore pre-trained VGG parameters (as they share parameters, we don’t need to restore vgg2) - >>> vgg1.restore_weights() - - """ - - def __init__(self, end_with='outputs', name=None): - super(VGG19, self).__init__() - self.end_with = end_with - - self.layer_names = ['conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', - 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'pool4', - 'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4', 'pool5', - 'flatten', 'fc1_relu', 'fc2_relu', 'outputs'] - self.innet = Input([None, 224, 224, 3]) - self.layers = LayerList([ - # conv1 - Conv2d(n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=3, name='conv1_1'), - Conv2d(n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=64, name='conv1_2'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1'), - - # conv2 - Conv2d(n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=64, name='conv2_1'), - Conv2d(n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=128, name='conv2_2'), - 
MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2'), - - # conv3 - Conv2d(n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=128, name='conv3_1'), - Conv2d(n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=256, name='conv3_2'), - Conv2d(n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=256, name='conv3_3'), - Conv2d(n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=256, name='conv3_4'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3'), - - # conv4 - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=256, name='conv4_1'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv4_2'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv4_3'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv4_4'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4'), - - # conv5 - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv5_1'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv5_2'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv5_3'), - Conv2d(n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', in_channels=512, name='conv5_4'), - MaxPool2d(filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5'), - Flatten(name='flatten'), - Dense(n_units=4096, act=tf.nn.relu, in_channels=512*7*7, name='fc1_relu'), - Dense(n_units=4096, act=tf.nn.relu, in_channels=4096, name='fc2_relu'), - Dense(n_units=1000, in_channels=4096, name='outputs'), - ][:self.layer_names.index(self.end_with) + 1]) - - def forward(self, inputs): - """ - inputs : tensor - Shape [None, 224, 224, 3], value range [0, 1]. - """ - outputs = inputs * 255.0 - mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean') - outputs = outputs - mean - - out = self.innet(outputs) - out = self.layers(out) - return out.outputs - - def restore_params(self, **kwargs): - raise Exception("please change restore_params --> restore_weights") - - def restore_weights(self, sess=None): - logging.info("Restore pre-trained weights") - ## download weights - maybe_download_and_extract( - 'vgg19.npy', 'models', - 'https://media.githubusercontent.com/media/tensorlayer/pretrained-models/master/models/', - expected_bytes=574670860 - ) - vgg19_npy_path = os.path.join('models', 'vgg19.npy') - npz = np.load(vgg19_npy_path, encoding='latin1').item() - - weights = [] - for val in sorted(npz.items()): - W = np.asarray(val[1][0]) - b = np.asarray(val[1][1]) - print(" Loading %s: %s, %s" % (val[0], W.shape, b.shape)) - weights.extend([W, b]) - if len(self.all_params) == len(weights): - break - ## assign weight values - print(self.weights) - assign_weights(sess, weights, self) - del weights - - diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py deleted file mode 100644 index c024a24..0000000 --- a/tensorlayer/nlp.py +++ /dev/null @@ -1,1140 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -import collections -from collections import Counter -import os -import random -import re -import subprocess -import tempfile -import warnings - -from six.moves import urllib -from six.moves import xrange - -import numpy as np - -import tensorflow as tf -from tensorflow.python.platform import gfile - -import tensorlayer as tl -from tensorlayer.lazy_imports import LazyImport - -nltk = LazyImport("nltk") - -__all__ = [ - 'generate_skip_gram_batch', - 'sample', - 'sample_top', - 'SimpleVocabulary', - 'Vocabulary', - 'process_sentence', - 'create_vocab', - 'simple_read_words', - 'read_words', - 'read_analogies_file', - 'build_vocab', - 'build_reverse_dictionary', - 'build_words_dataset', - 'words_to_word_ids', - 'word_ids_to_words', - 'save_vocab', - 'basic_tokenizer', - 'create_vocabulary', - 'initialize_vocabulary', - 'sentence_to_token_ids', - 'data_to_token_ids', - 'moses_multi_bleu', -] - - -def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_index=0): - """Generate a training batch for the Skip-Gram model. - - See `Word2Vec example `__. - - Parameters - ---------- - data : list of data - The context, usually a list of integer word IDs. - batch_size : int - Batch size to return. - num_skips : int - How many times to reuse an input to generate a label. - skip_window : int - How many words to consider left and right. - data_index : int - Index of the context location. This code uses `data_index` to track the position instead of yielding, unlike ``tl.iterate``. - - Returns - ------- - batch : list of data - Inputs. - labels : list of data - Labels. - data_index : int - Index of the context location. - - Examples - -------- - Setting num_skips=2, skip_window=1 uses the words immediately to the left and right. - In the same way, num_skips=4, skip_window=2 means use the nearby 4 words. - - >>> data = [1,2,3,4,5,6,7,8,9,10,11] - >>> batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0) - >>> print(batch) - [2 2 3 3 4 4 5 5] - >>> print(labels) - [[3] - [1] - [4] - [2] - [5] - [3] - [4] - [6]] - - """ - # global data_index # you can put data_index outside the function, then - # modify the global data_index in the function without returning it. - # note: instead of using yield, this code uses data_index to track the position. - - if batch_size % num_skips != 0: - raise Exception("batch_size should be able to be divided by num_skips.") - if num_skips > 2 * skip_window: - raise Exception("num_skips <= 2 * skip_window") - batch = np.ndarray(shape=(batch_size), dtype=np.int32) - labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) - span = 2 * skip_window + 1 # [ skip_window target skip_window ] - buffer = collections.deque(maxlen=span) - for _ in range(span): - buffer.append(data[data_index]) - data_index = (data_index + 1) % len(data) - for i in range(batch_size // num_skips): - target = skip_window # target label at the center of the buffer - targets_to_avoid = [skip_window] - for j in range(num_skips): - while target in targets_to_avoid: - target = random.randint(0, span - 1) - targets_to_avoid.append(target) - batch[i * num_skips + j] = buffer[skip_window] - labels[i * num_skips + j, 0] = buffer[target] - buffer.append(data[data_index]) - data_index = (data_index + 1) % len(data) - return batch, labels, data_index - - -def sample(a=None, temperature=1.0): - """Sample an index from a probability array. - - Parameters - ---------- - a : list of float - List of probabilities.
- temperature : float or None - The higher the temperature, the more uniform the distribution. When a = [0.1, 0.2, 0.7], - - temperature = 0.7, the distribution will be sharpened [0.05048273, 0.13588945, 0.81362782] - - temperature = 1.0, the distribution will be the same [0.1, 0.2, 0.7] - - temperature = 1.5, the distribution will be flattened [0.16008435, 0.25411807, 0.58579758] - - If None, ``np.argmax(a)`` is returned - - Notes - ------ - - Regardless of the temperature and input list, the sum of all probabilities will be one. Even if input list = [1, 100, 200], the sum of all probabilities will still be one. - - For large vocabulary size, choose a higher temperature or ``tl.nlp.sample_top`` to avoid error. - - """ - if a is None: - raise Exception("a : list of float") - b = np.copy(a) - try: - if temperature == 1: - return np.argmax(np.random.multinomial(1, a, 1)) - if temperature is None: - return np.argmax(a) - else: - a = np.log(a) / temperature - a = np.exp(a) / np.sum(np.exp(a)) - return np.argmax(np.random.multinomial(1, a, 1)) - except Exception: - # np.set_printoptions(threshold=np.nan) - # tl.logging.info(a) - # tl.logging.info(np.sum(a)) - # tl.logging.info(np.max(a)) - # tl.logging.info(np.min(a)) - # exit() - message = "For large vocabulary_size, choose a higher temperature\ - to avoid log error. Hint : use ``sample_top``. " - - warnings.warn(message, Warning) - # tl.logging.info(a) - # tl.logging.info(b) - return np.argmax(np.random.multinomial(1, b, 1)) - - -def sample_top(a=None, top_k=10): - """Sample from the ``top_k`` highest probabilities. - - Parameters - ---------- - a : list of float - List of probabilities. - top_k : int - Number of candidates to be considered. - - """ - if a is None: - a = [] - - idx = np.argpartition(a, -top_k)[-top_k:] - probs = a[idx] - # tl.logging.info("new %f" % probs) - probs = probs / np.sum(probs) - choice = np.random.choice(idx, p=probs) - return choice - # old implementation - # a = np.array(a) - # idx = np.argsort(a)[::-1] - # idx = idx[:top_k] - # # a = a[idx] - # probs = a[idx] - # tl.logging.info("prev %f" % probs) - # # probs = probs / np.sum(probs) - # # choice = np.random.choice(idx, p=probs) - # # return choice - - -# Vector representations of words (Advanced) UNDOCUMENTED -class SimpleVocabulary(object): - """Simple vocabulary wrapper, see create_vocab(). - - Parameters - ------------ - vocab : dictionary - A dictionary that maps word to ID. - unk_id : int - The ID for the 'unknown' word. - - """ - - def __init__(self, vocab, unk_id): - """Initialize the vocabulary.""" - self._vocab = vocab - self._unk_id = unk_id - - def word_to_id(self, word): - """Returns the integer id of a word string.""" - if word in self._vocab: - return self._vocab[word] - else: - return self._unk_id - - -class Vocabulary(object): - """Vocabulary class for a given vocabulary, providing word-to-id and id-to-word conversion. - See create_vocab() and ``tutorial_tfrecord3.py``. - - Parameters - ----------- - vocab_file : str - The file containing the vocabulary (can be created via ``tl.nlp.create_vocab``), where the words are the first whitespace-separated token on each line (other tokens are ignored) and the word ids are the corresponding line numbers. - start_word : str - Special word denoting sentence start. - end_word : str - Special word denoting sentence end. - unk_word : str - Special word denoting unknown words. - - Attributes - ------------ - vocab : dictionary - A dictionary that maps word to ID. - reverse_vocab : list of str - A list that maps ID to word. - start_id : int - For start ID.
- end_id : int - For end ID. - unk_id : int - For unknown ID. - pad_id : int - For Padding ID. - - Examples - ------------- - The vocab file looks as follows; it includes `start_word`, `end_word`, etc. - - >>> a 969108 - >>> 586368 - >>> 586368 - >>> . 440479 - >>> on 213612 - >>> of 202290 - >>> the 196219 - >>> in 182598 - >>> with 152984 - >>> and 139109 - >>> is 97322 - - """ - - def __init__(self, vocab_file, start_word="", end_word="", unk_word="", pad_word=""): - if not tf.gfile.Exists(vocab_file): - tl.logging.fatal("Vocab file %s not found." % vocab_file) - tl.logging.info("Initializing vocabulary from file: %s" % vocab_file) - - with tf.gfile.GFile(vocab_file, mode="r") as f: - reverse_vocab = list(f.readlines()) - reverse_vocab = [line.split()[0] for line in reverse_vocab] - # assert start_word in reverse_vocab - # assert end_word in reverse_vocab - if start_word not in reverse_vocab: # haodong - reverse_vocab.append(start_word) - if end_word not in reverse_vocab: - reverse_vocab.append(end_word) - if unk_word not in reverse_vocab: - reverse_vocab.append(unk_word) - if pad_word not in reverse_vocab: - reverse_vocab.append(pad_word) - - vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) - - tl.logging.info("Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word)) - tl.logging.info(" vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab)) - # tl.logging.info(" vocabulary with %d words" % len(vocab)) - - self.vocab = vocab # vocab[word] = id - self.reverse_vocab = reverse_vocab # reverse_vocab[id] = word - - # Save special word ids. - self.start_id = vocab[start_word] - self.end_id = vocab[end_word] - self.unk_id = vocab[unk_word] - self.pad_id = vocab[pad_word] - tl.logging.info(" start_id: %d" % self.start_id) - tl.logging.info(" end_id : %d" % self.end_id) - tl.logging.info(" unk_id : %d" % self.unk_id) - tl.logging.info(" pad_id : %d" % self.pad_id) - - def word_to_id(self, word): - """Returns the integer word id of a word string.""" - if word in self.vocab: - return self.vocab[word] - else: - return self.unk_id - - def id_to_word(self, word_id): - """Returns the word string of an integer word id.""" - if word_id >= len(self.reverse_vocab): - return self.reverse_vocab[self.unk_id] - else: - return self.reverse_vocab[word_id] - - -def process_sentence(sentence, start_word="", end_word=""): - """Separate a sentence string into a list of string words, and add start_word and end_word; - see ``create_vocab()`` and ``tutorial_tfrecord3.py``. - - Parameters - ---------- - sentence : str - A sentence. - start_word : str or None - The start word. If None, no start word will be appended. - end_word : str or None - The end word. If None, no end word will be appended. - - Returns - --------- - list of str - A list of strings; the sentence separated into words. - - Examples - ----------- - >>> c = "how are you?" - >>> c = tl.nlp.process_sentence(c) - >>> print(c) - ['', 'how', 'are', 'you', '?', ''] - - Notes - ------- - - You have to install the following packages. - - `Installing NLTK `__ - - `Installing NLTK data `__ - - """ - if start_word is not None: - process_sentence = [start_word] - else: - process_sentence = [] - process_sentence.extend(nltk.tokenize.word_tokenize(sentence.lower())) - - if end_word is not None: - process_sentence.append(end_word) - return process_sentence - - -def create_vocab(sentences, word_counts_output_file, min_word_count=1): - """Creates the vocabulary of word to word_id. - - See ``tutorial_tfrecord3.py``.
- - The vocabulary is saved to disk in a text file of word counts. The id of each - word in the file is its corresponding 0-based line number. - - Parameters - ------------ - sentences : list of list of str - All sentences for creating the vocabulary. - word_counts_output_file : str - The file name. - min_word_count : int - Minimum number of occurrences for a word. - - Returns - -------- - :class:`SimpleVocabulary` - The simple vocabulary object, see :class:`Vocabulary` for more. - - Examples - -------- - Pre-process sentences - - >>> captions = ["one two , three", "four five five"] - >>> processed_capts = [] - >>> for c in captions: - >>> c = tl.nlp.process_sentence(c, start_word="", end_word="") - >>> processed_capts.append(c) - >>> print(processed_capts) - ...[['', 'one', 'two', ',', 'three', ''], ['', 'four', 'five', 'five', '']] - - Create vocabulary - - >>> tl.nlp.create_vocab(processed_capts, word_counts_output_file='vocab.txt', min_word_count=1) - Creating vocabulary. - Total words: 8 - Words in vocabulary: 8 - Wrote vocabulary file: vocab.txt - - Get vocabulary object - - >>> vocab = tl.nlp.Vocabulary('vocab.txt', start_word="", end_word="", unk_word="") - INFO:tensorflow:Initializing vocabulary from file: vocab.txt - [TL] Vocabulary from vocab.txt : - vocabulary with 10 words (includes start_word, end_word, unk_word) - start_id: 2 - end_id: 3 - unk_id: 9 - pad_id: 0 - - """ - tl.logging.info("Creating vocabulary.") - - counter = Counter() - - for c in sentences: - counter.update(c) - # tl.logging.info('c',c) - tl.logging.info(" Total words: %d" % len(counter)) - - # Filter uncommon words and sort by descending count. - word_counts = [x for x in counter.items() if x[1] >= min_word_count] - word_counts.sort(key=lambda x: x[1], reverse=True) - word_counts = [("", 0)] + word_counts # 1st id should be reserved for padding - # tl.logging.info(word_counts) - tl.logging.info(" Words in vocabulary: %d" % len(word_counts)) - - # Write out the word counts file. - with tf.gfile.FastGFile(word_counts_output_file, "w") as f: - f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts])) - tl.logging.info(" Wrote vocabulary file: %s" % word_counts_output_file) - - # Create the vocabulary dictionary. - reverse_vocab = [x[0] for x in word_counts] - unk_id = len(reverse_vocab) - vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) - vocab = SimpleVocabulary(vocab_dict, unk_id) - - return vocab - - -# Vector representations of words -def simple_read_words(filename="nietzsche.txt"): - """Read context from file without any preprocessing. - - Parameters - ---------- - filename : str - A file path (like .txt file) - - Returns - -------- - str - The context in a string. - - """ - with open(filename, "r") as f: - words = f.read() - return words - - -def read_words(filename="nietzsche.txt", replace=None): - """Read list format context from a file. - - For customized read_words method, see ``tutorial_generate_text.py``. - - Parameters - ---------- - filename : str - a file path. - replace : list of str - replace original string by target string. - - Returns - ------- - list of str - The context in a list (split using space). 
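- 
-    Examples
-    ---------
-    A usage sketch (the ``<eos>`` replacement mirrors the PTB convention; this is an illustration, not an upstream example):
- 
-    >>> words = tl.nlp.read_words("ptb.train.txt", replace=['\n', '<eos>'])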
- """ - if replace is None: - replace = ['\n', ''] - - with tf.gfile.GFile(filename, "r") as f: - try: # python 3.4 or older - context_list = f.read().replace(*replace).split() - except Exception: # python 3.5 - f.seek(0) - replace = [x.encode('utf-8') for x in replace] - context_list = f.read().replace(*replace).split() - return context_list - - -def read_analogies_file(eval_file='questions-words.txt', word2id=None): - """Reads through an analogy question file, return its id format. - - Parameters - ---------- - eval_file : str - The file name. - word2id : dictionary - a dictionary that maps word to ID. - - Returns - -------- - numpy.array - A ``[n_examples, 4]`` numpy array containing the analogy question's word IDs. - - Examples - --------- - The file should be in this format - - >>> : capital-common-countries - >>> Athens Greece Baghdad Iraq - >>> Athens Greece Bangkok Thailand - >>> Athens Greece Beijing China - >>> Athens Greece Berlin Germany - >>> Athens Greece Bern Switzerland - >>> Athens Greece Cairo Egypt - >>> Athens Greece Canberra Australia - >>> Athens Greece Hanoi Vietnam - >>> Athens Greece Havana Cuba - - Get the tokenized analogy question data - - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) - >>> analogy_questions = tl.nlp.read_analogies_file(eval_file='questions-words.txt', word2id=dictionary) - >>> print(analogy_questions) - [[ 3068 1248 7161 1581] - [ 3068 1248 28683 5642] - [ 3068 1248 3878 486] - ..., - [ 1216 4309 19982 25506] - [ 1216 4309 3194 8650] - [ 1216 4309 140 312]] - - """ - if word2id is None: - word2id = {} - - questions = [] - questions_skipped = 0 - - with open(eval_file, "rb") as analogy_f: - for line in analogy_f: - if line.startswith(b":"): # Skip comments. - continue - words = line.strip().lower().split(b" ") # lowercase - ids = [word2id.get(w.strip().decode()) for w in words] - if None in ids or len(ids) != 4: - questions_skipped += 1 - else: - questions.append(np.array(ids)) - tl.logging.info("Eval analogy file: %s" % eval_file) - tl.logging.info("Questions: %d", len(questions)) - tl.logging.info("Skipped: %d", questions_skipped) - analogy_questions = np.array(questions, dtype=np.int32) - return analogy_questions - - -def build_vocab(data): - """Build vocabulary. - - Given the context in list format. - Return the vocabulary, which is a dictionary for word to id. - e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } - - Parameters - ---------- - data : list of str - The context in list format - - Returns - -------- - dictionary - that maps word to unique ID. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } - - References - --------------- - - `tensorflow.models.rnn.ptb.reader `_ - - Examples - -------- - >>> data_path = os.getcwd() + '/simple-examples/data' - >>> train_path = os.path.join(data_path, "ptb.train.txt") - >>> word_to_id = build_vocab(read_txt_words(train_path)) - - """ - # data = _read_words(filename) - counter = collections.Counter(data) - # tl.logging.info('counter %s' % counter) # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1 - count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) - # tl.logging.info('count_pairs %s' % count_pairs) # convert dictionary to list of tuple, e.g. 
('ssangyong', 1), ('swapo', 1), ('wachter', 1) - words, _ = list(zip(*count_pairs)) - word_to_id = dict(zip(words, range(len(words)))) - # tl.logging.info(words) # list of words - # tl.logging.info(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 - return word_to_id - - -def build_reverse_dictionary(word_to_id): - """Given a dictionary that maps word to integer id. - Returns a reverse dictionary that maps a id to word. - - Parameters - ---------- - word_to_id : dictionary - that maps word to ID. - - Returns - -------- - dictionary - A dictionary that maps IDs to words. - - """ - reverse_dictionary = dict(zip(word_to_id.values(), word_to_id.keys())) - return reverse_dictionary - - -def build_words_dataset(words=None, vocabulary_size=50000, printable=True, unk_key='UNK'): - """Build the words dictionary and replace rare words with 'UNK' token. - The most common word has the smallest integer id. - - Parameters - ---------- - words : list of str or byte - The context in list format. You may need to do preprocessing on the words, such as lower case, remove marks etc. - vocabulary_size : int - The maximum vocabulary size, limiting the vocabulary size. Then the script replaces rare words with 'UNK' token. - printable : boolean - Whether to print the read vocabulary size of the given words. - unk_key : str - Represent the unknown words. - - Returns - -------- - data : list of int - The context in a list of ID. - count : list of tuple and list - Pair words and IDs. - - count[0] is a list : the number of rare words - - count[1:] are tuples : the number of occurrence of each word - - e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] - dictionary : dictionary - It is `word_to_id` that maps word to ID. - reverse_dictionary : a dictionary - It is `id_to_word` that maps ID to word. - - Examples - -------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size) - - References - ----------------- - - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `__ - - """ - if words is None: - raise Exception("words : list of str or byte") - - count = [[unk_key, -1]] - count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) - dictionary = dict() - for word, _ in count: - dictionary[word] = len(dictionary) - data = list() - unk_count = 0 - for word in words: - if word in dictionary: - index = dictionary[word] - else: - index = 0 # dictionary['UNK'] - unk_count += 1 - data.append(index) - count[0][1] = unk_count - reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) - if printable: - tl.logging.info('Real vocabulary size %d' % len(collections.Counter(words).keys())) - tl.logging.info('Limited vocabulary size {}'.format(vocabulary_size)) - if len(collections.Counter(words).keys()) < vocabulary_size: - raise Exception( - "len(collections.Counter(words).keys()) >= vocabulary_size , the limited vocabulary_size must be less than or equal to the read vocabulary_size" - ) - return data, count, dictionary, reverse_dictionary - - -def words_to_word_ids(data=None, word_to_id=None, unk_key='UNK'): - """Convert a list of string (words) to IDs. - - Parameters - ---------- - data : list of string or byte - The context in list format - word_to_id : a dictionary - that maps word to ID. - unk_key : str - Represent the unknown words. 
- - Returns - -------- - list of int - A list of IDs to represent the context. - - Examples - -------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) - >>> context = [b'hello', b'how', b'are', b'you'] - >>> ids = tl.nlp.words_to_word_ids(context, dictionary) - >>> context = tl.nlp.word_ids_to_words(ids, reverse_dictionary) - >>> print(ids) - [6434, 311, 26, 207] - >>> print(context) - [b'hello', b'how', b'are', b'you'] - - References - --------------- - - `tensorflow.models.rnn.ptb.reader `__ - - """ - if data is None: - raise Exception("data : list of string or byte") - if word_to_id is None: - raise Exception("word_to_id : a dictionary") - # if isinstance(data[0], six.string_types): - # tl.logging.info(type(data[0])) - # # exit() - # tl.logging.info(data[0]) - # tl.logging.info(word_to_id) - # return [word_to_id[str(word)] for word in data] - # else: - - word_ids = [] - for word in data: - if word_to_id.get(word) is not None: - word_ids.append(word_to_id[word]) - else: - word_ids.append(word_to_id[unk_key]) - return word_ids - # return [word_to_id[word] for word in data] # this one - - # if isinstance(data[0], str): - # # tl.logging.info('is a string object') - # return [word_to_id[word] for word in data] - # else:#if isinstance(s, bytes): - # # tl.logging.info('is a unicode object') - # # tl.logging.info(data[0]) - # return [word_to_id[str(word)] f - - -def word_ids_to_words(data, id_to_word): - """Convert a list of integers to strings (words). - - Parameters - ---------- - data : list of int - The context in list format. - id_to_word : dictionary - A dictionary that maps ID to word. - - Returns - -------- - list of str - A list of strings or bytes representing the context. - - Examples - --------- - >>> see ``tl.nlp.words_to_word_ids`` - - """ - return [id_to_word[i] for i in data] - - -def save_vocab(count=None, name='vocab.txt'): - """Save the vocabulary to a file so the model can be reloaded. - - Parameters - ---------- - count : a list of tuple and list - count[0] is a list : the number of rare words, - count[1:] are tuples : the number of occurrence of each word, - e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] - - Examples - --------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) - >>> tl.nlp.save_vocab(count, name='vocab_text8.txt') - >>> vocab_text8.txt - UNK 418391 - the 1061396 - of 593677 - and 416629 - one 411764 - in 372201 - a 325873 - to 316376 - - """ - if count is None: - count = [] - - pwd = os.getcwd() - vocabulary_size = len(count) - with open(os.path.join(pwd, name), "w") as f: - for i in xrange(vocabulary_size): - f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1])) - tl.logging.info("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd)) - - -# Functions for translation - - -def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): - """Very basic tokenizer: split the sentence into a list of tokens. - - Parameters - ----------- - sentence : tensorflow.python.platform.gfile.GFile Object - _WORD_SPLIT : regular expression for word splitting.
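- 
-    Returns
-    --------
-    list of bytes
-        The tokens; punctuation matched by ``_WORD_SPLIT`` is split off into separate tokens. (This section is inferred from the function body below, which converts the sentence to bytes and filters empty fragments.)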
- - - Examples - -------- - >>> see create_vocabulary - >>> from tensorflow.python.platform import gfile - >>> train_path = "wmt/giga-fren.release2" - >>> with gfile.GFile(train_path + ".en", mode="rb") as f: - >>> for line in f: - >>> tokens = tl.nlp.basic_tokenizer(line) - >>> tl.logging.info(tokens) - >>> exit() - [b'Changing', b'Lives', b'|', b'Changing', b'Society', b'|', b'How', - b'It', b'Works', b'|', b'Technology', b'Drives', b'Change', b'Home', - b'|', b'Concepts', b'|', b'Teachers', b'|', b'Search', b'|', b'Overview', - b'|', b'Credits', b'|', b'HHCC', b'Web', b'|', b'Reference', b'|', - b'Feedback', b'Virtual', b'Museum', b'of', b'Canada', b'Home', b'Page'] - - References - ---------- - - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` - - """ - words = [] - sentence = tf.compat.as_bytes(sentence) - for space_separated_fragment in sentence.strip().split(): - words.extend(re.split(_WORD_SPLIT, space_separated_fragment)) - return [w for w in words if w] - - -def create_vocabulary( - vocabulary_path, data_path, max_vocabulary_size, tokenizer=None, normalize_digits=True, - _DIGIT_RE=re.compile(br"\d"), _START_VOCAB=None -): - r"""Create vocabulary file (if it does not exist yet) from data file. - - Data file is assumed to contain one sentence per line. Each sentence is - tokenized and digits are normalized (if normalize_digits is set). - Vocabulary contains the most-frequent tokens up to max_vocabulary_size. - We write it to vocabulary_path in a one-token-per-line format, so that later - token in the first line gets id=0, second line gets id=1, and so on. - - Parameters - ----------- - vocabulary_path : str - Path where the vocabulary will be created. - data_path : str - Data file that will be used to create vocabulary. - max_vocabulary_size : int - Limit on the size of the created vocabulary. - tokenizer : function - A function to use to tokenize each data sentence. If None, basic_tokenizer will be used. - normalize_digits : boolean - If true, all digits are replaced by `0`. - _DIGIT_RE : regular expression function - Default is ``re.compile(br"\d")``. - _START_VOCAB : list of str - The pad, go, eos and unk token, default is ``[b"_PAD", b"_GO", b"_EOS", b"_UNK"]``. - - References - ---------- - - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` - - """ - if _START_VOCAB is None: - _START_VOCAB = [b"_PAD", b"_GO", b"_EOS", b"_UNK"] - if not gfile.Exists(vocabulary_path): - tl.logging.info("Creating vocabulary %s from data %s" % (vocabulary_path, data_path)) - vocab = {} - with gfile.GFile(data_path, mode="rb") as f: - counter = 0 - for line in f: - counter += 1 - if counter % 100000 == 0: - tl.logging.info(" processing line %d" % counter) - tokens = tokenizer(line) if tokenizer else basic_tokenizer(line) - for w in tokens: - word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w - if word in vocab: - vocab[word] += 1 - else: - vocab[word] = 1 - vocab_list = _START_VOCAB + sorted(vocab, key=vocab.get, reverse=True) - if len(vocab_list) > max_vocabulary_size: - vocab_list = vocab_list[:max_vocabulary_size] - with gfile.GFile(vocabulary_path, mode="wb") as vocab_file: - for w in vocab_list: - vocab_file.write(w + b"\n") - else: - tl.logging.info("Vocabulary %s from data %s exists" % (vocabulary_path, data_path)) - - -def initialize_vocabulary(vocabulary_path): - """Initialize vocabulary from file, return the `word_to_id` (dictionary) - and `id_to_word` (list). 
- - We assume the vocabulary is stored one-item-per-line, so a file containing the lines ``dog`` and ``cat`` will result in a vocabulary {"dog": 0, "cat": 1}, and this function will also return the reversed-vocabulary ["dog", "cat"]. - - Parameters - ----------- - vocabulary_path : str - Path to the file containing the vocabulary. - - Returns - -------- - vocab : dictionary - A dictionary that maps word to ID. - rev_vocab : list of bytes - A list that maps ID to word. - - Examples - --------- - >>> Assume 'test' contains - dog - cat - bird - >>> vocab, rev_vocab = tl.nlp.initialize_vocabulary("test") - >>> print(vocab) - >>> {b'cat': 1, b'dog': 0, b'bird': 2} - >>> print(rev_vocab) - >>> [b'dog', b'cat', b'bird'] - - Raises - ------- - ValueError : if the provided vocabulary_path does not exist. - - """ - if gfile.Exists(vocabulary_path): - rev_vocab = [] - with gfile.GFile(vocabulary_path, mode="rb") as f: - rev_vocab.extend(f.readlines()) - rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab] - vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)]) - return vocab, rev_vocab - else: - raise ValueError("Vocabulary file %s not found." % vocabulary_path) - - -def sentence_to_token_ids( - sentence, vocabulary, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d") -): - """Convert a string to a list of integers representing token-ids. - - For example, a sentence "I have a dog" may become tokenized into - ["I", "have", "a", "dog"] and with vocabulary {"I": 1, "have": 2, - "a": 4, "dog": 7} this function will return [1, 2, 4, 7]. - - Parameters - ----------- - sentence : tensorflow.python.platform.gfile.GFile Object - The sentence in bytes format to convert to token-ids, see ``basic_tokenizer()`` and ``data_to_token_ids()``. - vocabulary : dictionary - A dictionary mapping tokens to integers. - tokenizer : function - A function to use to tokenize each sentence. If None, ``basic_tokenizer`` will be used. - normalize_digits : boolean - If true, all digits are replaced by 0. - - Returns - -------- - list of int - The token-ids for the sentence. - - """ - if tokenizer: - words = tokenizer(sentence) - else: - words = basic_tokenizer(sentence) - if not normalize_digits: - return [vocabulary.get(w, UNK_ID) for w in words] - # Normalize digits by 0 before looking words up in the vocabulary. - return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words] - - -def data_to_token_ids( - data_path, target_path, vocabulary_path, tokenizer=None, normalize_digits=True, UNK_ID=3, - _DIGIT_RE=re.compile(br"\d") -): - """Tokenize data file and turn into token-ids using given vocabulary file. - - This function loads data line-by-line from data_path, calls the above - sentence_to_token_ids, and saves the result to target_path. See comment - for sentence_to_token_ids on the details of token-ids format. - - Parameters - ----------- - data_path : str - Path to the data file in one-sentence-per-line format. - target_path : str - Path where the file with token-ids will be created. - vocabulary_path : str - Path to the vocabulary file. - tokenizer : function - A function to use to tokenize each sentence. If None, ``basic_tokenizer`` will be used. - normalize_digits : boolean - If true, all digits are replaced by 0.
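- 
-    Examples
-    ---------
-    A sketch of the usual pipeline (the file names are hypothetical):
- 
-    >>> tl.nlp.create_vocabulary("vocab40000.en", "train.en", 40000)
-    >>> tl.nlp.data_to_token_ids("train.en", "train.ids40000.en", "vocab40000.en")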
- - References - ---------- - - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` - - """ - if not gfile.Exists(target_path): - tl.logging.info("Tokenizing data in %s" % data_path) - vocab, _ = initialize_vocabulary(vocabulary_path) - with gfile.GFile(data_path, mode="rb") as data_file: - with gfile.GFile(target_path, mode="w") as tokens_file: - counter = 0 - for line in data_file: - counter += 1 - if counter % 100000 == 0: - tl.logging.info(" tokenizing line %d" % counter) - token_ids = sentence_to_token_ids( - line, vocab, tokenizer, normalize_digits, UNK_ID=UNK_ID, _DIGIT_RE=_DIGIT_RE - ) - tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n") - else: - tl.logging.info("Target path %s exists" % target_path) - - -def moses_multi_bleu(hypotheses, references, lowercase=False): - """Calculate the BLEU score for hypotheses and references - using the MOSES multi-bleu.perl script. - - Parameters - ------------ - hypotheses : numpy.array.string - A numpy array of strings where each string is a single example. - references : numpy.array.string - A numpy array of strings where each string is a single example. - lowercase : boolean - If True, pass the "-lc" flag to the multi-bleu script. - - Examples - --------- - >>> hypotheses = ["a bird is flying on the sky"] - >>> references = ["two birds are flying on the sky", "a bird is on the top of the tree", "an airplane is on the sky",] - >>> score = tl.nlp.moses_multi_bleu(hypotheses, references) - - Returns - -------- - float - The BLEU score. - - References - ---------- - - `Google/seq2seq/metric/bleu `__ - - """ - if np.size(hypotheses) == 0: - return np.float32(0.0) - - # Get MOSES multi-bleu script - try: - multi_bleu_path, _ = urllib.request.urlretrieve( - "https://raw.githubusercontent.com/moses-smt/mosesdecoder/" - "master/scripts/generic/multi-bleu.perl" - ) - os.chmod(multi_bleu_path, 0o755) - except Exception: # pylint: disable=W0702 - tl.logging.info("Unable to fetch multi-bleu.perl script, using local.") - metrics_dir = os.path.dirname(os.path.realpath(__file__)) - bin_dir = os.path.abspath(os.path.join(metrics_dir, "..", "..", "bin")) - multi_bleu_path = os.path.join(bin_dir, "tools/multi-bleu.perl") - - # Dump hypotheses and references to tempfiles - hypothesis_file = tempfile.NamedTemporaryFile() - hypothesis_file.write("\n".join(hypotheses).encode("utf-8")) - hypothesis_file.write(b"\n") - hypothesis_file.flush() - reference_file = tempfile.NamedTemporaryFile() - reference_file.write("\n".join(references).encode("utf-8")) - reference_file.write(b"\n") - reference_file.flush() - - # Calculate BLEU using multi-bleu script - with open(hypothesis_file.name, "r") as read_pred: - bleu_cmd = [multi_bleu_path] - if lowercase: - bleu_cmd += ["-lc"] - bleu_cmd += [reference_file.name] - try: - bleu_out = subprocess.check_output(bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT) - bleu_out = bleu_out.decode("utf-8") - bleu_score = re.search(r"BLEU = (.+?),", bleu_out).group(1) - bleu_score = float(bleu_score) - except subprocess.CalledProcessError as error: - if error.output is not None: - tl.logging.warning("multi-bleu.perl script returned non-zero exit code") - tl.logging.warning(error.output) - bleu_score = np.float32(0.0) - - # Close temp files - hypothesis_file.close() - reference_file.close() - - return np.float32(bleu_score) diff --git a/tensorlayer/optimizers/__init__.py b/tensorlayer/optimizers/__init__.py deleted file mode 100644 index e74b388..0000000 --- a/tensorlayer/optimizers/__init__.py +++ /dev/null @@
-1,12 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -TensorLayer provides rich layer implementations trailed for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow user to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API `__. -""" - -from .amsgrad import AMSGrad diff --git a/tensorlayer/optimizers/amsgrad.py b/tensorlayer/optimizers/amsgrad.py deleted file mode 100644 index 290eced..0000000 --- a/tensorlayer/optimizers/amsgrad.py +++ /dev/null @@ -1,199 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -"""AMSGrad Implementation based on the paper: "On the Convergence of Adam and Beyond" (ICLR 2018) -Article Link: https://openreview.net/pdf?id=ryQu7f-RZ -Original Implementation by: https://github.com/taki0112/AMSGrad-Tensorflow -""" - -from tensorflow.python.eager import context -from tensorflow.python.framework import ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import optimizer - - -class AMSGrad(optimizer.Optimizer): - """Implementation of the AMSGrad optimization algorithm. - - See: `On the Convergence of Adam and Beyond - [Reddi et al., 2018] `__. - - Parameters - ---------- - learning_rate: float - A Tensor or a floating point value. The learning rate. - beta1: float - A float value or a constant float tensor. - The exponential decay rate for the 1st moment estimates. - beta2: float - A float value or a constant float tensor. - The exponential decay rate for the 2nd moment estimates. - epsilon: float - A small constant for numerical stability. - This epsilon is "epsilon hat" in the Kingma and Ba paper - (in the formula just before Section 2.1), not the epsilon in Algorithm 1 of the paper. - use_locking: bool - If True use locks for update operations. - name: str - Optional name for the operations created when applying gradients. - Defaults to "AMSGrad". - """ - - def __init__(self, learning_rate=0.01, beta1=0.9, beta2=0.99, epsilon=1e-8, use_locking=False, name="AMSGrad"): - """Construct a new Adam optimizer.""" - super(AMSGrad, self).__init__(use_locking, name) - self._lr = learning_rate - self._beta1 = beta1 - self._beta2 = beta2 - self._epsilon = epsilon - - self._lr_t = None - self._beta1_t = None - self._beta2_t = None - self._epsilon_t = None - - self._beta1_power = None - self._beta2_power = None - - def _create_slots(self, var_list): - first_var = min(var_list, key=lambda x: x.name) - - create_new = self._beta1_power is None - if not create_new and context.in_graph_mode(): - create_new = (self._beta1_power.graph is not first_var.graph) - - if create_new: - with ops.colocate_with(first_var): - self._beta1_power = variable_scope.variable(self._beta1, name="beta1_power", trainable=False) - self._beta2_power = variable_scope.variable(self._beta2, name="beta2_power", trainable=False) - # Create slots for the first and second moments. 
- for v in var_list: - self._zeros_slot(v, "m", self._name) - self._zeros_slot(v, "v", self._name) - self._zeros_slot(v, "vhat", self._name) - - def _prepare(self): - self._lr_t = ops.convert_to_tensor(self._lr) - self._beta1_t = ops.convert_to_tensor(self._beta1) - self._beta2_t = ops.convert_to_tensor(self._beta2) - self._epsilon_t = ops.convert_to_tensor(self._epsilon) - - def _apply_dense(self, grad, var): - beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype) - beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype) - lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) - beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) - beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) - epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) - - lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) - - # m_t = beta1 * m + (1 - beta1) * g_t - m = self.get_slot(var, "m") - m_scaled_g_values = grad * (1 - beta1_t) - m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) - - # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) - v = self.get_slot(var, "v") - v_scaled_g_values = (grad * grad) * (1 - beta2_t) - v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) - - # amsgrad - vhat = self.get_slot(var, "vhat") - vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) - v_sqrt = math_ops.sqrt(vhat_t) - - var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) - return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) - - def _resource_apply_dense(self, grad, var): - var = var.handle - beta1_power = math_ops.cast(self._beta1_power, grad.dtype.base_dtype) - beta2_power = math_ops.cast(self._beta2_power, grad.dtype.base_dtype) - lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype) - beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype) - beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype) - epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype) - - lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) - - # m_t = beta1 * m + (1 - beta1) * g_t - m = self.get_slot(var, "m").handle - m_scaled_g_values = grad * (1 - beta1_t) - m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) - - # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) - v = self.get_slot(var, "v").handle - v_scaled_g_values = (grad * grad) * (1 - beta2_t) - v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) - - # amsgrad - vhat = self.get_slot(var, "vhat").handle - vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) - v_sqrt = math_ops.sqrt(vhat_t) - - var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) - return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) - - def _apply_sparse_shared(self, grad, var, indices, scatter_add): - beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype) - beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype) - lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) - beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) - beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) - epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) - - lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) - - # m_t = beta1 * m + (1 - beta1) * g_t - m = self.get_slot(var, "m") - m_scaled_g_values = 
grad * (1 - beta1_t) - m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking) - with ops.control_dependencies([m_t]): - m_t = scatter_add(m, indices, m_scaled_g_values) - - # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) - v = self.get_slot(var, "v") - v_scaled_g_values = (grad * grad) * (1 - beta2_t) - v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking) - with ops.control_dependencies([v_t]): - v_t = scatter_add(v, indices, v_scaled_g_values) - - # amsgrad - vhat = self.get_slot(var, "vhat") - vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) - v_sqrt = math_ops.sqrt(vhat_t) - var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) - return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) - - def _apply_sparse(self, grad, var): - return self._apply_sparse_shared( - grad.values, - var, - grad.indices, - lambda x, i, v: state_ops. - scatter_add( # pylint: disable=g-long-lambda - x, i, v, use_locking=self._use_locking - ) - ) - - def _resource_scatter_add(self, x, i, v): - with ops.control_dependencies([resource_variable_ops.resource_scatter_add(x.handle, i, v)]): - return x.value() - - def _resource_apply_sparse(self, grad, var, indices): - return self._apply_sparse_shared(grad, var, indices, self._resource_scatter_add) - - def _finish(self, update_ops, name_scope): - # Update the power accumulators. - with ops.control_dependencies(update_ops): - with ops.colocate_with(self._beta1_power): - update_beta1 = self._beta1_power.assign( - self._beta1_power * self._beta1_t, use_locking=self._use_locking - ) - update_beta2 = self._beta2_power.assign( - self._beta2_power * self._beta2_t, use_locking=self._use_locking - ) - return control_flow_ops.group(*update_ops + [update_beta1, update_beta2], name=name_scope) diff --git a/tensorlayer/package_info.py b/tensorlayer/package_info.py deleted file mode 100644 index 51515a7..0000000 --- a/tensorlayer/package_info.py +++ /dev/null @@ -1,24 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -"""Deep learning and Reinforcement learning library for Researchers and Engineers.""" - -MAJOR = 1 -MINOR = 11 -PATCH = 1 -PRE_RELEASE = '' -# Use the following formatting: (major, minor, patch, prerelease) -VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) - -__shortversion__ = '.'.join(map(str, VERSION[:3])) -__version__ = '.'.join(map(str, VERSION[:3])) + ''.join(VERSION[3:]) - -__package_name__ = 'tensorlayer' -__contact_names__ = 'TensorLayer Contributors' -__contact_emails__ = 'tensorlayer@gmail.com' -__homepage__ = 'http://tensorlayer.readthedocs.io/en/latest/' -__repository_url__ = 'https://github.com/tensorlayer/tensorlayer' -__download_url__ = 'https://github.com/tensorlayer/tensorlayer' -__description__ = 'High Level Tensorflow Deep Learning Library for Researcher and Engineer.' -__license__ = 'apache' -__keywords__ = 'deep learning, machine learning, computer vision, nlp, ' -__keywords__ += 'supervised learning, unsupervised learning, reinforcement learning, tensorflow' diff --git a/tensorlayer/prepro.py b/tensorlayer/prepro.py deleted file mode 100644 index b729bd6..0000000 --- a/tensorlayer/prepro.py +++ /dev/null @@ -1,4184 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -import copy - -import threading -import time - -import numpy as np - -import tensorlayer as tl - -import scipy -import scipy.ndimage as ndi - -from scipy import linalg -from scipy.ndimage.filters import gaussian_filter -from scipy.ndimage.interpolation import map_coordinates - -import skimage - -from skimage import exposure -from skimage import transform - -from skimage.morphology import disk -from skimage.morphology import erosion as _erosion -from skimage.morphology import binary_dilation as _binary_dilation -from skimage.morphology import binary_erosion as _binary_erosion - -from six.moves import range -from tensorlayer.lazy_imports import LazyImport -import PIL -cv2 = LazyImport("cv2") -import math -import random - -# linalg https://docs.scipy.org/doc/scipy/reference/linalg.html -# ndimage https://docs.scipy.org/doc/scipy/reference/ndimage.html - -__all__ = [ - 'threading_data', - 'affine_rotation_matrix', - 'affine_horizontal_flip_matrix', - 'affine_shift_matrix', - 'affine_shear_matrix', - 'affine_zoom_matrix', - 'affine_respective_zoom_matrix', - 'transform_matrix_offset_center', - 'affine_transform', - 'affine_transform_cv2', - 'affine_transform_keypoints', - 'projective_transform_by_points', - 'rotation', - 'rotation_multi', - 'crop', - 'crop_multi', - 'flip_axis', - 'flip_axis_multi', - 'shift', - 'shift_multi', - 'shear', - 'shear_multi', - 'shear2', - 'shear_multi2', - 'swirl', - 'swirl_multi', - 'elastic_transform', - 'elastic_transform_multi', - 'zoom', - 'respective_zoom', - 'zoom_multi', - 'brightness', - 'brightness_multi', - 'illumination', - 'rgb_to_hsv', - 'hsv_to_rgb', - 'adjust_hue', - 'imresize', - 'pixel_value_scale', - 'samplewise_norm', - 'featurewise_norm', - 'get_zca_whitening_principal_components_img', - 'zca_whitening', - 'channel_shift', - 'channel_shift_multi', - 'drop', - 'array_to_img', - 'find_contours', - 'pt2map', - 'binary_dilation', - 'dilation', - 'binary_erosion', - 'erosion', - 'obj_box_coords_rescale', - 'obj_box_coord_rescale', - 'obj_box_coord_scale_to_pixelunit', - 'obj_box_coord_centroid_to_upleft_butright', - 'obj_box_coord_upleft_butright_to_centroid', - 'obj_box_coord_centroid_to_upleft', - 'obj_box_coord_upleft_to_centroid', - 'parse_darknet_ann_str_to_list', - 'parse_darknet_ann_list_to_cls_box', - 'obj_box_left_right_flip', - 'obj_box_imresize', - 'obj_box_crop', - 'obj_box_shift', - 'obj_box_zoom', - 'pad_sequences', - 'remove_pad_sequences', - 'process_sequences', - 'sequences_add_start_id', - 'sequences_add_end_id', - 'sequences_add_end_id_after_pad', - 'sequences_get_mask', - 'keypoint_random_crop', - 'keypoint_resize_random_crop', - 'keypoint_random_rotate', - 'keypoint_random_flip', - 'keypoint_random_resize', - 'keypoint_random_resize_shortestedge', -] - - -def threading_data(data=None, fn=None, thread_count=None, **kwargs): - """Process a batch of data by given function by threading. - - Usually be used for data augmentation. - - Parameters - ----------- - data : numpy.array or others - The data to be processed. - thread_count : int - The number of threads to use. - fn : function - The function for data processing. - more args : the args for `fn` - Ssee Examples below. - - Examples - -------- - Process images. - - >>> images, _, _, _ = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3)) - >>> images = tl.prepro.threading_data(images[0:32], tl.prepro.zoom, zoom_range=[0.5, 1]) - - Customized image preprocessing function. 
- - >>> def distort_img(x): - >>> x = tl.prepro.flip_axis(x, axis=0, is_random=True) - >>> x = tl.prepro.flip_axis(x, axis=1, is_random=True) - >>> x = tl.prepro.crop(x, 100, 100, is_random=True) - >>> return x - >>> images = tl.prepro.threading_data(images, distort_img) - - Process images and masks together (Usually be used for image segmentation). - - >>> X, Y --> [batch_size, row, col, 1] - >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], tl.prepro.zoom_multi, zoom_range=[0.5, 1], is_random=True) - data --> [batch_size, 2, row, col, 1] - >>> X_, Y_ = data.transpose((1,0,2,3,4)) - X_, Y_ --> [batch_size, row, col, 1] - >>> tl.vis.save_image(X_, 'images.png') - >>> tl.vis.save_image(Y_, 'masks.png') - - Process images and masks together by using ``thread_count``. - - >>> X, Y --> [batch_size, row, col, 1] - >>> data = tl.prepro.threading_data(X, tl.prepro.zoom_multi, 8, zoom_range=[0.5, 1], is_random=True) - data --> [batch_size, 2, row, col, 1] - >>> X_, Y_ = data.transpose((1,0,2,3,4)) - X_, Y_ --> [batch_size, row, col, 1] - >>> tl.vis.save_image(X_, 'after.png') - >>> tl.vis.save_image(Y_, 'before.png') - - Customized function for processing images and masks together. - - >>> def distort_img(data): - >>> x, y = data - >>> x, y = tl.prepro.flip_axis_multi([x, y], axis=0, is_random=True) - >>> x, y = tl.prepro.flip_axis_multi([x, y], axis=1, is_random=True) - >>> x, y = tl.prepro.crop_multi([x, y], 100, 100, is_random=True) - >>> return x, y - - >>> X, Y --> [batch_size, row, col, channel] - >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], distort_img) - >>> X_, Y_ = data.transpose((1,0,2,3,4)) - - Returns - ------- - list or numpyarray - The processed results. - - References - ---------- - - `python queue `__ - - `run with limited queue `__ - - """ - - def apply_fn(results, i, data, kwargs): - results[i] = fn(data, **kwargs) - - if thread_count is None: - results = [None] * len(data) - threads = [] - # for i in range(len(data)): - # t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, data[i], kwargs)) - for i, d in enumerate(data): - t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, d, kwargs)) - t.start() - threads.append(t) - else: - divs = np.linspace(0, len(data), thread_count + 1) - divs = np.round(divs).astype(int) - results = [None] * thread_count - threads = [] - for i in range(thread_count): - t = threading.Thread( - name='threading_and_return', target=apply_fn, args=(results, i, data[divs[i]:divs[i + 1]], kwargs) - ) - t.start() - threads.append(t) - - for t in threads: - t.join() - - if thread_count is None: - try: - return np.asarray(results) - except Exception: - return results - else: - return np.concatenate(results) - - -def affine_rotation_matrix(angle=(-20, 20)): - """Create an affine transform matrix for image rotation. - NOTE: In OpenCV, x is width and y is height. - - Parameters - ----------- - angle : int/float or tuple of two int/float - Degree to rotate, usually -180 ~ 180. - - int/float, a fixed angle. - - tuple of 2 floats/ints, randomly sample a value as the angle between these 2 values. - - Returns - ------- - numpy.array - An affine transform matrix. 
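-
-    Examples
-    ---------
-    A sketch of the intended pipeline (illustrative only; it assumes ``image`` is a
-    [row, col, channel] numpy array and ``w``, ``h`` are its width and height):
-
-    >>> M_rotate = tl.prepro.affine_rotation_matrix(angle=(-20, 20))
-    >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_rotate, x=w, y=h)
-    >>> result = tl.prepro.affine_transform_cv2(image, transform_matrix)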
-
-    """
-    if isinstance(angle, tuple):
-        theta = np.pi / 180 * np.random.uniform(angle[0], angle[1])
-    else:
-        theta = np.pi / 180 * angle
-    rotation_matrix = np.array([[np.cos(theta), np.sin(theta), 0], \
-                                [-np.sin(theta), np.cos(theta), 0], \
-                                [0, 0, 1]])
-    return rotation_matrix
-
-
-def affine_horizontal_flip_matrix(prob=0.5):
-    """Create an affine transformation matrix for image horizontal flipping.
-    NOTE: In OpenCV, x is width and y is height.
-
-    Parameters
-    ----------
-    prob : float
-        Probability to flip the image. 1.0 means always flip.
-
-    Returns
-    -------
-    numpy.array
-        An affine transform matrix.
-
-    """
-    factor = np.random.uniform(0, 1)
-    if prob >= factor:
-        flip_matrix = np.array([[-1., 0., 0.], \
-                                [0., 1., 0.], \
-                                [0., 0., 1.]])
-        return flip_matrix
-    else:
-        flip_matrix = np.array([[1., 0., 0.], \
-                                [0., 1., 0.], \
-                                [0., 0., 1.]])
-        return flip_matrix
-
-
-def affine_vertical_flip_matrix(prob=0.5):
-    """Create an affine transformation matrix for image vertical flipping.
-    NOTE: In OpenCV, x is width and y is height.
-
-    Parameters
-    ----------
-    prob : float
-        Probability to flip the image. 1.0 means always flip.
-
-    Returns
-    -------
-    numpy.array
-        An affine transform matrix.
-
-    """
-    factor = np.random.uniform(0, 1)
-    if prob >= factor:
-        flip_matrix = np.array([[1., 0., 0.], \
-                                [0., -1., 0.], \
-                                [0., 0., 1.]])
-        return flip_matrix
-    else:
-        flip_matrix = np.array([[1., 0., 0.], \
-                                [0., 1., 0.], \
-                                [0., 0., 1.]])
-        return flip_matrix
-
-
-def affine_shift_matrix(wrg=(-0.1, 0.1), hrg=(-0.1, 0.1), w=200, h=200):
-    """Create an affine transform matrix for image shifting.
-    NOTE: In OpenCV, x is width and y is height.
-
-    Parameters
-    -----------
-    wrg : float or tuple of floats
-        Range to shift on the width axis, -1 ~ 1.
-            - float, a fixed distance.
-            - tuple of 2 floats, randomly sample a value as the distance between these 2 values.
-    hrg : float or tuple of floats
-        Range to shift on the height axis, -1 ~ 1.
-            - float, a fixed distance.
-            - tuple of 2 floats, randomly sample a value as the distance between these 2 values.
-    w, h : int
-        The width and height of the image.
-
-    Returns
-    -------
-    numpy.array
-        An affine transform matrix.
-
-    """
-    if isinstance(wrg, tuple):
-        tx = np.random.uniform(wrg[0], wrg[1]) * w
-    else:
-        tx = wrg * w
-    if isinstance(hrg, tuple):
-        ty = np.random.uniform(hrg[0], hrg[1]) * h
-    else:
-        ty = hrg * h
-    shift_matrix = np.array([[1, 0, tx], \
-                             [0, 1, ty], \
-                             [0, 0, 1]])
-    return shift_matrix
-
-
-def affine_shear_matrix(x_shear=(-0.1, 0.1), y_shear=(-0.1, 0.1)):
-    """Create an affine transform matrix for image shearing.
-    NOTE: In OpenCV, x is width and y is height.
-
-    Parameters
-    -----------
-    x_shear and y_shear : float or tuple of two floats
-        Percentage of shear for the width and height directions.
-            - float, a fixed percentage.
-            - tuple of 2 floats, randomly sample a value as the percentage between these 2 values.
-
-    Returns
-    -------
-    numpy.array
-        An affine transform matrix.
-
-    """
-    # if len(shear) != 2:
-    #     raise AssertionError(
-    #         "shear should be tuple of 2 floats, or you want to use tl.prepro.shear rather than tl.prepro.shear2 ?"
- # ) - # if isinstance(shear, tuple): - # shear = list(shear) - # if is_random: - # shear[0] = np.random.uniform(-shear[0], shear[0]) - # shear[1] = np.random.uniform(-shear[1], shear[1]) - if isinstance(x_shear, tuple): - x_shear = np.random.uniform(x_shear[0], x_shear[1]) - if isinstance(y_shear, tuple): - y_shear = np.random.uniform(y_shear[0], y_shear[1]) - - shear_matrix = np.array([[1, x_shear, 0], \ - [y_shear, 1, 0], \ - [0, 0, 1]]) - return shear_matrix - - -def affine_zoom_matrix(zoom_range=(0.8, 1.1)): - """Create an affine transform matrix for zooming/scaling an image's height and width. - OpenCV format, x is width. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - zoom_range : float or tuple of 2 floats - The zooming/scaling ratio, greater than 1 means larger. - - float, a fixed ratio. - - tuple of 2 floats, randomly sample a value as the ratio between these 2 values. - - Returns - ------- - numpy.array - An affine transform matrix. - - """ - - if isinstance(zoom_range, (float, int)): - scale = zoom_range - elif isinstance(zoom_range, tuple): - scale = np.random.uniform(zoom_range[0], zoom_range[1]) - else: - raise Exception("zoom_range: float or tuple of 2 floats") - - zoom_matrix = np.array([[scale, 0, 0], \ - [0, scale, 0], \ - [0, 0, 1]]) - return zoom_matrix - - -def affine_respective_zoom_matrix(w_range=0.8, h_range=1.1): - """Get affine transform matrix for zooming/scaling that height and width are changed independently. - OpenCV format, x is width. - - Parameters - ----------- - w_range : float or tuple of 2 floats - The zooming/scaling ratio of width, greater than 1 means larger. - - float, a fixed ratio. - - tuple of 2 floats, randomly sample a value as the ratio between 2 values. - h_range : float or tuple of 2 floats - The zooming/scaling ratio of height, greater than 1 means larger. - - float, a fixed ratio. - - tuple of 2 floats, randomly sample a value as the ratio between 2 values. - - Returns - ------- - numpy.array - An affine transform matrix. - - """ - - if isinstance(h_range, (float, int)): - zy = h_range - elif isinstance(h_range, tuple): - zy = np.random.uniform(h_range[0], h_range[1]) - else: - raise Exception("h_range: float or tuple of 2 floats") - - if isinstance(w_range, (float, int)): - zx = w_range - elif isinstance(w_range, tuple): - zx = np.random.uniform(w_range[0], w_range[1]) - else: - raise Exception("w_range: float or tuple of 2 floats") - - zoom_matrix = np.array([[zx, 0, 0], \ - [0, zy, 0], \ - [0, 0, 1]]) - return zoom_matrix - - -# affine transform -def transform_matrix_offset_center(matrix, x, y): - """Convert the matrix from Cartesian coordinates (the origin in the middle of image) to Image coordinates (the origin on the top-left of image). - - Parameters - ---------- - matrix : numpy.array - Transform matrix. - x and y : 2 int - Size of image. - - Returns - ------- - numpy.array - The transform matrix. - - Examples - -------- - - See ``tl.prepro.rotation``, ``tl.prepro.shear``, ``tl.prepro.zoom``. - """ - o_x = (x - 1) / 2.0 - o_y = (y - 1) / 2.0 - offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]]) - reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]]) - transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix) - return transform_matrix - - -def affine_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Return transformed images by given an affine matrix in Scipy format (x is height). 
-
-    Parameters
-    ----------
-    x : numpy.array
-        An image with dimension of [row, col, channel] (default).
-    transform_matrix : numpy.array
-        Transform matrix (offset center), can be generated by ``transform_matrix_offset_center``.
-    channel_index : int
-        Index of channel, default 2.
-    fill_mode : str
-        Method to fill missing pixels, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__
-    cval : float
-        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
-    order : int
-        The order of interpolation. The order has to be in the range 0-5:
-            - 0 Nearest-neighbor
-            - 1 Bi-linear (default)
-            - 2 Bi-quadratic
-            - 3 Bi-cubic
-            - 4 Bi-quartic
-            - 5 Bi-quintic
-
-        - `scipy ndimage affine_transform `__
-
-    Returns
-    -------
-    numpy.array
-        A processed image.
-
-    Examples
-    --------
-    >>> M_shear = tl.prepro.affine_shear_matrix(x_shear=0.2, y_shear=0.2)
-    >>> M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=0.8)
-    >>> M_combined = M_shear.dot(M_zoom)
-    >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, h, w)
-    >>> result = tl.prepro.affine_transform(image, transform_matrix)
-
-    """
-    x = np.rollaxis(x, channel_index, 0)
-    final_affine_matrix = transform_matrix[:2, :2]
-    final_offset = transform_matrix[:2, 2]
-    channel_images = [
-        ndi.interpolation.
-        affine_transform(x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval)
-        for x_channel in x
-    ]
-    x = np.stack(channel_images, axis=0)
-    x = np.rollaxis(x, 0, channel_index + 1)
-    return x
-
-
-apply_transform = affine_transform
-
-
-def affine_transform_cv2(x, transform_matrix, flags=None, border_mode='constant'):
-    """Return transformed images given an affine matrix in OpenCV format (x is width). (Powered by OpenCV2, faster than ``tl.prepro.affine_transform``)
-
-    Parameters
-    ----------
-    x : numpy.array
-        An image with dimension of [row, col, channel] (default).
-    transform_matrix : numpy.array
-        A transform matrix, OpenCV format.
-    border_mode : str
-        - `constant`, pad the image with a constant value (i.e. black or 0).
-        - `replicate`, the row or column at the very edge of the original is replicated to the extra border.
-
-    Examples
-    --------
-    >>> M_shear = tl.prepro.affine_shear_matrix(x_shear=0.2, y_shear=0.2)
-    >>> M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=0.8)
-    >>> M_combined = M_shear.dot(M_zoom)
-    >>> result = tl.prepro.affine_transform_cv2(image, M_combined)
-    """
-    rows, cols = x.shape[0], x.shape[1]
-    if flags is None:
-        flags = cv2.INTER_AREA
-    if border_mode == 'constant':
-        border_mode = cv2.BORDER_CONSTANT
-    elif border_mode == 'replicate':
-        border_mode = cv2.BORDER_REPLICATE
-    else:
-        raise Exception("unsupported border_mode, check cv.BORDER_ for more details.")
-    return cv2.warpAffine(x, transform_matrix[0:2, :], \
-                          (cols, rows), flags=flags, borderMode=border_mode)
-
-
-def affine_transform_keypoints(coords_list, transform_matrix):
-    """Transform keypoint coordinates according to a given affine transform matrix.
-    OpenCV format, x is width.
-
-    Note that, for pose estimation task, flipping requires maintaining the left and right body information.
-    We should not flip the left and right body, so please use ``tl.prepro.keypoint_random_flip``.
-
-    Parameters
-    -----------
-    coords_list : list of list of tuple/list
-        The coordinates, e.g., the keypoint coordinates of every person in an image.
-    transform_matrix : numpy.array
-        Transform matrix, OpenCV format.
-
-    Examples
-    ---------
-    >>> # 1. get all affine transform matrices
-    >>> M_rotate = tl.prepro.affine_rotation_matrix(angle=20)
-    >>> M_flip = tl.prepro.affine_horizontal_flip_matrix(prob=1)
-    >>> # 2. combine all affine transform matrices into one matrix
-    >>> M_combined = M_flip.dot(M_rotate)
-    >>> # 3. transform the matrix from Cartesian coordinates (the origin in the middle of the image)
-    >>> #    to image coordinates (the origin on the top-left of the image)
-    >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)
-    >>> # 4. then we can transform the image once for all transformations
-    >>> result = tl.prepro.affine_transform_cv2(image, transform_matrix)  # 76 times faster
-    >>> # 5. transform keypoint coordinates
-    >>> coords = [[(50, 100), (100, 100), (100, 50), (200, 200)], [(250, 50), (200, 50), (200, 100)]]
-    >>> coords_result = tl.prepro.affine_transform_keypoints(coords, transform_matrix)
-    """
-    coords_result_list = []
-    for coords in coords_list:
-        coords = np.asarray(coords)
-        coords = coords.transpose([1, 0])
-        coords = np.insert(coords, 2, 1, axis=0)
-        # print(coords)
-        # print(transform_matrix)
-        coords_result = np.matmul(transform_matrix, coords)
-        coords_result = coords_result[0:2, :].transpose([1, 0])
-        coords_result_list.append(coords_result)
-    return coords_result_list
-
-
-def projective_transform_by_points(
-        x, src, dst, map_args=None, output_shape=None, order=1, mode='constant', cval=0.0, clip=True,
-        preserve_range=False
-):
-    """Projective transform by given coordinates, usually 4 coordinates.
-
-    See `scikit-image `__.
-
-    Parameters
-    -----------
-    x : numpy.array
-        An image with dimension of [row, col, channel] (default).
-    src : list or numpy.array
-        The original coordinates, usually 4 coordinates of (width, height).
-    dst : list or numpy.array
-        The coordinates after transformation, the number of coordinates is the same as src.
-    map_args : dictionary or None
-        Keyword arguments passed to inverse map.
-    output_shape : tuple of 2 int
-        Shape of the output image generated. By default the shape of the input image is preserved. Note that, even for multi-band images, only rows and columns need to be specified.
-    order : int
-        The order of interpolation. The order has to be in the range 0-5:
-            - 0 Nearest-neighbor
-            - 1 Bi-linear (default)
-            - 2 Bi-quadratic
-            - 3 Bi-cubic
-            - 4 Bi-quartic
-            - 5 Bi-quintic
-    mode : str
-        One of `constant` (default), `edge`, `symmetric`, `reflect` or `wrap`.
-        Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad.
-    cval : float
-        Used in conjunction with mode `constant`, the value outside the image boundaries.
-    clip : boolean
-        Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range.
-    preserve_range : boolean
-        Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float.
-
-    Returns
-    -------
-    numpy.array
-        A processed image.
-
-    Examples
-    --------
-    Assume X is an image from CIFAR-10, i.e.
shape == (32, 32, 3) - - >>> src = [[0,0],[0,32],[32,0],[32,32]] # [w, h] - >>> dst = [[10,10],[0,32],[32,0],[32,32]] - >>> x = tl.prepro.projective_transform_by_points(X, src, dst) - - References - ----------- - - `scikit-image : geometric transformations `__ - - `scikit-image : examples `__ - - """ - if map_args is None: - map_args = {} - # if type(src) is list: - if isinstance(src, list): # convert to numpy - src = np.array(src) - # if type(dst) is list: - if isinstance(dst, list): - dst = np.array(dst) - if np.max(x) > 1: # convert to [0, 1] - x = x / 255 - - m = transform.ProjectiveTransform() - m.estimate(dst, src) - warped = transform.warp( - x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, - preserve_range=preserve_range - ) - return warped - - -# rotate -def rotation( - x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1 -): - """Rotate an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - rg : int or float - Degree to rotate, usually 0 ~ 180. - is_random : boolean - If True, randomly rotate. Default is False - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode=`constant`. Default is 0.0 - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - >>> x --> [row, col, 1] - >>> x = tl.prepro.rotation(x, rg=40, is_random=False) - >>> tl.vis.save_image(x, 'im.png') - - """ - if is_random: - theta = np.pi / 180 * np.random.uniform(-rg, rg) - else: - theta = np.pi / 180 * rg - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) - - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) - x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def rotation_multi( - x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1 -): - """Rotate multiple images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.rotation``. - - Returns - ------- - numpy.array - A list of processed images. 
- - Examples - -------- - >>> x, y --> [row, col, 1] greyscale - >>> x, y = tl.prepro.rotation_multi([x, y], rg=90, is_random=False) - - """ - if is_random: - theta = np.pi / 180 * np.random.uniform(-rg, rg) - else: - theta = np.pi / 180 * rg - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) - results = [] - for data in x: - results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# crop -def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1): - """Randomly or centrally crop an image. - - Parameters - ---------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - wrg : int - Size of width. - hrg : int - Size of height. - is_random : boolean, - If True, randomly crop, else central crop. Default is False. - row_index: int - index of row. - col_index: int - index of column. - - Returns - ------- - numpy.array - A processed image. - - """ - h, w = x.shape[row_index], x.shape[col_index] - - if (h <= hrg) or (w <= wrg): - raise AssertionError("The size of cropping should smaller than the original image") - - if is_random: - h_offset = int(np.random.uniform(0, h - hrg) - 1) - w_offset = int(np.random.uniform(0, w - wrg) - 1) - # tl.logging.info(h_offset, w_offset, x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset].shape) - return x[h_offset:hrg + h_offset, w_offset:wrg + w_offset] - else: # central crop - h_offset = int(np.floor((h - hrg) / 2.)) - w_offset = int(np.floor((w - wrg) / 2.)) - h_end = h_offset + hrg - w_end = w_offset + wrg - return x[h_offset:h_end, w_offset:w_end] - # old implementation - # h_offset = (h - hrg)/2 - # w_offset = (w - wrg)/2 - # tl.logging.info(x[h_offset: h-h_offset ,w_offset: w-w_offset].shape) - # return x[h_offset: h-h_offset ,w_offset: w-w_offset] - # central crop - - -def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1): - """Randomly or centrally crop multiple images. - - Parameters - ---------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.crop``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - h, w = x[0].shape[row_index], x[0].shape[col_index] - - if (h <= hrg) or (w <= wrg): - raise AssertionError("The size of cropping should smaller than the original image") - - if is_random: - h_offset = int(np.random.uniform(0, h - hrg) - 1) - w_offset = int(np.random.uniform(0, w - wrg) - 1) - results = [] - for data in x: - results.append(data[h_offset:hrg + h_offset, w_offset:wrg + w_offset]) - return np.asarray(results) - else: - # central crop - h_offset = int(np.floor((h - hrg) / 2.)) - w_offset = int(np.floor((w - wrg) / 2.)) - results = [] - for data in x: - results.append(data[h_offset:h - h_offset, w_offset:w - w_offset]) - return np.asarray(results) - - -# flip -def flip_axis(x, axis=1, is_random=False): - """Flip the axis of an image, such as flip left and right, up and down, randomly or non-randomly, - - Parameters - ---------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - axis : int - Which axis to flip. - - 0, flip up and down - - 1, flip left and right - - 2, flip channel - is_random : boolean - If True, randomly flip. Default is False. 
- - Returns - ------- - numpy.array - A processed image. - - """ - if is_random: - factor = np.random.uniform(-1, 1) - if factor > 0: - x = np.asarray(x).swapaxes(axis, 0) - x = x[::-1, ...] - x = x.swapaxes(0, axis) - return x - else: - return x - else: - x = np.asarray(x).swapaxes(axis, 0) - x = x[::-1, ...] - x = x.swapaxes(0, axis) - return x - - -def flip_axis_multi(x, axis, is_random=False): - """Flip the axises of multiple images together, such as flip left and right, up and down, randomly or non-randomly, - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.flip_axis``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if is_random: - factor = np.random.uniform(-1, 1) - if factor > 0: - # x = np.asarray(x).swapaxes(axis, 0) - # x = x[::-1, ...] - # x = x.swapaxes(0, axis) - # return x - results = [] - for data in x: - data = np.asarray(data).swapaxes(axis, 0) - data = data[::-1, ...] - data = data.swapaxes(0, axis) - results.append(data) - return np.asarray(results) - else: - return np.asarray(x) - else: - # x = np.asarray(x).swapaxes(axis, 0) - # x = x[::-1, ...] - # x = x.swapaxes(0, axis) - # return x - results = [] - for data in x: - data = np.asarray(data).swapaxes(axis, 0) - data = data[::-1, ...] - data = data.swapaxes(0, axis) - results.append(data) - return np.asarray(results) - - -# shift -def shift( - x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., - order=1 -): - """Shift an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - wrg : float - Percentage of shift in axis x, usually -0.25 ~ 0.25. - hrg : float - Percentage of shift in axis y, usually -0.25 ~ 0.25. - is_random : boolean - If True, randomly shift. Default is False. - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - """ - h, w = x.shape[row_index], x.shape[col_index] - if is_random: - tx = np.random.uniform(-hrg, hrg) * h - ty = np.random.uniform(-wrg, wrg) * w - else: - tx, ty = hrg * h, wrg * w - translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - - transform_matrix = translation_matrix # no need to do offset - x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def shift_multi( - x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., - order=1 -): - """Shift images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.shift``. - - Returns - ------- - numpy.array - A list of processed images. 
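-
-    Examples
-    ---------
-    A sketch for paired image/mask augmentation (illustrative only; it assumes ``X``
-    and ``Y`` are matched [row, col, channel] arrays, e.g. an image and its segmentation mask):
-
-    >>> X_, Y_ = tl.prepro.shift_multi([X, Y], wrg=0.1, hrg=0.1, is_random=True)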
- - """ - h, w = x[0].shape[row_index], x[0].shape[col_index] - if is_random: - tx = np.random.uniform(-hrg, hrg) * h - ty = np.random.uniform(-wrg, wrg) * w - else: - tx, ty = hrg * h, wrg * w - translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - - transform_matrix = translation_matrix # no need to do offset - results = [] - for data in x: - results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# shear -def shear( - x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., - order=1 -): - """Shear an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - intensity : float - Percentage of shear, usually -0.5 ~ 0.5 (is_random==True), 0 ~ 0.5 (is_random==False), - you can have a quick try by shear(X, 1). - is_random : boolean - If True, randomly shear. Default is False. - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see and `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - References - ----------- - - `Affine transformation `__ - - """ - if is_random: - shear = np.random.uniform(-intensity, intensity) - else: - shear = intensity - shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]) - - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def shear_multi( - x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., - order=1 -): - """Shear images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.shear``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if is_random: - shear = np.random.uniform(-intensity, intensity) - else: - shear = intensity - shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - results = [] - for data in x: - results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -def shear2( - x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., - order=1 -): - """Shear an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - shear : tuple of two floats - Percentage of shear for height and width direction (0, 1). - is_random : boolean - If True, randomly shear. Default is False. 
- row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - References - ----------- - - `Affine transformation `__ - - """ - if len(shear) != 2: - raise AssertionError( - "shear should be tuple of 2 floats, or you want to use tl.prepro.shear rather than tl.prepro.shear2 ?" - ) - if isinstance(shear, tuple): - shear = list(shear) - if is_random: - shear[0] = np.random.uniform(-shear[0], shear[0]) - shear[1] = np.random.uniform(-shear[1], shear[1]) - - shear_matrix = np.array([[1, shear[0], 0], \ - [shear[1], 1, 0], \ - [0, 0, 1]]) - - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def shear_multi2( - x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., - order=1 -): - """Shear images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.shear2``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if len(shear) != 2: - raise AssertionError( - "shear should be tuple of 2 floats, or you want to use tl.prepro.shear_multi rather than tl.prepro.shear_multi2 ?" - ) - if isinstance(shear, tuple): - shear = list(shear) - if is_random: - shear[0] = np.random.uniform(-shear[0], shear[0]) - shear[1] = np.random.uniform(-shear[1], shear[1]) - - shear_matrix = np.array([[1, shear[0], 0], [shear[1], 1, 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - results = [] - for data in x: - results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# swirl -def swirl( - x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, - clip=True, preserve_range=False, is_random=False -): - """Swirl an image randomly or non-randomly, see `scikit-image swirl API `__ - and `example `__. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - center : tuple or 2 int or None - Center coordinate of transformation (optional). - strength : float - The amount of swirling applied. - radius : float - The extent of the swirl in pixels. The effect dies out rapidly beyond radius. - rotation : float - Additional rotation applied to the image, usually [0, 360], relates to center. - output_shape : tuple of 2 int or None - Shape of the output image generated (height, width). By default the shape of the input image is preserved. - order : int, optional - The order of the spline interpolation, default is 1. The order has to be in the range 0-5. 
See skimage.transform.warp for detail. - mode : str - One of `constant` (default), `edge`, `symmetric` `reflect` and `wrap`. - Points outside the boundaries of the input are filled according to the given mode, with `constant` used as the default. Modes match the behaviour of numpy.pad. - cval : float - Used in conjunction with mode `constant`, the value outside the image boundaries. - clip : boolean - Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range. - preserve_range : boolean - Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. - is_random : boolean, - If True, random swirl. Default is False. - - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])] - - random strength = [0, strength] - - random radius = [1e-10, radius] - - random rotation = [-rotation, rotation] - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - >>> x --> [row, col, 1] greyscale - >>> x = tl.prepro.swirl(x, strength=4, radius=100) - - """ - if radius == 0: - raise AssertionError("Invalid radius value") - - rotation = np.pi / 180 * rotation - if is_random: - center_h = int(np.random.uniform(0, x.shape[0])) - center_w = int(np.random.uniform(0, x.shape[1])) - center = (center_h, center_w) - strength = np.random.uniform(0, strength) - radius = np.random.uniform(1e-10, radius) - rotation = np.random.uniform(-rotation, rotation) - - max_v = np.max(x) - if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. - x = x / max_v - swirled = skimage.transform.swirl( - x, center=center, strength=strength, radius=radius, rotation=rotation, output_shape=output_shape, order=order, - mode=mode, cval=cval, clip=clip, preserve_range=preserve_range - ) - if max_v > 1: - swirled = swirled * max_v - return swirled - - -def swirl_multi( - x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, - clip=True, preserve_range=False, is_random=False -): - """Swirl multiple images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.swirl``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if radius == 0: - raise AssertionError("Invalid radius value") - - rotation = np.pi / 180 * rotation - if is_random: - center_h = int(np.random.uniform(0, x[0].shape[0])) - center_w = int(np.random.uniform(0, x[0].shape[1])) - center = (center_h, center_w) - strength = np.random.uniform(0, strength) - radius = np.random.uniform(1e-10, radius) - rotation = np.random.uniform(-rotation, rotation) - - results = [] - for data in x: - max_v = np.max(data) - if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. 
- data = data / max_v - swirled = skimage.transform.swirl( - data, center=center, strength=strength, radius=radius, rotation=rotation, output_shape=output_shape, - order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range - ) - if max_v > 1: - swirled = swirled * max_v - results.append(swirled) - return np.asarray(results) - - -# elastic_transform -def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False): - """Elastic transformation for image as described in `[Simard2003] `__. - - Parameters - ----------- - x : numpy.array - A greyscale image. - alpha : float - Alpha value for elastic transformation. - sigma : float or sequence of float - The smaller the sigma, the more transformation. Standard deviation for Gaussian kernel. The standard deviations of the Gaussian filter are given for each axis as a sequence, or as a single number, in which case it is equal for all axes. - mode : str - See `scipy.ndimage.filters.gaussian_filter `__. Default is `constant`. - cval : float, - Used in conjunction with `mode` of `constant`, the value outside the image boundaries. - is_random : boolean - Default is False. - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - >>> x = tl.prepro.elastic_transform(x, alpha=x.shape[1]*3, sigma=x.shape[1]*0.07) - - References - ------------ - - `Github `__. - - `Kaggle `__ - - """ - if is_random is False: - random_state = np.random.RandomState(None) - else: - random_state = np.random.RandomState(int(time.time())) - # - is_3d = False - if len(x.shape) == 3 and x.shape[-1] == 1: - x = x[:, :, 0] - is_3d = True - elif len(x.shape) == 3 and x.shape[-1] != 1: - raise Exception("Only support greyscale image") - - if len(x.shape) != 2: - raise AssertionError("input should be grey-scale image") - - shape = x.shape - - dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha - dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha - - x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij') - indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1)) - if is_3d: - return map_coordinates(x, indices, order=1).reshape((shape[0], shape[1], 1)) - else: - return map_coordinates(x, indices, order=1).reshape(shape) - - -def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random=False): - """Elastic transformation for images as described in `[Simard2003] `__. - - Parameters - ----------- - x : list of numpy.array - List of greyscale images. - others : args - See ``tl.prepro.elastic_transform``. - - Returns - ------- - numpy.array - A list of processed images. 
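-
-    Examples
-    ---------
-    A sketch mirroring the single-image example in ``tl.prepro.elastic_transform``
-    (illustrative only; it assumes ``x`` and ``y`` are matched greyscale arrays):
-
-    >>> x_, y_ = tl.prepro.elastic_transform_multi([x, y], alpha=x.shape[1] * 3, sigma=x.shape[1] * 0.07)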
- - """ - if is_random is False: - random_state = np.random.RandomState(None) - else: - random_state = np.random.RandomState(int(time.time())) - - shape = x[0].shape - if len(shape) == 3: - shape = (shape[0], shape[1]) - new_shape = random_state.rand(*shape) - - results = [] - for data in x: - is_3d = False - if len(data.shape) == 3 and data.shape[-1] == 1: - data = data[:, :, 0] - is_3d = True - elif len(data.shape) == 3 and data.shape[-1] != 1: - raise Exception("Only support greyscale image") - - if len(data.shape) != 2: - raise AssertionError("input should be grey-scale image") - - dx = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha - dy = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha - - x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij') - indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1)) - # tl.logging.info(data.shape) - if is_3d: - results.append(map_coordinates(data, indices, order=1).reshape((shape[0], shape[1], 1))) - else: - results.append(map_coordinates(data, indices, order=1).reshape(shape)) - return np.asarray(results) - - -# zoom -def zoom(x, zoom_range=(0.9, 1.1), row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Zooming/Scaling a single image that height and width are changed together. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - zoom_range : float or tuple of 2 floats - The zooming/scaling ratio, greater than 1 means larger. - - float, a fixed ratio. - - tuple of 2 floats, randomly sample a value as the ratio between 2 values. - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - """ - zoom_matrix = affine_zoom_matrix(zoom_range=zoom_range) - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) - x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def respective_zoom( - x, h_range=(0.9, 1.1), w_range=(0.9, 1.1), row_index=0, col_index=1, channel_index=2, fill_mode='nearest', - cval=0., order=1 -): - """Zooming/Scaling a single image that height and width are changed independently. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - h_range : float or tuple of 2 floats - The zooming/scaling ratio of height, greater than 1 means larger. - - float, a fixed ratio. - - tuple of 2 floats, randomly sample a value as the ratio between 2 values. - w_range : float or tuple of 2 floats - The zooming/scaling ratio of width, greater than 1 means larger. - - float, a fixed ratio. - - tuple of 2 floats, randomly sample a value as the ratio between 2 values. - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). 
- fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - """ - zoom_matrix = affine_respective_zoom_matrix(h_range=h_range, w_range=w_range) - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) - x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def zoom_multi( - x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', - cval=0., order=1 -): - """Zoom in and out of images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.zoom``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if len(zoom_range) != 2: - raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) - - if is_random: - if zoom_range[0] == 1 and zoom_range[1] == 1: - zx, zy = 1, 1 - tl.logging.info(" random_zoom : not zoom in/out") - else: - zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) - else: - zx, zy = zoom_range - - zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) - # x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval) - # return x - results = [] - for data in x: - results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# image = tf.image.random_brightness(image, max_delta=32. / 255.) -# image = tf.image.random_saturation(image, lower=0.5, upper=1.5) -# image = tf.image.random_hue(image, max_delta=0.032) -# image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - - -def brightness(x, gamma=1, gain=1, is_random=False): - """Change the brightness of a single image, randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - gamma : float - Non negative real number. Default value is 1. - - Small than 1 means brighter. - - If `is_random` is True, gamma in a range of (1-gamma, 1+gamma). - gain : float - The constant multiplier. Default value is 1. - is_random : boolean - If True, randomly change brightness. Default is False. - - Returns - ------- - numpy.array - A processed image. - - References - ----------- - - `skimage.exposure.adjust_gamma `__ - - `chinese blog `__ - - """ - if is_random: - gamma = np.random.uniform(1 - gamma, 1 + gamma) - x = exposure.adjust_gamma(x, gamma, gain) - return x - - -def brightness_multi(x, gamma=1, gain=1, is_random=False): - """Change the brightness of multiply images, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. 
- - Parameters - ----------- - x : list of numpyarray - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.brightness``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if is_random: - gamma = np.random.uniform(1 - gamma, 1 + gamma) - - results = [] - for data in x: - results.append(exposure.adjust_gamma(data, gamma, gain)) - return np.asarray(results) - - -def illumination(x, gamma=1., contrast=1., saturation=1., is_random=False): - """Perform illumination augmentation for a single image, randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - gamma : float - Change brightness (the same with ``tl.prepro.brightness``) - - if is_random=False, one float number, small than one means brighter, greater than one means darker. - - if is_random=True, tuple of two float numbers, (min, max). - contrast : float - Change contrast. - - if is_random=False, one float number, small than one means blur. - - if is_random=True, tuple of two float numbers, (min, max). - saturation : float - Change saturation. - - if is_random=False, one float number, small than one means unsaturation. - - if is_random=True, tuple of two float numbers, (min, max). - is_random : boolean - If True, randomly change illumination. Default is False. - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - Random - - >>> x = tl.prepro.illumination(x, gamma=(0.5, 5.0), contrast=(0.3, 1.0), saturation=(0.7, 1.0), is_random=True) - - Non-random - - >>> x = tl.prepro.illumination(x, 0.5, 0.6, 0.8, is_random=False) - - """ - if is_random: - if not (len(gamma) == len(contrast) == len(saturation) == 2): - raise AssertionError("if is_random = True, the arguments are (min, max)") - - ## random change brightness # small --> brighter - illum_settings = np.random.randint(0, 3) # 0-brighter, 1-darker, 2 keep normal - - if illum_settings == 0: # brighter - gamma = np.random.uniform(gamma[0], 1.0) # (.5, 1.0) - elif illum_settings == 1: # darker - gamma = np.random.uniform(1.0, gamma[1]) # (1.0, 5.0) - else: - gamma = 1 - im_ = brightness(x, gamma=gamma, gain=1, is_random=False) - - # tl.logging.info("using contrast and saturation") - image = PIL.Image.fromarray(im_) # array -> PIL - contrast_adjust = PIL.ImageEnhance.Contrast(image) - image = contrast_adjust.enhance(np.random.uniform(contrast[0], contrast[1])) #0.3,0.9)) - - saturation_adjust = PIL.ImageEnhance.Color(image) - image = saturation_adjust.enhance(np.random.uniform(saturation[0], saturation[1])) # (0.7,1.0)) - im_ = np.array(image) # PIL -> array - else: - im_ = brightness(x, gamma=gamma, gain=1, is_random=False) - image = PIL.Image.fromarray(im_) # array -> PIL - contrast_adjust = PIL.ImageEnhance.Contrast(image) - image = contrast_adjust.enhance(contrast) - - saturation_adjust = PIL.ImageEnhance.Color(image) - image = saturation_adjust.enhance(saturation) - im_ = np.array(image) # PIL -> array - return np.asarray(im_) - - -def rgb_to_hsv(rgb): - """Input RGB image [0~255] return HSV image [0~1]. - - Parameters - ------------ - rgb : numpy.array - An image with values between 0 and 255. - - Returns - ------- - numpy.array - A processed image. - - """ - # Translated from source of colorsys.rgb_to_hsv - # r,g,b should be a numpy arrays with values between 0 and 255 - # rgb_to_hsv returns an array of floats between 0.0 and 1.0. 
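The gamma relation these brightness helpers rely on is easy to check numerically. A small sketch (the `adjust_gamma` stand-in mirrors skimage's formula for float inputs in [0, 1]; it is not the library call itself):

```python
import numpy as np

def adjust_gamma(x, gamma=1.0, gain=1.0):
    # out = gain * x ** gamma, so gamma < 1 brightens and gamma > 1 darkens
    return gain * np.power(x, gamma)

x = np.linspace(0., 1., 5)
print(adjust_gamma(x, gamma=0.5))  # brighter mid-tones
print(adjust_gamma(x, gamma=2.0))  # darker mid-tones
```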
- rgb = rgb.astype('float') - hsv = np.zeros_like(rgb) - # in case an RGBA array was passed, just copy the A channel - hsv[..., 3:] = rgb[..., 3:] - r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] - maxc = np.max(rgb[..., :3], axis=-1) - minc = np.min(rgb[..., :3], axis=-1) - hsv[..., 2] = maxc - mask = maxc != minc - hsv[mask, 1] = (maxc - minc)[mask] / maxc[mask] - rc = np.zeros_like(r) - gc = np.zeros_like(g) - bc = np.zeros_like(b) - rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask] - gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask] - bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask] - hsv[..., 0] = np.select([r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc) - hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0 - return hsv - - -def hsv_to_rgb(hsv): - """Input HSV image [0~1] return RGB image [0~255]. - - Parameters - ------------- - hsv : numpy.array - An image with values between 0.0 and 1.0 - - Returns - ------- - numpy.array - A processed image. - """ - # Translated from source of colorsys.hsv_to_rgb - # h,s should be a numpy arrays with values between 0.0 and 1.0 - # v should be a numpy array with values between 0.0 and 255.0 - # hsv_to_rgb returns an array of uints between 0 and 255. - rgb = np.empty_like(hsv) - rgb[..., 3:] = hsv[..., 3:] - h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] - i = (h * 6.0).astype('uint8') - f = (h * 6.0) - i - p = v * (1.0 - s) - q = v * (1.0 - s * f) - t = v * (1.0 - s * (1.0 - f)) - i = i % 6 - conditions = [s == 0.0, i == 1, i == 2, i == 3, i == 4, i == 5] - rgb[..., 0] = np.select(conditions, [v, q, p, p, t, v], default=v) - rgb[..., 1] = np.select(conditions, [v, v, v, q, p, p], default=t) - rgb[..., 2] = np.select(conditions, [v, p, t, v, v, q], default=p) - return rgb.astype('uint8') - - -def adjust_hue(im, hout=0.66, is_offset=True, is_clip=True, is_random=False): - """Adjust hue of an RGB image. - - This is a convenience method that converts an RGB image to float representation, converts it to HSV, add an offset to the hue channel, converts back to RGB and then back to the original data type. - For TF, see `tf.image.adjust_hue `__.and `tf.image.random_hue `__. - - Parameters - ----------- - im : numpy.array - An image with values between 0 and 255. - hout : float - The scale value for adjusting hue. - - If is_offset is False, set all hue values to this value. 0 is red; 0.33 is green; 0.66 is blue. - - If is_offset is True, add this value as the offset to the hue channel. - is_offset : boolean - Whether `hout` is added on HSV as offset or not. Default is True. - is_clip : boolean - If HSV value smaller than 0, set to 0. Default is True. - is_random : boolean - If True, randomly change hue. Default is False. - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - Random, add a random value between -0.2 and 0.2 as the offset to every hue values. - - >>> im_hue = tl.prepro.adjust_hue(image, hout=0.2, is_offset=True, is_random=False) - - Non-random, make all hue to green. - - >>> im_green = tl.prepro.adjust_hue(image, hout=0.66, is_offset=False, is_random=False) - - References - ----------- - - `tf.image.random_hue `__. - - `tf.image.adjust_hue `__. - - `StackOverflow: Changing image hue with python PIL `__. 
- - """ - hsv = rgb_to_hsv(im) - if is_random: - hout = np.random.uniform(-hout, hout) - - if is_offset: - hsv[..., 0] += hout - else: - hsv[..., 0] = hout - - if is_clip: - hsv[..., 0] = np.clip(hsv[..., 0], 0, np.inf) # Hao : can remove green dots - - rgb = hsv_to_rgb(hsv) - return rgb - - -# # contrast -# def constant(x, cutoff=0.5, gain=10, inv=False, is_random=False): -# # TODO -# x = exposure.adjust_sigmoid(x, cutoff=cutoff, gain=gain, inv=inv) -# return x -# -# def constant_multi(): -# #TODO -# pass - - -def imresize(x, size=None, interp='bicubic', mode=None): - """Resize an image by given output size and method. - - Warning, this function will rescale the value to [0, 255]. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - size : list of 2 int or None - For height and width. - interp : str - Interpolation method for re-sizing (`nearest`, `lanczos`, `bilinear`, `bicubic` (default) or `cubic`). - mode : str - The PIL image mode (`P`, `L`, etc.) to convert arr before resizing. - - Returns - ------- - numpy.array - A processed image. - - References - ------------ - - `scipy.misc.imresize `__ - - """ - if size is None: - size = [100, 100] - - if x.shape[-1] == 1: - # greyscale - x = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode) - return x[:, :, np.newaxis] - elif x.shape[-1] == 3: - # rgb, bgr .. - return scipy.misc.imresize(x, size, interp=interp, mode=mode) - else: - raise Exception("Unsupported channel %d" % x.shape[-1]) - - -# value scale -def pixel_value_scale(im, val=0.9, clip=None, is_random=False): - """Scales each value in the pixels of the image. - - Parameters - ----------- - im : numpy.array - An image. - val : float - The scale value for changing pixel value. - - If is_random=False, multiply this value with all pixels. - - If is_random=True, multiply a value between [1-val, 1+val] with all pixels. - clip : tuple of 2 numbers - The minimum and maximum value. - is_random : boolean - If True, see ``val``. - - Returns - ------- - numpy.array - A processed image. - - Examples - ---------- - Random - - >>> im = pixel_value_scale(im, 0.1, [0, 255], is_random=True) - - Non-random - - >>> im = pixel_value_scale(im, 0.9, [0, 255], is_random=False) - - """ - - clip = clip if clip is not None else (-np.inf, np.inf) - - if is_random: - scale = 1 + np.random.uniform(-val, val) - im = im * scale - else: - im = im * val - - if len(clip) == 2: - im = np.clip(im, clip[0], clip[1]) - else: - raise Exception("clip : tuple of 2 numbers") - - return im - - -# normailization -def samplewise_norm( - x, rescale=None, samplewise_center=False, samplewise_std_normalization=False, channel_index=2, epsilon=1e-7 -): - """Normalize an image by rescale, samplewise centering and samplewise centering in order. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - rescale : float - Rescaling factor. If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation) - samplewise_center : boolean - If True, set each sample mean to 0. - samplewise_std_normalization : boolean - If True, divide each input by its std. - epsilon : float - A small position value for dividing standard deviation. - - Returns - ------- - numpy.array - A processed image. 
- - Examples - -------- - >>> x = samplewise_norm(x, samplewise_center=True, samplewise_std_normalization=True) - >>> print(x.shape, np.mean(x), np.std(x)) - (160, 176, 1), 0.0, 1.0 - - Notes - ------ - When samplewise_center and samplewise_std_normalization are True. - - For greyscale image, every pixels are subtracted and divided by the mean and std of whole image. - - For RGB image, every pixels are subtracted and divided by the mean and std of this pixel i.e. the mean and std of a pixel is 0 and 1. - - """ - if rescale: - x *= rescale - - if x.shape[channel_index] == 1: - # greyscale - if samplewise_center: - x = x - np.mean(x) - if samplewise_std_normalization: - x = x / np.std(x) - return x - elif x.shape[channel_index] == 3: - # rgb - if samplewise_center: - x = x - np.mean(x, axis=channel_index, keepdims=True) - if samplewise_std_normalization: - x = x / (np.std(x, axis=channel_index, keepdims=True) + epsilon) - return x - else: - raise Exception("Unsupported channels %d" % x.shape[channel_index]) - - -def featurewise_norm(x, mean=None, std=None, epsilon=1e-7): - """Normalize every pixels by the same given mean and std, which are usually - compute from all examples. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - mean : float - Value for subtraction. - std : float - Value for division. - epsilon : float - A small position value for dividing standard deviation. - - Returns - ------- - numpy.array - A processed image. - - """ - if mean: - x = x - mean - if std: - x = x / (std + epsilon) - return x - - -# whitening -def get_zca_whitening_principal_components_img(X): - """Return the ZCA whitening principal components matrix. - - Parameters - ----------- - x : numpy.array - Batch of images with dimension of [n_example, row, col, channel] (default). - - Returns - ------- - numpy.array - A processed image. - - """ - flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3])) - tl.logging.info("zca : computing sigma ..") - sigma = np.dot(flatX.T, flatX) / flatX.shape[0] - tl.logging.info("zca : computing U, S and V ..") - U, S, _ = linalg.svd(sigma) # USV - tl.logging.info("zca : computing principal components ..") - principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T) - return principal_components - - -def zca_whitening(x, principal_components): - """Apply ZCA whitening on an image by given principal components matrix. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - principal_components : matrix - Matrix from ``get_zca_whitening_principal_components_img``. - - Returns - ------- - numpy.array - A processed image. 
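A self-contained sketch of the two-step ZCA recipe above (compute the whitening matrix from the flattened batch, then apply it), on a tiny synthetic batch; the `1e-6` epsilon equals the `10e-7` used above.

```python
import numpy as np

X = np.random.rand(32, 8, 8, 1).astype(np.float32)    # tiny synthetic batch
flat = X.reshape(len(X), -1)
sigma = flat.T @ flat / len(flat)                     # per-pixel covariance
U, S, _ = np.linalg.svd(sigma)
pc = U @ np.diag(1. / np.sqrt(S + 1e-6)) @ U.T        # ZCA whitening matrix
white = (flat @ pc).reshape(X.shape)                  # decorrelated pixels
```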
- - """ - flatx = np.reshape(x, (x.size)) - # tl.logging.info(principal_components.shape, x.shape) # ((28160, 28160), (160, 176, 1)) - # flatx = np.reshape(x, (x.shape)) - # flatx = np.reshape(x, (x.shape[0], )) - # tl.logging.info(flatx.shape) # (160, 176, 1) - whitex = np.dot(flatx, principal_components) - x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2])) - return x - - -# developing -# def barrel_transform(x, intensity): -# # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py -# # TODO -# pass -# -# def barrel_transform_multi(x, intensity): -# # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py -# # TODO -# pass - - -# channel shift -def channel_shift(x, intensity, is_random=False, channel_index=2): - """Shift the channels of an image, randomly or non-randomly, see `numpy.rollaxis `__. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - intensity : float - Intensity of shifting. - is_random : boolean - If True, randomly shift. Default is False. - channel_index : int - Index of channel. Default is 2. - - Returns - ------- - numpy.array - A processed image. - - """ - if is_random: - factor = np.random.uniform(-intensity, intensity) - else: - factor = intensity - x = np.rollaxis(x, channel_index, 0) - min_x, max_x = np.min(x), np.max(x) - channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x] - x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_index + 1) - return x - # x = np.rollaxis(x, channel_index, 0) - # min_x, max_x = np.min(x), np.max(x) - # channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x) - # for x_channel in x] - # x = np.stack(channel_images, axis=0) - # x = np.rollaxis(x, 0, channel_index+1) - # return x - - -def channel_shift_multi(x, intensity, is_random=False, channel_index=2): - """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis `__. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.channel_shift``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if is_random: - factor = np.random.uniform(-intensity, intensity) - else: - factor = intensity - - results = [] - for data in x: - data = np.rollaxis(data, channel_index, 0) - min_x, max_x = np.min(data), np.max(data) - channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x] - data = np.stack(channel_images, axis=0) - data = np.rollaxis(x, 0, channel_index + 1) - results.append(data) - return np.asarray(results) - - -# noise -def drop(x, keep=0.5): - """Randomly set some pixels to zero by a given keeping probability. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] or [row, col]. - keep : float - The keeping probability (0, 1), the lower more values will be set to zero. - - Returns - ------- - numpy.array - A processed image. 
- - """ - if len(x.shape) == 3: - if x.shape[-1] == 3: # color - img_size = x.shape - mask = np.random.binomial(n=1, p=keep, size=x.shape[:-1]) - for i in range(3): - x[:, :, i] = np.multiply(x[:, :, i], mask) - elif x.shape[-1] == 1: # greyscale image - img_size = x.shape - x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size)) - else: - raise Exception("Unsupported shape {}".format(x.shape)) - elif len(x.shape) == 2 or 1: # greyscale matrix (image) or vector - img_size = x.shape - x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size)) - else: - raise Exception("Unsupported shape {}".format(x.shape)) - return x - - -# x = np.asarray([[1,2,3,4,5,6,7,8,9,10],[1,2,3,4,5,6,7,8,9,10]]) -# x = np.asarray([x,x,x,x,x,x]) -# x.shape = 10, 4, 3 -# tl.logging.info(x) -# # exit() -# tl.logging.info(x.shape) -# # exit() -# tl.logging.info(drop(x, keep=1.)) -# exit() - - -# Numpy and PIL -def array_to_img(x, dim_ordering=(0, 1, 2), scale=True): - """Converts a numpy array to PIL image object (uint8 format). - - Parameters - ---------- - x : numpy.array - An image with dimension of 3 and channels of 1 or 3. - dim_ordering : tuple of 3 int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - scale : boolean - If True, converts image to [0, 255] from any range of value like [-1, 2]. Default is True. - - Returns - ------- - PIL.image - An image. - - References - ----------- - `PIL Image.fromarray `__ - - """ - # if dim_ordering == 'default': - # dim_ordering = K.image_dim_ordering() - # if dim_ordering == 'th': # theano - # x = x.transpose(1, 2, 0) - - x = x.transpose(dim_ordering) - - if scale: - x += max(-np.min(x), 0) - x_max = np.max(x) - if x_max != 0: - # tl.logging.info(x_max) - # x /= x_max - x = x / x_max - x *= 255 - - if x.shape[2] == 3: - # RGB - return PIL.Image.fromarray(x.astype('uint8'), 'RGB') - - elif x.shape[2] == 1: - # grayscale - return PIL.Image.fromarray(x[:, :, 0].astype('uint8'), 'L') - - else: - raise Exception('Unsupported channel number: ', x.shape[2]) - - -def find_contours(x, level=0.8, fully_connected='low', positive_orientation='low'): - """Find iso-valued contours in a 2D array for a given level value, returns list of (n, 2)-ndarrays - see `skimage.measure.find_contours `__. - - Parameters - ------------ - x : 2D ndarray of double. - Input data in which to find contours. - level : float - Value along which to find contours in the array. - fully_connected : str - Either `low` or `high`. Indicates whether array elements below the given level value are to be considered fully-connected (and hence elements above the value will only be face connected), or vice-versa. (See notes below for details.) - positive_orientation : str - Either `low` or `high`. Indicates whether the output contours will produce positively-oriented polygons around islands of low- or high-valued elements. If `low` then contours will wind counter-clockwise around elements below the iso-value. Alternately, this means that low-valued elements are always on the left of the contour. - - Returns - -------- - list of (n,2)-ndarrays - Each contour is an ndarray of shape (n, 2), consisting of n (row, column) coordinates along the contour. - - """ - return skimage.measure.find_contours( - x, level, fully_connected=fully_connected, positive_orientation=positive_orientation - ) - - -def pt2map(list_points=None, size=(100, 100), val=1): - """Inputs a list of points, return a 2D image. - - Parameters - -------------- - list_points : list of 2 int - [[x, y], [x, y]..] 
for point coordinates. - size : tuple of 2 int - (w, h) for output size. - val : float or int - For the contour value. - - Returns - ------- - numpy.array - An image. - - """ - if list_points is None: - raise Exception("list_points : list of 2 int") - i_m = np.zeros(size) - if len(list_points) == 0: - return i_m - for xx in list_points: - for x in xx: - # tl.logging.info(x) - i_m[int(np.round(x[0]))][int(np.round(x[1]))] = val - return i_m - - -def binary_dilation(x, radius=3): - """Return fast binary morphological dilation of an image. - see `skimage.morphology.binary_dilation `__. - - Parameters - ----------- - x : 2D array - A binary image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed binary image. - - """ - mask = disk(radius) - x = _binary_dilation(x, selem=mask) - - return x - - -def dilation(x, radius=3): - """Return greyscale morphological dilation of an image, - see `skimage.morphology.dilation `__. - - Parameters - ----------- - x : 2D array - An greyscale image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed greyscale image. - - """ - mask = disk(radius) - x = dilation(x, selem=mask) - - return x - - -def binary_erosion(x, radius=3): - """Return binary morphological erosion of an image, - see `skimage.morphology.binary_erosion `__. - - Parameters - ----------- - x : 2D array - A binary image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed binary image. - - """ - mask = disk(radius) - x = _binary_erosion(x, selem=mask) - return x - - -def erosion(x, radius=3): - """Return greyscale morphological erosion of an image, - see `skimage.morphology.erosion `__. - - Parameters - ----------- - x : 2D array - A greyscale image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed greyscale image. - - """ - mask = disk(radius) - x = _erosion(x, selem=mask) - return x - - -def obj_box_coords_rescale(coords=None, shape=None): - """Scale down a list of coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1]. - - Parameters - ------------ - coords : list of list of 4 ints or None - For coordinates of more than one images .e.g.[[x, y, w, h], [x, y, w, h], ...]. - shape : list of 2 int or None - 【height, width]. - - Returns - ------- - list of list of 4 numbers - A list of new bounding boxes. - - - Examples - --------- - >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100]) - >>> print(coords) - [[0.3, 0.4, 0.5, 0.5], [0.1, 0.1, 0.2, 0.2]] - >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[50, 100]) - >>> print(coords) - [[0.3, 0.8, 0.5, 1.0]] - >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200]) - >>> print(coords) - [[0.15, 0.4, 0.25, 0.5]] - - Returns - ------- - list of 4 numbers - New coordinates. 
- - """ - if coords is None: - coords = [] - if shape is None: - shape = [100, 200] - - imh, imw = shape[0], shape[1] - imh = imh * 1.0 # * 1.0 for python2 : force division to be float point - imw = imw * 1.0 - coords_new = list() - for coord in coords: - - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - x = coord[0] / imw - y = coord[1] / imh - w = coord[2] / imw - h = coord[3] / imh - coords_new.append([x, y, w, h]) - return coords_new - - -def obj_box_coord_rescale(coord=None, shape=None): - """Scale down one coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1]. - It is the reverse process of ``obj_box_coord_scale_to_pixelunit``. - - Parameters - ------------ - coords : list of 4 int or None - One coordinates of one image e.g. [x, y, w, h]. - shape : list of 2 int or None - For [height, width]. - - Returns - ------- - list of 4 numbers - New bounding box. - - Examples - --------- - >>> coord = tl.prepro.obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100]) - [0.3, 0.4, 0.5, 0.5] - - """ - if coord is None: - coord = [] - if shape is None: - shape = [100, 200] - - return obj_box_coords_rescale(coords=[coord], shape=shape)[0] - - -def obj_box_coord_scale_to_pixelunit(coord, shape=None): - """Convert one coordinate [x, y, w (or x2), h (or y2)] in ratio format to image coordinate format. - It is the reverse process of ``obj_box_coord_rescale``. - - Parameters - ----------- - coord : list of 4 float - One coordinate of one image [x, y, w (or x2), h (or y2)] in ratio format, i.e value range [0~1]. - shape : tuple of 2 or None - For [height, width]. - - Returns - ------- - list of 4 numbers - New bounding box. - - Examples - --------- - >>> x, y, x2, y2 = tl.prepro.obj_box_coord_scale_to_pixelunit([0.2, 0.3, 0.5, 0.7], shape=(100, 200, 3)) - [40, 30, 100, 70] - - """ - if shape is None: - shape = [100, 100] - - imh, imw = shape[0:2] - x = int(coord[0] * imw) - x2 = int(coord[2] * imw) - y = int(coord[1] * imh) - y2 = int(coord[3] * imh) - return [x, y, x2, y2] - - -# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100]) -# tl.logging.info(coords) -# # [[0.3, 0.4, 0.5, 0.5], [0.1, 0.1, 0.2, 0.2]] -# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[50, 100]) -# tl.logging.info(coords) -# # [[0.3, 0.8, 0.5, 1.0]] -# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200]) -# tl.logging.info(coords) -# # [[0.15, 0.4, 0.25, 0.5]] -# exit() - - -def obj_box_coord_centroid_to_upleft_butright(coord, to_int=False): - """Convert one coordinate [x_center, y_center, w, h] to [x1, y1, x2, y2] in up-left and botton-right format. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - to_int : boolean - Whether to convert output as integer. - - Returns - ------- - list of 4 numbers - New bounding box. - - Examples - --------- - >>> coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20]) - [20, 30, 40, 50] - - """ - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - x_center, y_center, w, h = coord - x = x_center - w / 2. - y = y_center - h / 2. 
- x2 = x + w - y2 = y + h - if to_int: - return [int(x), int(y), int(x2), int(y2)] - else: - return [x, y, x2, y2] - - -# coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20]) -# tl.logging.info(coord) [20, 30, 40, 50] -# exit() - - -def obj_box_coord_upleft_butright_to_centroid(coord): - """Convert one coordinate [x1, y1, x2, y2] to [x_center, y_center, w, h]. - It is the reverse process of ``obj_box_coord_centroid_to_upleft_butright``. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - - Returns - ------- - list of 4 numbers - New bounding box. - - """ - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x1, y1, x2, y2]") - x1, y1, x2, y2 = coord - w = x2 - x1 - h = y2 - y1 - x_c = x1 + w / 2. - y_c = y1 + h / 2. - return [x_c, y_c, w, h] - - -def obj_box_coord_centroid_to_upleft(coord): - """Convert one coordinate [x_center, y_center, w, h] to [x, y, w, h]. - It is the reverse process of ``obj_box_coord_upleft_to_centroid``. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - - Returns - ------- - list of 4 numbers - New bounding box. - - """ - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - x_center, y_center, w, h = coord - x = x_center - w / 2. - y = y_center - h / 2. - return [x, y, w, h] - - -def obj_box_coord_upleft_to_centroid(coord): - """Convert one coordinate [x, y, w, h] to [x_center, y_center, w, h]. - It is the reverse process of ``obj_box_coord_centroid_to_upleft``. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - - Returns - ------- - list of 4 numbers - New bounding box. - - """ - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - x, y, w, h = coord - x_center = x + w / 2. - y_center = y + h / 2. - return [x_center, y_center, w, h] - - -def parse_darknet_ann_str_to_list(annotations): - r"""Input string format of class, x, y, w, h, return list of list format. - - Parameters - ----------- - annotations : str - The annotations in darkent format "class, x, y, w, h ...." seperated by "\\n". - - Returns - ------- - list of list of 4 numbers - List of bounding box. - - """ - annotations = annotations.split("\n") - ann = [] - for a in annotations: - a = a.split() - if len(a) == 5: - for i, _v in enumerate(a): - if i == 0: - a[i] = int(a[i]) - else: - a[i] = float(a[i]) - ann.append(a) - return ann - - -def parse_darknet_ann_list_to_cls_box(annotations): - """Parse darknet annotation format into two lists for class and bounding box. - - Input list of [[class, x, y, w, h], ...], return two list of [class ...] and [[x, y, w, h], ...]. - - Parameters - ------------ - annotations : list of list - A list of class and bounding boxes of images e.g. [[class, x, y, w, h], ...] - - Returns - ------- - list of int - List of class labels. - - list of list of 4 numbers - List of bounding box. - - """ - class_list = [] - bbox_list = [] - for ann in annotations: - class_list.append(ann[0]) - bbox_list.append(ann[1:]) - return class_list, bbox_list - - -def obj_box_horizontal_flip(im, coords=None, is_rescale=False, is_center=False, is_random=False): - """Left-right flip the image and coordinates for object detection. - - Parameters - ---------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...]. 
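Worked numbers for the centroid-to-corner conversion above, matching the `[30, 40, 20, 20]` docstring example (pure arithmetic, no library calls):

```python
# [x_center, y_center, w, h] = [30, 40, 20, 20]: the top-left corner sits at
# (30 - 10, 40 - 10) and the bottom-right corner at (40, 50).
coord = [30, 40, 20, 20]
x1, y1 = coord[0] - coord[2] / 2., coord[1] - coord[3] / 2.
x2, y2 = x1 + coord[2], y1 + coord[3]
assert [x1, y1, x2, y2] == [20., 30., 40., 50.]
```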
- is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean - Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. - is_random : boolean - If True, randomly flip. Default is False. - - Returns - ------- - numpy.array - A processed image - list of list of 4 numbers - A list of new bounding boxes. - - Examples - -------- - >>> im = np.zeros([80, 100]) # as an image with shape width=100, height=80 - >>> im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3], [0.1, 0.5, 0.2, 0.3]], is_rescale=True, is_center=True, is_random=False) - >>> print(coords) - [[0.8, 0.4, 0.3, 0.3], [0.9, 0.5, 0.2, 0.3]] - >>> im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3]], is_rescale=True, is_center=False, is_random=False) - >>> print(coords) - [[0.5, 0.4, 0.3, 0.3]] - >>> im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=True, is_random=False) - >>> print(coords) - [[80, 40, 30, 30]] - >>> im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False) - >>> print(coords) - [[50, 40, 30, 30]] - - """ - if coords is None: - coords = [] - - def _flip(im, coords): - im = flip_axis(im, axis=1, is_random=False) - coords_new = list() - - for coord in coords: - - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - if is_rescale: - if is_center: - # x_center' = 1 - x - x = 1. - coord[0] - else: - # x_center' = 1 - x - w - x = 1. - coord[0] - coord[2] - else: - if is_center: - # x' = im.width - x - x = im.shape[1] - coord[0] - else: - # x' = im.width - x - w - x = im.shape[1] - coord[0] - coord[2] - coords_new.append([x, coord[1], coord[2], coord[3]]) - return im, coords_new - - if is_random: - factor = np.random.uniform(-1, 1) - if factor > 0: - return _flip(im, coords) - else: - return im, coords - else: - return _flip(im, coords) - - -obj_box_left_right_flip = obj_box_horizontal_flip - -# im = np.zeros([80, 100]) # as an image with shape width=100, height=80 -# im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3], [0.1, 0.5, 0.2, 0.3]], is_rescale=True, is_center=True, is_random=False) -# tl.logging.info(coords) -# # [[0.8, 0.4, 0.3, 0.3], [0.9, 0.5, 0.2, 0.3]] -# im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3]], is_rescale=True, is_center=False, is_random=False) -# tl.logging.info(coords) -# # [[0.5, 0.4, 0.3, 0.3]] -# im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=True, is_random=False) -# tl.logging.info(coords) -# # [[80, 40, 30, 30]] -# im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False) -# tl.logging.info(coords) -# # [[50, 40, 30, 30]] -# exit() - - -def obj_box_imresize(im, coords=None, size=None, interp='bicubic', mode=None, is_rescale=False): - """Resize an image, and compute the new bounding box coordinates. - - Parameters - ------------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...] - size interp and mode : args - See ``tl.prepro.imresize``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1], then return the original coordinates. Default is False. 
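For pixel-unit, non-centroid boxes, the flip rule above reduces to one line: the new x is `image_width - x - w`, while y, w, and h are unchanged. A quick check against the docstring example above:

```python
im_w = 100
x, y, w, h = 20, 40, 30, 30
flipped = [im_w - x - w, y, w, h]
assert flipped == [50, 40, 30, 30]   # matches the is_rescale=False, is_center=False example
```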
- - Returns - ------- - numpy.array - A processed image - list of list of 4 numbers - A list of new bounding boxes. - - Examples - -------- - >>> im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80 - >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30], [10, 20, 20, 20]], size=[160, 200], is_rescale=False) - >>> print(coords) - [[40, 80, 60, 60], [20, 40, 40, 40]] - >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False) - >>> print(coords) - [[20, 20, 30, 15]] - >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False) - >>> print(coords) - [[30, 30, 45, 22]] - >>> im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True) - >>> print(coords, im2.shape) - [[0.2, 0.4, 0.3, 0.3]] (160, 200, 3) - - """ - if coords is None: - coords = [] - if size is None: - size = [100, 100] - - imh, imw = im.shape[0:2] - imh = imh * 1.0 # * 1.0 for python2 : force division to be float point - imw = imw * 1.0 - im = imresize(im, size=size, interp=interp, mode=mode) - - if is_rescale is False: - coords_new = list() - - for coord in coords: - - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - # x' = x * (imw'/imw) - x = int(coord[0] * (size[1] / imw)) - # y' = y * (imh'/imh) - # tl.logging.info('>>', coord[1], size[0], imh) - y = int(coord[1] * (size[0] / imh)) - # w' = w * (imw'/imw) - w = int(coord[2] * (size[1] / imw)) - # h' = h * (imh'/imh) - h = int(coord[3] * (size[0] / imh)) - coords_new.append([x, y, w, h]) - return im, coords_new - else: - return im, coords - - -# im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80 -# _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30], [10, 20, 20, 20]], size=[160, 200], is_rescale=False) -# tl.logging.info(coords) -# # [[40, 80, 60, 60], [20, 40, 40, 40]] -# _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False) -# tl.logging.info(coords) -# # [20, 20, 30, 15] -# _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False) -# tl.logging.info(coords) -# # [30, 30, 45, 22] -# im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True) -# tl.logging.info(coords, im2.shape) -# # [0.2, 0.4, 0.3, 0.3] (160, 200, 3) -# exit() - - -def obj_box_crop( - im, classes=None, coords=None, wrg=100, hrg=100, is_rescale=False, is_center=False, is_random=False, - thresh_wh=0.02, thresh_wh2=12. -): - """Randomly or centrally crop an image, and compute the new bounding box coordinates. - Objects outside the cropped image will be removed. - - Parameters - ----------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - classes : list of int or None - Class IDs. - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...] - wrg hrg and is_random : args - See ``tl.prepro.crop``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean, default False - Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. - thresh_wh : float - Threshold, remove the box if its ratio of width(height) to image size less than the threshold. - thresh_wh2 : float - Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. 
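Worked numbers for the box rescaling inside `obj_box_imresize` above: coordinates scale by the same height/width ratios as the image, with `size` given as [height, width].

```python
imh, imw = 80, 100
size = [160, 200]
x, y, w, h = 20, 40, 30, 30
scaled = [int(x * size[1] / imw), int(y * size[0] / imh),
          int(w * size[1] / imw), int(h * size[0] / imh)]
assert scaled == [40, 80, 60, 60]    # as in the docstring example above
```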
- - Returns - ------- - numpy.array - A processed image - list of int - A list of classes - list of list of 4 numbers - A list of new bounding boxes. - - """ - if classes is None: - classes = [] - if coords is None: - coords = [] - - h, w = im.shape[0], im.shape[1] - - if (h <= hrg) or (w <= wrg): - raise AssertionError("The size of cropping should smaller than the original image") - - if is_random: - h_offset = int(np.random.uniform(0, h - hrg) - 1) - w_offset = int(np.random.uniform(0, w - wrg) - 1) - h_end = hrg + h_offset - w_end = wrg + w_offset - im_new = im[h_offset:h_end, w_offset:w_end] - else: # central crop - h_offset = int(np.floor((h - hrg) / 2.)) - w_offset = int(np.floor((w - wrg) / 2.)) - h_end = h_offset + hrg - w_end = w_offset + wrg - im_new = im[h_offset:h_end, w_offset:w_end] - - # w - # _____________________________ - # | h/w offset | - # | ------- | - # h | | | | - # | | | | - # | ------- | - # | h/w end | - # |___________________________| - - def _get_coord(coord): - """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, - before getting the new coordinates. - Boxes outsides the cropped image will be removed. - - """ - if is_center: - coord = obj_box_coord_centroid_to_upleft(coord) - - ##======= pixel unit format and upleft, w, h ==========## - - # x = np.clip( coord[0] - w_offset, 0, w_end - w_offset) - # y = np.clip( coord[1] - h_offset, 0, h_end - h_offset) - # w = np.clip( coord[2] , 0, w_end - w_offset) - # h = np.clip( coord[3] , 0, h_end - h_offset) - - x = coord[0] - w_offset - y = coord[1] - h_offset - w = coord[2] - h = coord[3] - - if x < 0: - if x + w <= 0: - return None - w = w + x - x = 0 - elif x > im_new.shape[1]: # object outside the cropped image - return None - - if y < 0: - if y + h <= 0: - return None - h = h + y - y = 0 - elif y > im_new.shape[0]: # object outside the cropped image - return None - - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image - w = im_new.shape[1] - x - - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image - h = im_new.shape[0] - y - - if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow - # tl.logging.info('xx', w, h) - return None - - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < - thresh_wh): # object shape strange: too narrow - # tl.logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) - return None - - coord = [x, y, w, h] - - ## convert back if input format is center. - if is_center: - coord = obj_box_coord_upleft_to_centroid(coord) - - return coord - - coords_new = list() - classes_new = list() - for i, _ in enumerate(coords): - coord = coords[i] - - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - if is_rescale: - # for scaled coord, upscaled before process and scale back in the end. 
- coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) - coord = _get_coord(coord) - if coord is not None: - coord = obj_box_coord_rescale(coord, im_new.shape) - coords_new.append(coord) - classes_new.append(classes[i]) - else: - coord = _get_coord(coord) - if coord is not None: - coords_new.append(coord) - classes_new.append(classes[i]) - return im_new, classes_new, coords_new - - -def obj_box_shift( - im, classes=None, coords=None, wrg=0.1, hrg=0.1, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', - cval=0., order=1, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, thresh_wh2=12. -): - """Shift an image randomly or non-randomly, and compute the new bounding box coordinates. - Objects outside the cropped image will be removed. - - Parameters - ----------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - classes : list of int or None - Class IDs. - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...] - wrg, hrg row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.shift``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean - Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. - thresh_wh : float - Threshold, remove the box if its ratio of width(height) to image size less than the threshold. - thresh_wh2 : float - Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. - - - Returns - ------- - numpy.array - A processed image - list of int - A list of classes - list of list of 4 numbers - A list of new bounding boxes. - - """ - if classes is None: - classes = [] - if coords is None: - coords = [] - - imh, imw = im.shape[row_index], im.shape[col_index] - - if (hrg >= 1.0) and (hrg <= 0.) and (wrg >= 1.0) and (wrg <= 0.): - raise AssertionError("shift range should be (0, 1)") - - if is_random: - tx = np.random.uniform(-hrg, hrg) * imh - ty = np.random.uniform(-wrg, wrg) * imw - else: - tx, ty = hrg * imh, wrg * imw - translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - - transform_matrix = translation_matrix # no need to do offset - im_new = affine_transform(im, transform_matrix, channel_index, fill_mode, cval, order) - - # modified from obj_box_crop - def _get_coord(coord): - """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, - before getting the new coordinates. - Boxes outsides the cropped image will be removed. - - """ - if is_center: - coord = obj_box_coord_centroid_to_upleft(coord) - - ##======= pixel unit format and upleft, w, h ==========## - x = coord[0] - ty # only change this - y = coord[1] - tx # only change this - w = coord[2] - h = coord[3] - - if x < 0: - if x + w <= 0: - return None - w = w + x - x = 0 - elif x > im_new.shape[1]: # object outside the cropped image - return None - - if y < 0: - if y + h <= 0: - return None - h = h + y - y = 0 - elif y > im_new.shape[0]: # object outside the cropped image - return None - - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image - w = im_new.shape[1] - x - - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image - h = im_new.shape[0] - y - - if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) 
> thresh_wh2): # object shape strange: too narrow - # tl.logging.info('xx', w, h) - return None - - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < - thresh_wh): # object shape strange: too narrow - # tl.logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) - return None - - coord = [x, y, w, h] - - ## convert back if input format is center. - if is_center: - coord = obj_box_coord_upleft_to_centroid(coord) - - return coord - - coords_new = list() - classes_new = list() - for i, _ in enumerate(coords): - coord = coords[i] - - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - if is_rescale: - # for scaled coord, upscaled before process and scale back in the end. - coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) - coord = _get_coord(coord) - if coord is not None: - coord = obj_box_coord_rescale(coord, im_new.shape) - coords_new.append(coord) - classes_new.append(classes[i]) - else: - coord = _get_coord(coord) - if coord is not None: - coords_new.append(coord) - classes_new.append(classes[i]) - return im_new, classes_new, coords_new - - -def obj_box_zoom( - im, classes=None, coords=None, zoom_range=(0.9, - 1.1), row_index=0, col_index=1, channel_index=2, fill_mode='nearest', - cval=0., order=1, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, thresh_wh2=12. -): - """Zoom in and out of a single image, randomly or non-randomly, and compute the new bounding box coordinates. - Objects outside the cropped image will be removed. - - Parameters - ----------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - classes : list of int or None - Class IDs. - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...]. - zoom_range row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.zoom``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean - Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format). Default is False. - thresh_wh : float - Threshold, remove the box if its ratio of width(height) to image size less than the threshold. - thresh_wh2 : float - Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. - - Returns - ------- - numpy.array - A processed image - list of int - A list of classes - list of list of 4 numbers - A list of new bounding boxes. - - """ - if classes is None: - classes = [] - if coords is None: - coords = [] - - if len(zoom_range) != 2: - raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) - if is_random: - if zoom_range[0] == 1 and zoom_range[1] == 1: - zx, zy = 1, 1 - tl.logging.info(" random_zoom : not zoom in/out") - else: - zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) - else: - zx, zy = zoom_range - # tl.logging.info(zx, zy) - zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]]) - - h, w = im.shape[row_index], im.shape[col_index] - transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) - im_new = affine_transform(im, transform_matrix, channel_index, fill_mode, cval, order) - - # modified from obj_box_crop - def _get_coord(coord): - """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, - before getting the new coordinates. - Boxes outsides the cropped image will be removed. 
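The `_get_coord` logic shared by `obj_box_crop`, `obj_box_shift`, and `obj_box_zoom` boils down to the following simplified sketch: clip a transformed box to the image and drop it entirely when it falls outside. The aspect/size thresholds and the centroid round trip are omitted, and `clip_box` is an illustrative stand-in, not the module's function.

```python
def clip_box(x, y, w, h, im_w, im_h):
    if x + w <= 0 or y + h <= 0 or x >= im_w or y >= im_h:
        return None                  # entirely outside the image: remove the box
    if x < 0:
        w, x = w + x, 0              # crop the part hanging off the left edge
    if y < 0:
        h, y = h + y, 0              # crop the part hanging off the top edge
    w = min(w, im_w - x)             # crop right/bottom overhang
    h = min(h, im_h - y)
    return [x, y, w, h]

assert clip_box(-10, 5, 30, 30, 100, 100) == [0, 5, 20, 30]
assert clip_box(120, 5, 30, 30, 100, 100) is None
```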
- - """ - if is_center: - coord = obj_box_coord_centroid_to_upleft(coord) - - # ======= pixel unit format and upleft, w, h ========== - x = (coord[0] - im.shape[1] / 2) / zy + im.shape[1] / 2 # only change this - y = (coord[1] - im.shape[0] / 2) / zx + im.shape[0] / 2 # only change this - w = coord[2] / zy # only change this - h = coord[3] / zx # only change thisS - - if x < 0: - if x + w <= 0: - return None - w = w + x - x = 0 - elif x > im_new.shape[1]: # object outside the cropped image - return None - - if y < 0: - if y + h <= 0: - return None - h = h + y - y = 0 - elif y > im_new.shape[0]: # object outside the cropped image - return None - - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image - w = im_new.shape[1] - x - - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image - h = im_new.shape[0] - y - - if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow - # tl.logging.info('xx', w, h) - return None - - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < - thresh_wh): # object shape strange: too narrow - # tl.logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) - return None - - coord = [x, y, w, h] - - # convert back if input format is center. - if is_center: - coord = obj_box_coord_upleft_to_centroid(coord) - - return coord - - coords_new = list() - classes_new = list() - for i, _ in enumerate(coords): - coord = coords[i] - - if len(coord) != 4: - raise AssertionError("coordinate should be 4 values : [x, y, w, h]") - - if is_rescale: - # for scaled coord, upscaled before process and scale back in the end. - coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) - coord = _get_coord(coord) - if coord is not None: - coord = obj_box_coord_rescale(coord, im_new.shape) - coords_new.append(coord) - classes_new.append(classes[i]) - else: - coord = _get_coord(coord) - if coord is not None: - coords_new.append(coord) - classes_new.append(classes[i]) - return im_new, classes_new, coords_new - - -def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncating='pre', value=0.): - """Pads each sequence to the same length: - the length of the longest sequence. - If maxlen is provided, any sequence longer - than maxlen is truncated to maxlen. - Truncation happens off either the beginning (default) or - the end of the sequence. - Supports post-padding and pre-padding (default). - - Parameters - ---------- - sequences : list of list of int - All sequences where each row is a sequence. - maxlen : int - Maximum length. - dtype : numpy.dtype or str - Data type to cast the resulting sequence. - padding : str - Either 'pre' or 'post', pad either before or after each sequence. - truncating : str - Either 'pre' or 'post', remove values from sequences larger than maxlen either in the beginning or in the end of the sequence - value : float - Value to pad the sequences to the desired value. - - Returns - ---------- - x : numpy.array - With dimensions (number_of_sequences, maxlen) - - Examples - ---------- - >>> sequences = [[1,1,1,1,1],[2,2,2],[3,3]] - >>> sequences = pad_sequences(sequences, maxlen=None, dtype='int32', - ... padding='post', truncating='pre', value=0.) - [[1 1 1 1 1] - [2 2 2 0 0] - [3 3 0 0 0]] - - """ - lengths = [len(s) for s in sequences] - - nb_samples = len(sequences) - if maxlen is None: - maxlen = np.max(lengths) - - # take the sample shape from the first non empty sequence - # checking for consistency in the main loop below. 
- sample_shape = tuple() - for s in sequences: - if len(s) > 0: - sample_shape = np.asarray(s).shape[1:] - break - - x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype) - for idx, s in enumerate(sequences): - if len(s) == 0: - continue # empty list was found - if truncating == 'pre': - trunc = s[-maxlen:] - elif truncating == 'post': - trunc = s[:maxlen] - else: - raise ValueError('Truncating type "%s" not understood' % truncating) - - # check `trunc` has expected shape - trunc = np.asarray(trunc, dtype=dtype) - if trunc.shape[1:] != sample_shape: - raise ValueError( - 'Shape of sample %s of sequence at position %s is different from expected shape %s' % - (trunc.shape[1:], idx, sample_shape) - ) - - if padding == 'post': - x[idx, :len(trunc)] = trunc - elif padding == 'pre': - x[idx, -len(trunc):] = trunc - else: - raise ValueError('Padding type "%s" not understood' % padding) - return x.tolist() - - -def remove_pad_sequences(sequences, pad_id=0): - """Remove padding. - - Parameters - ----------- - sequences : list of list of int - All sequences where each row is a sequence. - pad_id : int - The pad ID. - - Returns - ---------- - list of list of int - The processed sequences. - - Examples - ---------- - >>> sequences = [[2,3,4,0,0], [5,1,2,3,4,0,0,0], [4,5,0,2,4,0,0,0]] - >>> print(remove_pad_sequences(sequences, pad_id=0)) - [[2, 3, 4], [5, 1, 2, 3, 4], [4, 5, 0, 2, 4]] - - """ - sequences_out = copy.deepcopy(sequences) - - for i, _ in enumerate(sequences): - # for j in range(len(sequences[i])): - # if sequences[i][j] == pad_id: - # sequences_out[i] = sequences_out[i][:j] - # break - for j in range(1, len(sequences[i])): - if sequences[i][-j] != pad_id: - sequences_out[i] = sequences_out[i][0:-j + 1] - break - - return sequences_out - - -def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_end_id=False): - """Set all tokens(ids) after END token to the padding value, and then shorten (option) it to the maximum sequence length in this batch. - - Parameters - ----------- - sequences : list of list of int - All sequences where each row is a sequence. - end_id : int - The special token for END. - pad_val : int - Replace the `end_id` and the IDs after `end_id` to this value. - is_shorten : boolean - Shorten the sequences. Default is True. - remain_end_id : boolean - Keep an `end_id` in the end. Default is False. - - Returns - ---------- - list of list of int - The processed sequences. - - Examples - --------- - >>> sentences_ids = [[4, 3, 5, 3, 2, 2, 2, 2], <-- end_id is 2 - ... [5, 3, 9, 4, 9, 2, 2, 3]] <-- end_id is 2 - >>> sentences_ids = precess_sequences(sentences_ids, end_id=vocab.end_id, pad_val=0, is_shorten=True) - [[4, 3, 5, 3, 0], [5, 3, 9, 4, 9]] - - """ - max_length = 0 - for _, seq in enumerate(sequences): - is_end = False - for i_w, n in enumerate(seq): - if n == end_id and is_end == False: # 1st time to see end_id - is_end = True - if max_length < i_w: - max_length = i_w - if remain_end_id is False: - seq[i_w] = pad_val # set end_id to pad_val - elif is_end ==True: - seq[i_w] = pad_val - - if remain_end_id is True: - max_length += 1 - if is_shorten: - for i, seq in enumerate(sequences): - sequences[i] = seq[:max_length] - return sequences - - -def sequences_add_start_id(sequences, start_id=0, remove_last=False): - """Add special start token(id) in the beginning of each sequence. - - Parameters - ------------ - sequences : list of list of int - All sequences where each row is a sequence. - start_id : int - The start ID. 
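A pure-Python round trip of the padding helpers above: post-padding to the longest sequence, then stripping trailing pads only, so interior pad tokens survive, as in `remove_pad_sequences`. The names here are stand-ins, not the module's API.

```python
seqs = [[1, 1, 1, 1, 1], [2, 2, 2], [3, 3]]
maxlen = max(len(s) for s in seqs)
padded = [s[:maxlen] + [0] * (maxlen - len(s)) for s in seqs]   # post-padding
assert padded == [[1, 1, 1, 1, 1], [2, 2, 2, 0, 0], [3, 3, 0, 0, 0]]

def strip_trailing(seq, pad_id=0):
    # trailing pads only, so interior pad_id tokens are kept
    while seq and seq[-1] == pad_id:
        seq = seq[:-1]
    return seq

assert [strip_trailing(s) for s in padded] == seqs
```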
-    remove_last : boolean
-        Remove the last value of each sequence. Usually used for removing the end ID.
-
-    Returns
-    ----------
-    list of list of int
-        The processed sequences.
-
-    Examples
-    ---------
-    >>> sentences_ids = [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]]
-    >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2)
-    [[2, 4, 3, 5, 3, 2, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2, 3]]
-    >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2, remove_last=True)
-    [[2, 4, 3, 5, 3, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2]]
-
-    For Seq2seq
-
-    >>> input = [a, b, c]
-    >>> target = [x, y, z]
-    >>> decode_seq = [start_id, a, b] <-- sequences_add_start_id(input, start_id, True)
-
-    """
-    sequences_out = [[] for _ in range(len(sequences))]  # note: `[[]] * len(sequences)` would alias one list
-    for i, _ in enumerate(sequences):
-        if remove_last:
-            sequences_out[i] = [start_id] + sequences[i][:-1]
-        else:
-            sequences_out[i] = [start_id] + sequences[i]
-    return sequences_out
-
-
-def sequences_add_end_id(sequences, end_id=888):
-    """Add a special end token (id) at the end of each sequence.
-
-    Parameters
-    -----------
-    sequences : list of list of int
-        All sequences where each row is a sequence.
-    end_id : int
-        The end ID.
-
-    Returns
-    ----------
-    list of list of int
-        The processed sequences.
-
-    Examples
-    ---------
-    >>> sequences = [[1,2,3],[4,5,6,7]]
-    >>> print(sequences_add_end_id(sequences, end_id=999))
-    [[1, 2, 3, 999], [4, 5, 6, 7, 999]]
-
-    """
-    sequences_out = [[] for _ in range(len(sequences))]
-    for i, _ in enumerate(sequences):
-        sequences_out[i] = sequences[i] + [end_id]
-    return sequences_out
-
-
-def sequences_add_end_id_after_pad(sequences, end_id=888, pad_id=0):
-    """Add a special end token (id) to each padded sequence by replacing its first pad token;
-    sequences without padding are left unchanged.
-
-    Parameters
-    -----------
-    sequences : list of list of int
-        All sequences where each row is a sequence.
-    end_id : int
-        The end ID.
-    pad_id : int
-        The pad ID.
-
-    Returns
-    ----------
-    list of list of int
-        The processed sequences.
-
-    Examples
-    ---------
-    >>> sequences = [[1,2,0,0], [1,2,3,0], [1,2,3,4]]
-    >>> print(sequences_add_end_id_after_pad(sequences, end_id=99, pad_id=0))
-    [[1, 2, 99, 0], [1, 2, 3, 99], [1, 2, 3, 4]]
-
-    """
-    sequences_out = copy.deepcopy(sequences)
-    for i, v in enumerate(sequences):
-        for j, _v2 in enumerate(v):
-            if sequences[i][j] == pad_id:
-                sequences_out[i][j] = end_id
-                break
-    return sequences_out
-
-
-def sequences_get_mask(sequences, pad_val=0):
-    """Return mask for sequences.
-
-    Parameters
-    -----------
-    sequences : list of list of int
-        All sequences where each row is a sequence.
-    pad_val : int
-        The pad value.
-
-    Returns
-    ----------
-    list of list of int
-        The mask.
-
-    Examples
-    ---------
-    >>> sentences_ids = [[4, 0, 5, 3, 0, 0],
-    ...
[5, 3, 9, 4, 9, 0]] - >>> mask = sequences_get_mask(sentences_ids, pad_val=0) - [[1 1 1 1 0 0] - [1 1 1 1 1 0]] - - """ - mask = np.ones_like(sequences) - for i, seq in enumerate(sequences): - for i_w in reversed(range(len(seq))): - if seq[i_w] == pad_val: - mask[i, i_w] = 0 - else: - break # <-- exit the for loop, prepcess next sequence - return mask - - -def keypoint_random_crop(image, annos, mask=None, size=(368, 368)): - """Randomly crop an image and corresponding keypoints without influence scales, given by ``keypoint_random_resize_shortestedge``. - - Parameters - ----------- - image : 3 channel image - The given image for augmentation. - annos : list of list of floats - The keypoints annotation of people. - mask : single channel image or None - The mask if available. - size : tuple of int - The size of returned image. - - Returns - ---------- - preprocessed image, annotation, mask - - """ - - _target_height = size[0] - _target_width = size[1] - target_size = (_target_width, _target_height) - - if len(np.shape(image)) == 2: - image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) - height, width, _ = np.shape(image) - - for _ in range(50): - x = random.randrange(0, width - target_size[0]) if width > target_size[0] else 0 - y = random.randrange(0, height - target_size[1]) if height > target_size[1] else 0 - - # check whether any face is inside the box to generate a reasonably-balanced datasets - for joint in annos: - if x <= joint[0][0] < x + target_size[0] and y <= joint[0][1] < y + target_size[1]: - break - - def pose_crop(image, annos, mask, x, y, w, h): # TODO : speed up with affine transform - # adjust image - target_size = (w, h) - - img = image - resized = img[y:y + target_size[1], x:x + target_size[0], :] - resized_mask = mask[y:y + target_size[1], x:x + target_size[0]] - # adjust meta data - adjust_joint_list = [] - for joint in annos: - adjust_joint = [] - for point in joint: - if point[0] < -10 or point[1] < -10: - adjust_joint.append((-1000, -1000)) - continue - new_x, new_y = point[0] - x, point[1] - y - # should not crop outside the image - if new_x > w - 1 or new_y > h - 1: - adjust_joint.append((-1000, -1000)) - continue - adjust_joint.append((new_x, new_y)) - adjust_joint_list.append(adjust_joint) - - return resized, adjust_joint_list, resized_mask - - return pose_crop(image, annos, mask, x, y, target_size[0], target_size[1]) - - -def keypoint_resize_random_crop(image, annos, mask=None, size=(368, 368)): - """Reszie the image to make either its width or height equals to the given sizes. - Then randomly crop image without influence scales. - Resize the image match with the minimum size before cropping, this API will change the zoom scale of object. - - Parameters - ----------- - image : 3 channel image - The given image for augmentation. - annos : list of list of floats - The keypoints annotation of people. - mask : single channel image or None - The mask if available. - size : tuple of int - The size (height, width) of returned image. - - Returns - ---------- - preprocessed image, annos, mask - - """ - - if len(np.shape(image)) == 2: - image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) - - def resize_image(image, annos, mask, target_width, target_height): - """Reszie image - - Parameters - ----------- - image : 3 channel image - The given image. - annos : list of list of floats - Keypoints of people - mask : single channel image or None - The mask if available. - target_width : int - Expected width of returned image. - target_height : int - Expected height of returned image. 
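The coordinate bookkeeping in these keypoint crops reduces to a shift plus a visibility test: a crop at offset (x0, y0) moves every keypoint by (-x0, -y0), and points leaving the window become the sentinel (-1000, -1000) used above. A simplified stand-in (`move_point`, the offsets, and the crop size are illustrative):

```python
x0, y0, cw, ch = 30, 20, 100, 100        # crop offset and crop size

def move_point(px, py):
    nx, ny = px - x0, py - y0
    if nx < 0 or ny < 0 or nx > cw - 1 or ny > ch - 1:
        return (-1000, -1000)            # outside the crop window
    return (nx, ny)

assert move_point(50, 60) == (20, 40)
assert move_point(10, 60) == (-1000, -1000)   # left of the crop window
```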
- - Returns - ---------- - preprocessed input image, annos, mask - - """ - y, x, _ = np.shape(image) - - ratio_y = target_height / y - ratio_x = target_width / x - - new_joints = [] - # update meta - for people in annos: - new_keypoints = [] - for keypoints in people: - if keypoints[0] < 0 or keypoints[1] < 0: - new_keypoints.append((-1000, -1000)) - continue - pts = (int(keypoints[0] * ratio_x + 0.5), int(keypoints[1] * ratio_y + 0.5)) - if pts[0] > target_width - 1 or pts[1] > target_height - 1: - new_keypoints.append((-1000, -1000)) - continue - - new_keypoints.append(pts) - new_joints.append(new_keypoints) - annos = new_joints - - new_image = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_AREA) - if mask is not None: - new_mask = cv2.resize(mask, (target_width, target_height), interpolation=cv2.INTER_AREA) - return new_image, annos, new_mask - else: - return new_image, annos, None - - _target_height = size[0] - _target_width = size[1] - if len(np.shape(image)) == 2: - image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) - height, width, _ = np.shape(image) - # print("the size of original img is:", height, width) - if height <= width: - ratio = _target_height / height - new_width = int(ratio * width) - if height == width: - new_width = _target_height - - image, annos, mask = resize_image(image, annos, mask, new_width, _target_height) - - # for i in annos: - # if len(i) is not 19: - # print('Joints of person is not 19 ERROR FROM RESIZE') - - if new_width > _target_width: - crop_range_x = np.random.randint(0, new_width - _target_width) - else: - crop_range_x = 0 - image = image[:, crop_range_x:crop_range_x + _target_width, :] - if mask is not None: - mask = mask[:, crop_range_x:crop_range_x + _target_width] - # joint_list= [] - new_joints = [] - #annos-pepople-joints (must be 19 or []) - for people in annos: - # print("number of keypoints is", np.shape(people)) - new_keypoints = [] - for keypoints in people: - if keypoints[0] < -10 or keypoints[1] < -10: - new_keypoints.append((-1000, -1000)) - continue - top = crop_range_x + _target_width - 1 - if keypoints[0] >= crop_range_x and keypoints[0] <= top: - # pts = (keypoints[0]-crop_range_x, keypoints[1]) - pts = (int(keypoints[0] - crop_range_x), int(keypoints[1])) - else: - pts = (-1000, -1000) - new_keypoints.append(pts) - - new_joints.append(new_keypoints) - # if len(new_keypoints) != 19: - # print('1:The Length of joints list should be 0 or 19 but actually:', len(new_keypoints)) - annos = new_joints - - if height > width: - ratio = _target_width / width - new_height = int(ratio * height) - image, annos, mask = resize_image(image, annos, mask, _target_width, new_height) - - # for i in annos: - # if len(i) is not 19: - # print('Joints of person is not 19 ERROR') - - if new_height > _target_height: - crop_range_y = np.random.randint(0, new_height - _target_height) - - else: - crop_range_y = 0 - image = image[crop_range_y:crop_range_y + _target_width, :, :] - if mask is not None: - mask = mask[crop_range_y:crop_range_y + _target_width, :] - new_joints = [] - - for people in annos: # TODO : speed up with affine transform - new_keypoints = [] - for keypoints in people: - - # case orginal points are not usable - if keypoints[0] < 0 or keypoints[1] < 0: - new_keypoints.append((-1000, -1000)) - continue - # y axis coordinate change - bot = crop_range_y + _target_height - 1 - if keypoints[1] >= crop_range_y and keypoints[1] <= bot: - # pts = (keypoints[0], keypoints[1]-crop_range_y) - pts = (int(keypoints[0]), 
int(keypoints[1] - crop_range_y)) - # if pts[0]>367 or pts[1]>367: - # print('Error2') - else: - pts = (-1000, -1000) - - new_keypoints.append(pts) - - new_joints.append(new_keypoints) - # if len(new_keypoints) != 19: - # print('2:The Length of joints list should be 0 or 19 but actually:', len(new_keypoints)) - - annos = new_joints - - # mask = cv2.resize(mask, (46, 46), interpolation=cv2.INTER_AREA) - if mask is not None: - return image, annos, mask - else: - return image, annos, None - - -def keypoint_random_rotate(image, annos, mask=None, rg=15.): - """Rotate an image and corresponding keypoints. - - Parameters - ----------- - image : 3 channel image - The given image for augmentation. - annos : list of list of floats - The keypoints annotation of people. - mask : single channel image or None - The mask if available. - rg : int or float - Degree to rotate, usually 0 ~ 180. - - Returns - ---------- - preprocessed image, annos, mask - - """ - - def _rotate_coord(shape, newxy, point, angle): - angle = -1 * angle / 180.0 * math.pi - ox, oy = shape - px, py = point - ox /= 2 - oy /= 2 - qx = math.cos(angle) * (px - ox) - math.sin(angle) * (py - oy) - qy = math.sin(angle) * (px - ox) + math.cos(angle) * (py - oy) - new_x, new_y = newxy - qx += ox - new_x - qy += oy - new_y - return int(qx + 0.5), int(qy + 0.5) - - def _largest_rotated_rect(w, h, angle): - """ - Get largest rectangle after rotation. - http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders - """ - angle = angle / 180.0 * math.pi - if w <= 0 or h <= 0: - return 0, 0 - - width_is_longer = w >= h - side_long, side_short = (w, h) if width_is_longer else (h, w) - - # since the solutions for angle, -angle and 180-angle are all the same, - # if suffices to look at the first quadrant and the absolute values of sin,cos: - sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle)) - if side_short <= 2. 
* sin_a * cos_a * side_long: - # half constrained case: two crop corners touch the longer side, - # the other two corners are on the mid-line parallel to the longer line - x = 0.5 * side_short - wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a) - else: - # fully constrained case: crop touches all 4 sides - cos_2a = cos_a * cos_a - sin_a * sin_a - wr, hr = (w * cos_a - h * sin_a) / cos_2a, (h * cos_a - w * sin_a) / cos_2a - return int(np.round(wr)), int(np.round(hr)) - - img_shape = np.shape(image) - height = img_shape[0] - width = img_shape[1] - deg = np.random.uniform(-rg, rg) - - img = image - center = (img.shape[1] * 0.5, img.shape[0] * 0.5) # x, y - rot_m = cv2.getRotationMatrix2D((int(center[0]), int(center[1])), deg, 1) - ret = cv2.warpAffine(img, rot_m, img.shape[1::-1], flags=cv2.INTER_AREA, borderMode=cv2.BORDER_CONSTANT) - if img.ndim == 3 and ret.ndim == 2: - ret = ret[:, :, np.newaxis] - neww, newh = _largest_rotated_rect(ret.shape[1], ret.shape[0], deg) - neww = min(neww, ret.shape[1]) - newh = min(newh, ret.shape[0]) - newx = int(center[0] - neww * 0.5) - newy = int(center[1] - newh * 0.5) - # print(ret.shape, deg, newx, newy, neww, newh) - img = ret[newy:newy + newh, newx:newx + neww] - # adjust meta data - adjust_joint_list = [] - for joint in annos: # TODO : speed up with affine transform - adjust_joint = [] - for point in joint: - if point[0] < -100 or point[1] < -100: - adjust_joint.append((-1000, -1000)) - continue - - x, y = _rotate_coord((width, height), (newx, newy), point, deg) - - if x > neww - 1 or y > newh - 1: - adjust_joint.append((-1000, -1000)) - continue - if x < 0 or y < 0: - adjust_joint.append((-1000, -1000)) - continue - - adjust_joint.append((x, y)) - adjust_joint_list.append(adjust_joint) - joint_list = adjust_joint_list - - if mask is not None: - msk = mask - center = (msk.shape[1] * 0.5, msk.shape[0] * 0.5) # x, y - rot_m = cv2.getRotationMatrix2D((int(center[0]), int(center[1])), deg, 1) - ret = cv2.warpAffine(msk, rot_m, msk.shape[1::-1], flags=cv2.INTER_AREA, borderMode=cv2.BORDER_CONSTANT) - if msk.ndim == 3 and msk.ndim == 2: - ret = ret[:, :, np.newaxis] - neww, newh = _largest_rotated_rect(ret.shape[1], ret.shape[0], deg) - neww = min(neww, ret.shape[1]) - newh = min(newh, ret.shape[0]) - newx = int(center[0] - neww * 0.5) - newy = int(center[1] - newh * 0.5) - # print(ret.shape, deg, newx, newy, neww, newh) - msk = ret[newy:newy + newh, newx:newx + neww] - return img, joint_list, msk - else: - return img, joint_list, None - - -def keypoint_random_flip( - image, annos, mask=None, prob=0.5, flip_list=(0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16, 18) -): - """Flip an image and corresponding keypoints. - - Parameters - ----------- - image : 3 channel image - The given image for augmentation. - annos : list of list of floats - The keypoints annotation of people. - mask : single channel image or None - The mask if available. - prob : float, 0 to 1 - The probability to flip the image, if 1, always flip the image. - flip_list : tuple of int - Denotes how the keypoints number be changed after flipping which is required for pose estimation task. - The left and right body should be maintained rather than switch. - (Default COCO format). - Set to an empty tuple if you don't need to maintain left and right information. 
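Before the function body, a standalone sketch of what `flip_list` encodes: after a horizontal flip, a keypoint that was a *left* joint becomes the corresponding *right* joint, so besides mirroring the x coordinate the joint indices must be permuted. The toy `flip_keypoints` helper and the dummy skeleton below are illustrative only, not TensorLayer API:

```python
# Default COCO-style permutation from the signature above.
flip_list = (0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16, 18)

def flip_keypoints(person, width):
    # New joint i takes the old joint flip_list[i], with x mirrored.
    return [(width - person[k][0], person[k][1]) for k in flip_list]

person = [(float(i), float(i)) for i in range(19)]  # dummy (x, y) joints
print(flip_keypoints(person, width=368)[:4])
# -> [(368.0, 0.0), (367.0, 1.0), (363.0, 5.0), (362.0, 6.0)]
```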
- - Returns - ---------- - preprocessed image, annos, mask - - """ - - _prob = np.random.uniform(0, 1.0) - if _prob < prob: - return image, annos, mask - - _, width, _ = np.shape(image) - image = cv2.flip(image, 1) - mask = cv2.flip(mask, 1) - new_joints = [] - for people in annos: # TODO : speed up with affine transform - new_keypoints = [] - for k in flip_list: - point = people[k] - if point[0] < 0 or point[1] < 0: - new_keypoints.append((-1000, -1000)) - continue - if point[0] > image.shape[1] - 1 or point[1] > image.shape[0] - 1: - new_keypoints.append((-1000, -1000)) - continue - if (width - point[0]) > image.shape[1] - 1: - new_keypoints.append((-1000, -1000)) - continue - new_keypoints.append((width - point[0], point[1])) - new_joints.append(new_keypoints) - annos = new_joints - - return image, annos, mask - - -def keypoint_random_resize(image, annos, mask=None, zoom_range=(0.8, 1.2)): - """Randomly resize an image and corresponding keypoints. - The height and width of image will be changed independently, so the scale will be changed. - - Parameters - ----------- - image : 3 channel image - The given image for augmentation. - annos : list of list of floats - The keypoints annotation of people. - mask : single channel image or None - The mask if available. - zoom_range : tuple of two floats - The minimum and maximum factor to zoom in or out, e.g (0.5, 1) means zoom out 1~2 times. - - Returns - ---------- - preprocessed image, annos, mask - - """ - height = image.shape[0] - width = image.shape[1] - _min, _max = zoom_range - scalew = np.random.uniform(_min, _max) - scaleh = np.random.uniform(_min, _max) - - neww = int(width * scalew) - newh = int(height * scaleh) - - dst = cv2.resize(image, (neww, newh), interpolation=cv2.INTER_AREA) - if mask is not None: - mask = cv2.resize(mask, (neww, newh), interpolation=cv2.INTER_AREA) - # adjust meta data - adjust_joint_list = [] - for joint in annos: # TODO : speed up with affine transform - adjust_joint = [] - for point in joint: - if point[0] < -100 or point[1] < -100: - adjust_joint.append((-1000, -1000)) - continue - adjust_joint.append((int(point[0] * scalew + 0.5), int(point[1] * scaleh + 0.5))) - adjust_joint_list.append(adjust_joint) - if mask is not None: - return dst, adjust_joint_list, mask - else: - return dst, adjust_joint_list, None - - -def keypoint_random_resize_shortestedge( - image, annos, mask=None, min_size=(368, 368), zoom_range=(0.8, 1.2), - pad_val=(0, 0, np.random.uniform(0.0, 1.0)) -): - """Randomly resize an image and corresponding keypoints based on shorter edgeself. - If the resized image is smaller than `min_size`, uses padding to make shape matchs `min_size`. - The height and width of image will be changed together, the scale would not be changed. - - Parameters - ----------- - image : 3 channel image - The given image for augmentation. - annos : list of list of floats - The keypoints annotation of people. - mask : single channel image or None - The mask if available. - min_size : tuple of two int - The minimum size of height and width. - zoom_range : tuple of two floats - The minimum and maximum factor to zoom in or out, e.g (0.5, 1) means zoom out 1~2 times. - pad_val : int/float, or tuple of int or random function - The three padding values for RGB channels respectively. 
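One caveat worth flagging on the signature above: Python evaluates default arguments once, at function-definition time, so `pad_val=(0, 0, np.random.uniform(0.0, 1.0))` draws its "random" third value a single time when the module is imported; every later call that relies on the default pads with the same value. A toy demonstration (`pad_with` is a stand-in, not library code):

```python
import numpy as np

def pad_with(pad_val=(0, 0, np.random.uniform(0.0, 1.0))):
    return pad_val

# Identical tuples on every call: the uniform draw happened only once.
assert pad_with() == pad_with()
```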
-
-    Returns
-    ----------
-    preprocessed image, annos, mask
-
-    """
-
-    _target_height = min_size[0]
-    _target_width = min_size[1]
-
-    if len(np.shape(image)) == 2:
-        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-    height, width, _ = np.shape(image)
-
-    ratio_w = _target_width / width
-    ratio_h = _target_height / height
-    ratio = min(ratio_w, ratio_h)
-    target_size = int(min(width * ratio + 0.5, height * ratio + 0.5))
-    random_target = np.random.uniform(zoom_range[0], zoom_range[1])
-    target_size = int(target_size * random_target)
-
-    # target_size = int(min(_network_w, _network_h) * random.uniform(0.7, 1.5))
-
-    def pose_resize_shortestedge(image, annos, mask, target_size):
-        """ """
-        # _target_height = 368
-        # _target_width = 368
-        # img = image
-        height, width, _ = np.shape(image)
-
-        # adjust image
-        scale = target_size / min(height, width)
-        if height < width:
-            newh, neww = target_size, int(scale * width + 0.5)
-        else:
-            newh, neww = int(scale * height + 0.5), target_size
-
-        dst = cv2.resize(image, (neww, newh), interpolation=cv2.INTER_AREA)
-        mask = cv2.resize(mask, (neww, newh), interpolation=cv2.INTER_AREA)
-        pw = ph = 0
-        if neww < _target_width or newh < _target_height:
-            pw = max(0, (_target_width - neww) // 2)
-            ph = max(0, (_target_height - newh) // 2)
-            mw = (_target_width - neww) % 2
-            mh = (_target_height - newh) % 2
-            # color = np.random.uniform(0.0, 1.0)
-            dst = cv2.copyMakeBorder(dst, ph, ph + mh, pw, pw + mw, cv2.BORDER_CONSTANT, value=pad_val)  # (0, 0, color))
-            if mask is not None:
-                mask = cv2.copyMakeBorder(mask, ph, ph + mh, pw, pw + mw, cv2.BORDER_CONSTANT, value=1)
-        # adjust meta data
-        adjust_joint_list = []
-        for joint in annos:  # TODO : speed up with affine transform
-            adjust_joint = []
-            for point in joint:
-                if point[0] < -100 or point[1] < -100:
-                    adjust_joint.append((-1000, -1000))
-                    continue
-                # if point[0] <= 0 or point[1] <= 0 or int(point[0]*scale+0.5) > neww or int(point[1]*scale+0.5) > newh:
-                #     adjust_joint.append((-1, -1))
-                #     continue
-                adjust_joint.append((int(point[0] * scale + 0.5) + pw, int(point[1] * scale + 0.5) + ph))
-            adjust_joint_list.append(adjust_joint)
-        if mask is not None:
-            return dst, adjust_joint_list, mask
-        else:
-            return dst, adjust_joint_list, None
-
-    return pose_resize_shortestedge(image, annos, mask, target_size)
diff --git a/tensorlayer/rein.py b/tensorlayer/rein.py
deleted file mode 100644
index 744f346..0000000
--- a/tensorlayer/rein.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import numpy as np
-
-import tensorflow as tf
-
-from six.moves import xrange
-
-__all__ = [
-    'discount_episode_rewards',
-    'cross_entropy_reward_loss',
-    'log_weight',
-    'choice_action_by_probs',
-]
-
-
-def discount_episode_rewards(rewards=None, gamma=0.99, mode=0):
-    """Take a 1D float array of rewards and compute the discounted rewards for an
-    episode, following the recurrence G_t = r_t + gamma * G_{t+1}. A non-zero
-    reward can be treated as the end of an episode (see ``mode``).
-
-    Parameters
-    ----------
-    rewards : list
-        List of rewards.
-    gamma : float
-        Discount factor.
-    mode : int
-        Mode for computing the discounted rewards.
-        - If mode == 0, the discount process is reset whenever a non-zero reward is encountered (as in the Ping-Pong game).
-        - If mode == 1, the discount process is never reset.
-
-    Returns
-    --------
-    list of float
-        The discounted rewards.
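For reference, the `mode=1` case (no resets) is the plain discounted return and can be cross-checked with a standard vectorized trick using SciPy's IIR filter; this snippet is an independent sketch, not something the module itself uses:

```python
import numpy as np
from scipy.signal import lfilter

def discount_no_reset(rewards, gamma=0.99):
    # G[t] = r[t] + gamma * G[t+1], computed by filtering the reversed signal.
    r = np.asarray(rewards, dtype=np.float32)
    return lfilter([1.0], [1.0, -gamma], r[::-1])[::-1]

print(discount_no_reset([0, 0, 1], gamma=0.9))  # -> [0.81 0.9  1.  ]
```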
-
-    Examples
-    ----------
-    >>> rewards = np.asarray([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
-    >>> gamma = 0.9
-    >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma)
-    >>> print(discount_rewards)
-    [ 0.72899997  0.81        0.89999998  1.          0.72899997  0.81
-      0.89999998  1.          0.72899997  0.81        0.89999998  1.        ]
-    >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma, mode=1)
-    >>> print(discount_rewards)
-    [ 1.52110755  1.69011939  1.87791049  2.08656716  1.20729685  1.34144104
-      1.49048996  1.65610003  0.72899997  0.81        0.89999998  1.        ]
-
-    """
-    if rewards is None:
-        raise Exception("rewards should be a list")
-    discounted_r = np.zeros_like(rewards, dtype=np.float32)
-    running_add = 0
-    for t in reversed(xrange(0, rewards.size)):
-        if mode == 0:
-            if rewards[t] != 0: running_add = 0
-
-        running_add = running_add * gamma + rewards[t]
-        discounted_r[t] = running_add
-    return discounted_r
-
-
-def cross_entropy_reward_loss(logits, actions, rewards, name=None):
-    """Calculate the loss for a Policy Gradient network: the cross-entropy of the
-    taken actions, weighted by the (discounted) rewards, i.e. the REINFORCE-style objective.
-
-    Parameters
-    ----------
-    logits : tensor
-        The network outputs without softmax. This function applies softmax internally.
-    actions : tensor or placeholder
-        The agent actions.
-    rewards : tensor or placeholder
-        The rewards.
-
-    Returns
-    --------
-    Tensor
-        The TensorFlow loss expression.
-
-    Examples
-    ----------
-    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])
-    >>> network = InputLayer(states_batch_pl, name='input')
-    >>> network = DenseLayer(network, n_units=H, act=tf.nn.relu, name='relu1')
-    >>> network = DenseLayer(network, n_units=3, name='out')
-    >>> probs = network.outputs
-    >>> sampling_prob = tf.nn.softmax(probs)
-    >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
-    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
-    >>> loss = tl.rein.cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
-    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
-
-    """
-    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name)
-
-    return tf.reduce_sum(tf.multiply(cross_entropy, rewards))
-
-
-def log_weight(probs, weights, name='log_weight'):
-    """Log weight.
-
-    Parameters
-    -----------
-    probs : tensor
-        If it is a network output, it should usually be scaled to [0, 1] via softmax.
-    weights : tensor
-        The weights.
-
-    Returns
-    --------
-    Tensor
-        The Tensor after applying the log-weighted expression.
-
-    """
-    with tf.variable_scope(name):
-        exp_v = tf.reduce_mean(tf.log(probs) * weights)
-        return exp_v
-
-
-def choice_action_by_probs(probs=(0.5, 0.5), action_list=None):
-    """Choose and return an action given the action probability distribution.
-
-    Parameters
-    ------------
-    probs : list of float
-        The probability distribution over all actions.
-    action_list : None, or a list of int or other types
-        A list of actions as integers, strings or other types. If None, returns an integer in the range 0 to len(probs)-1.
-
-    Returns
-    --------
-    float, int or str
-        The chosen action.
-
-    Examples
-    ----------
-    >>> for _ in range(5):
-    ...     a = choice_action_by_probs([0.2, 0.4, 0.4])
-    ...     print(a)
-    0
-    1
-    1
-    2
-    1
-    >>> for _ in range(3):
-    ...     a = choice_action_by_probs([0.5, 0.5], ['a', 'b'])
-    ...     print(a)
-    a
-    b
-    b
-
-    """
-    if action_list is None:
-        n_action = len(probs)
-        action_list = np.arange(n_action)
-    else:
-        if len(action_list) != len(probs):
-            raise Exception("Number of actions should equal the number of probabilities.")
-    return np.random.choice(action_list, p=probs)
diff --git a/tensorlayer/third_party/__init__.py b/tensorlayer/third_party/__init__.py
deleted file mode 100644
index df05229..0000000
--- a/tensorlayer/third_party/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
diff --git a/tensorlayer/third_party/roi_pooling/.gitignore b/tensorlayer/third_party/roi_pooling/.gitignore
deleted file mode 100644
index 08030a8..0000000
--- a/tensorlayer/third_party/roi_pooling/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-.ipynb_checkpoints/
-build/
-
diff --git a/tensorlayer/third_party/roi_pooling/README.md b/tensorlayer/third_party/roi_pooling/README.md
deleted file mode 100644
index d597cea..0000000
--- a/tensorlayer/third_party/roi_pooling/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Hint from TensorLayer
-- This implementation is from `https://github.com/deepsense-ai/roi-pooling`, date: 31 Aug 2017.
-- To install this, you have to clone TensorLayer from GitHub instead of installing it via pip.
-- Remember to modify the `CUDA_LIB` path in the Makefile before running `python setup.py install` in this folder.
-- Make sure `roi_pooling_example.py` and `test_roi_layer.py` are runnable.
-
-
-----
-
-
-## RoI pooling in TensorFlow
-
-This repo contains the implementation of **Region of Interest pooling** as a custom TensorFlow operation. The CUDA code responsible for the computations was largely taken from the original [Caffe implementation by Ross Girshick](https://github.com/rbgirshick/fast-rcnn).
-
-For more information about RoI pooling you can check out [Region of interest pooling explained](https://deepsense.io/region-of-interest-pooling-explained/) at our [deepsense.io](https://deepsense.io/) blog.
-
-![Region of Interest Pooling animation](roi_pooling_animation.gif)
-
-
-## Requirements
-
-To compile and use the `roi_pooling` layer you need:
-
-* [CUDA](https://developer.nvidia.com/cuda-toolkit) (tested with 8.0)
-* [TensorFlow](https://www.tensorflow.org/) (tested with 0.12.0 and 1.0.0)
-
-Only official TensorFlow releases are currently supported. If you're using a custom-built TensorFlow compiled with a different GCC version (e.g. 5.X) you may need to modify the makefile to [enable the new ABI version](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html).
-
-
-## Install
-
-Since the op has to be compiled against your local CUDA and TensorFlow, install it from source:
-
-```bash
-$ git clone git@github.com:deepsense-io/roi-pooling.git
-$ cd roi-pooling
-$ python setup.py install
-```
-
-Right now we provide only a GPU implementation (no CPU version at this time).
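As a mental model for what the op computes before the usage example: each RoI (with inclusive corner coordinates, as in the CUDA kernel later in this diff) is divided into a `pool_height` x `pool_width` grid of bins using floor/ceil boundaries, and each bin is max-pooled. A hedged pure-NumPy sketch for a single-channel feature map (`roi_max_pool` is illustrative only, not the op's API):

```python
import numpy as np

def roi_max_pool(fmap, roi, pool_h=2, pool_w=2):
    """fmap: (H, W) array; roi: (x1, y1, x2, y2), inclusive coordinates."""
    x1, y1, x2, y2 = roi
    region = fmap[y1:y2 + 1, x1:x2 + 1]
    rh, rw = region.shape
    out = np.zeros((pool_h, pool_w), dtype=fmap.dtype)
    for ph in range(pool_h):
        for pw in range(pool_w):
            # floor/ceil bin boundaries, mirroring the CUDA kernel.
            hs, he = int(np.floor(ph * rh / pool_h)), int(np.ceil((ph + 1) * rh / pool_h))
            ws, we = int(np.floor(pw * rw / pool_w)), int(np.ceil((pw + 1) * rw / pool_w))
            out[ph, pw] = region[hs:he, ws:we].max()
    return out

fmap = np.array([[1, 2, 4, 4],
                 [3, 4, 1, 2],
                 [6, 2, 1, 7],
                 [1, 3, 2, 8]], dtype=np.float32)
print(roi_max_pool(fmap, (0, 0, 1, 3)))
# -> [[3. 4.] [6. 3.]], matching the first RoI of the notebook example later in this diff
```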
- - -## Usage - -After successful installation you can use the operation like this: - -```python -from roi_pooling.roi_pooling_ops import roi_pooling - -# here obtain feature map and regions of interest -rpooling = roi_pooling(feature_map, rois, 7, 7) -# continue the model -``` - -Working example in Jupyter Notebook: [examples/roi_pooling_minimal_example.ipynb](https://github.com/deepsense-io/roi-pooling/blob/master/examples/roi_pooling_minimal_example.ipynb) - diff --git a/tensorlayer/third_party/roi_pooling/examples/__init__.py b/tensorlayer/third_party/roi_pooling/examples/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tensorlayer/third_party/roi_pooling/examples/roi_pooling_minimal_example.ipynb b/tensorlayer/third_party/roi_pooling/examples/roi_pooling_minimal_example.ipynb deleted file mode 100644 index c1edc35..0000000 --- a/tensorlayer/third_party/roi_pooling/examples/roi_pooling_minimal_example.ipynb +++ /dev/null @@ -1,148 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* blog post: [Region of interest pooling explained - deepsense.io](https://deepsense.io/region-of-interest-pooling-explained/)\n", - "* repository: [deepsense-io/roi-pooling](https://github.com/deepsense-io/roi-pooling)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "\n", - "import tensorflow as tf\n", - "import numpy as np\n", - "\n", - "from roi_pooling.roi_pooling_ops import roi_pooling" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# 4x4 feature map with only 1 channel\n", - "input_value = [[\n", - " [[1], [2], [4], [4]],\n", - " [[3], [4], [1], [2]],\n", - " [[6], [2], [1], [7]],\n", - " [[1], [3], [2], [8]]\n", - "]]\n", - "input_value = np.asarray(input_value, dtype='float32')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# regions of interest as lists of:\n", - "# feature map index, upper left, bottom right coordinates\n", - "rois_value = [\n", - " [0, 0, 0, 1, 3],\n", - " [0, 2, 2, 3, 3],\n", - " [0, 1, 0, 3, 2]\n", - "]\n", - "rois_value = np.asarray(rois_value, dtype='int32')\n", - "\n", - "# in this case we have 3 RoI pooling operations:\n", - "# * channel 0, rectangular region (0, 0) to (1, 3)\n", - "# xx..\n", - "# xx..\n", - "# xx..\n", - "# xx..\n", - "#\n", - "# * channel 0, rectangular region (2, 2) to (3, 3)\n", - "# ....\n", - "# ....\n", - "# ..xx\n", - "# ..xx\n", - "# * channel 0, rectangular region (1, 0) to (3, 2)\n", - "# ....\n", - "# xxx.\n", - "# xxx.\n", - "# xxx." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[[[ 3. 4.]\n", - " [ 6. 3.]]]\n", - "\n", - "\n", - " [[[ 1. 7.]\n", - " [ 2. 8.]]]\n", - "\n", - "\n", - " [[[ 4. 4.]\n", - " [ 4. 
7.]]]]\n" - ] - } - ], - "source": [ - "input_featuremap = tf.placeholder(tf.float32)\n", - "rois = tf.placeholder(tf.int32)\n", - "input_const = tf.constant(input_value, tf.float32)\n", - "rois_const = tf.constant(rois_value, tf.int32)\n", - "y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2)\n", - "\n", - "with tf.Session('') as sess:\n", - " y_output = sess.run(y, feed_dict={input_featuremap: input_value, rois: rois_value})\n", - " print(y_output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/Makefile b/tensorlayer/third_party/roi_pooling/roi_pooling/Makefile deleted file mode 100644 index db9de78..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -TF_INC = $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') -CUDA_LIB = /usr/local/cuda-8.0/lib64 - -all: clean build test - -build: roi_pooling.so - -roi_pooling.cu.o: roi_pooling.cu.cc - nvcc -std=c++11 -c -o $@ $? -I $(TF_INC) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -D _GLIBCXX_USE_CXX11_ABI=0 - -roi_pooling.so: roi_pooling.cc roi_pooling.cu.o - g++ -std=c++11 -shared -o $@ $? -I $(TF_INC) -fPIC -lcudart -L$(CUDA_LIB) -D _GLIBCXX_USE_CXX11_ABI=0 - -test: build - python roi_pooling_test.py - -clean: - rm -f *.o *.so *.pyc *.npy diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/__init__.py b/tensorlayer/third_party/roi_pooling/roi_pooling/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cc b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cc deleted file mode 100644 index d1f123d..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cc +++ /dev/null @@ -1,162 +0,0 @@ -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include -#include -#include - -using namespace tensorflow; -using namespace std; - -REGISTER_OP("RoiPooling") -.Input("input: float32") -.Input("rois: int32") -.Attr("pool_height: int") -.Attr("pool_width: int") -.Output("output: float32") -.Output("argmax_output: int32"); - - -#define Dtype float - -void RoiPoolingKernelLauncher(const float* input, const int* rois, int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, Dtype* output, int* argmax_output); - -// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it - -class RoiPoolingOp : public OpKernel { - private: - int pool_height_, pool_width_; - public: - explicit RoiPoolingOp(OpKernelConstruction* context) : OpKernel(context) { - OP_REQUIRES_OK(context, - context->GetAttr("pool_height", &pool_height_)); - - OP_REQUIRES_OK(context, - context->GetAttr("pool_width", &pool_width_)); - } - - - void Compute(OpKernelContext* context) override { - // Grab the input tensor - const Tensor& input_tensor = context->input(0); - const Tensor& rois_tensor = 
context->input(1); - - auto input = input_tensor.flat(); - auto rois = rois_tensor.flat(); - - // Create an output tensor - Tensor* output_tensor = NULL; - Tensor* argmax_output_tensor = NULL; - - auto input_shape = input_tensor.shape(); - auto rois_shape = rois_tensor.shape(); - - int n_rois = rois_shape.dim_size(0); - int height = input_shape.dim_size(1); - int width = input_shape.dim_size(2); - int channels = input_shape.dim_size(3); - - TensorShape output_shape = TensorShape({static_cast(n_rois), - static_cast(channels), - static_cast(pool_height_), - static_cast(pool_width_)}); - - OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, - &output_tensor)); - - OP_REQUIRES_OK(context, context->allocate_output(1, output_shape, - &argmax_output_tensor)); - - auto output = output_tensor->template flat(); - auto argmax_output = argmax_output_tensor->template flat(); - - RoiPoolingKernelLauncher(input.data(), rois.data(), - n_rois, channels, - height, width, - pool_height_, pool_width_, - output.data(), argmax_output.data()); - } -}; - -REGISTER_KERNEL_BUILDER(Name("RoiPooling").Device(DEVICE_GPU), RoiPoolingOp); - -///////////// RoiPoolingGrad - - -REGISTER_OP("RoiPoolingGrad") -.Input("orig_input: float32") -.Input("orig_rois: int32") -.Input("orig_output: float32") -.Input("orig_argmax_output: int32") -.Input("orig_output_grad: float32") -.Attr("pool_height: int") -.Attr("pool_width: int") -.Output("output: float32") -.Doc(R"doc( - region of interest pooling grad -)doc"); - -#define Dtype float -void RoiPoolingGradKernelLauncher(const Dtype* orig_input, const int* orig_rois, - int mb_size, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - const Dtype* orig_output, const int* orig_argmax_output, - const Dtype* orig_output_grad, - Dtype* output); - -// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it - -class RoiPoolingGradOp : public OpKernel { - private: - int pool_height_, pool_width_; - public: - explicit RoiPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) { - OP_REQUIRES_OK(context, - context->GetAttr("pool_height", &pool_height_)); - - OP_REQUIRES_OK(context, - context->GetAttr("pool_width", &pool_width_)); - } - - - void Compute(OpKernelContext* context) override { - // Grab the input tensor - const Tensor& orig_input_tensor = context->input(0); - const Tensor& orig_rois_tensor = context->input(1); - const Tensor& orig_output_tensor = context->input(2); - const Tensor& orig_argmax_output_tensor = context->input(3); - const Tensor& orig_output_grad_tensor = context->input(4); - - auto orig_input = orig_input_tensor.flat(); - auto orig_rois = orig_rois_tensor.flat(); - auto orig_output = orig_output_tensor.flat(); - auto orig_argmax_output = orig_argmax_output_tensor.flat(); - auto orig_output_grad = orig_output_grad_tensor.flat(); - - // Create an output tensor - Tensor* output_tensor = NULL; - auto orig_input_shape = orig_input_tensor.shape(); - auto orig_rois_shape = orig_rois_tensor.shape(); - auto grads_shape = orig_input_shape; - - int mb_size = orig_input_shape.dim_size(0); - int n_rois = orig_rois_shape.dim_size(0); - int height = orig_input_shape.dim_size(1); - int width = orig_input_shape.dim_size(2); - int channels = orig_input_shape.dim_size(3); - - OP_REQUIRES_OK(context, context->allocate_output(0, grads_shape, - &output_tensor)); - - auto output = output_tensor->template flat(); - - // Call the cuda kernel launcher - 
RoiPoolingGradKernelLauncher(orig_input.data(), orig_rois.data(), - mb_size, n_rois, channels, height, width, pool_height_, pool_width_, - orig_output.data(), orig_argmax_output.data(), orig_output_grad.data(), output.data()); - } -}; - - -REGISTER_KERNEL_BUILDER(Name("RoiPoolingGrad").Device(DEVICE_GPU), RoiPoolingGradOp); diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cu.cc b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cu.cc deleted file mode 100644 index bbacb55..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cu.cc +++ /dev/null @@ -1,214 +0,0 @@ -#if GOOGLE_CUDA - -#include -#include -#define EIGEN_USE_GPU -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - -// CUDA: index helpers -#define idx4_4(index, d1, d2, d3, d4) (index % d4) -#define idx4_3(index, d1, d2, d3, d4) ((index / d4) % d3) -#define idx4_2(index, d1, d2, d3, d4) ((index / d4 / d3) % d2) -#define idx4_1(index, d1, d2, d3, d4) ((index / d4 / d3 / d2) %d1) - -// CUDA: various checks for different function calls. -#define CUDA_CHECK(condition) \ - /* Code block avoids redefinition of cudaError_t error */ \ - do { \ - cudaError_t error = condition; \ - if (error != cudaSuccess) { \ - return 1; \ - } \ - } while (0) - -// CUDA: grid stride looping -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ - i < (n); \ - i += blockDim.x * gridDim.x) - -// CUDA: use 512 threads per block -const int CAFFE_CUDA_NUM_THREADS = 512; - -// CUDA: number of blocks for threads. -inline int CAFFE_GET_BLOCKS(const int N) { - // TODO rewrite this part to be consistent with tf conventions - int optimal_number_of_blocks = (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; - int max_number_of_blocks = 65000; - return std::min(optimal_number_of_blocks, max_number_of_blocks); -} - - -#define Dtype float - -__global__ void RoiPoolingKernel(const Dtype* input, const int* rois, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - Dtype* output, int* argmax_output) { - int output_size = n_rois * channels * pooled_height * pooled_width; - - CUDA_KERNEL_LOOP(index, output_size) { - // (n, c, ph, pw) is an element in the pooled output - int pw = idx4_4(index, n_rois, channels, pooled_height, pooled_width); - int ph = idx4_3(index, n_rois, channels, pooled_height, pooled_width); - int c = idx4_2(index, n_rois, channels, pooled_height, pooled_width); - int n = idx4_1(index, n_rois, channels, pooled_height, pooled_width); - - auto bottom_rois_act = rois + n * 5; - - int roi_batch_ind = bottom_rois_act[0]; - int roi_start_w = bottom_rois_act[1]; - int roi_start_h = bottom_rois_act[2]; - int roi_end_w = bottom_rois_act[3]; - int roi_end_h = bottom_rois_act[4]; - - // Force malformed ROIs to be 1x1 - // NOTE(maciek): roi_start, roi_end seems to be inclusive - int roi_width = max(roi_end_w - roi_start_w + 1, 1); - int roi_height = max(roi_end_h - roi_start_h + 1, 1); - - // divide the ROIs into smaller regions for max pooling - Dtype bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - Dtype bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // compute the precise coordinates of each pooling subregion of the ROIs - int hstart = static_cast(floor(static_cast(ph) * bin_size_h)); - int wstart = static_cast(floor(static_cast(pw) * bin_size_w)); - int hend = static_cast(ceil(static_cast(ph + 1) * bin_size_h)); - int wend = static_cast(ceil(static_cast(pw + 1) * 
bin_size_w)); - - // Add roi offsets and clip to input boundaries - hstart = min(max(hstart + roi_start_h, 0), height); - hend = min(max(hend + roi_start_h, 0), height); - wstart = min(max(wstart + roi_start_w, 0), width); - wend = min(max(wend + roi_start_w, 0), width); - - //printf("%d %d %d %d %d %d %d %d\n", n, c, pw, ph, hstart, hend, wstart, wend); - - bool is_empty = (hend <= hstart) || (wend <= wstart); - - // Define an empty pooling region to be zero - - Dtype maxval = is_empty ? 0 : -999999999.0; - //Dtype maxval = is_empty ? 0 : -FLT_MAX; - // If nothing is pooled, argmax = -1 causes nothing to be backprop'd - - int maxidx = -1; - auto input_act = input + (roi_batch_ind * height * width * channels); - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - int bottom_index = (h * width + w) * channels + c; - - // bottom index is relative to 2d image only - if (input_act[bottom_index] > maxval) { - maxval = input_act[bottom_index]; - maxidx = bottom_index; - } - } - } - output[index] = maxval; - argmax_output[index] = maxidx; - } -} - - -void RoiPoolingKernelLauncher(const float* input, const int* rois, int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, Dtype* output, int* argmax_output) { - int out_size = n_rois * channels * pooled_height * pooled_width; - - RoiPoolingKernel<<>>(input, rois, n_rois, channels, height, width, - pooled_height, pooled_width, output, argmax_output); -} - - -/////////////// Grad -__global__ void RoiPoolingGradKernel(const Dtype* orig_input, const int* orig_rois, - int mb_size, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - const Dtype* orig_output, const int* orig_argmax_output, - const Dtype* orig_output_grad, - Dtype* output) { - - int orig_input_size = mb_size * height * width * channels; - - CUDA_KERNEL_LOOP(index, orig_input_size) { - // (n, h, w, c) coords in bottom data - int c = idx4_4(index, mb_size, height, width, channels); - int w = idx4_3(index, mb_size, height, width, channels); - int h = idx4_2(index, mb_size, height, width, channels); - int n = idx4_1(index, mb_size, height, width, channels); - - Dtype gradient = 0; - // Accumulate gradient over all ROIs that pooled this element - for (int roi_n = 0; roi_n < n_rois; ++roi_n) { - const int* offset_bottom_rois = orig_rois + roi_n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - // Skip if ROI's batch index doesn't match n - if (n != roi_batch_ind) { - continue; - } - - int roi_start_w = offset_bottom_rois[1]; - int roi_start_h = offset_bottom_rois[2]; - int roi_end_w = offset_bottom_rois[3]; - int roi_end_h = offset_bottom_rois[4]; - - // Skip if ROI doesn't include (h, w) - const bool in_roi = (w >= roi_start_w && w <= roi_end_w && - h >= roi_start_h && h <= roi_end_h); - if (!in_roi) { - continue; - } - - int offset = (roi_n * channels + c) * pooled_height * pooled_width; - const Dtype* offset_top_diff = orig_output_grad + offset; - const int* offset_argmax_data = orig_argmax_output + offset; - - // Compute feasible set of pooled units that could have pooled - // this bottom unit - - // Force malformed ROIs to be 1x1 - int roi_width = max(roi_end_w - roi_start_w + 1, 1); - int roi_height = max(roi_end_h - roi_start_h + 1, 1); - - Dtype bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - Dtype bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - int phstart = floor(static_cast(h - roi_start_h) / bin_size_h); - int phend = ceil(static_cast(h - 
roi_start_h + 1) / bin_size_h); - int pwstart = floor(static_cast(w - roi_start_w) / bin_size_w); - int pwend = ceil(static_cast(w - roi_start_w + 1) / bin_size_w); - - phstart = min(max(phstart, 0), pooled_height); - phend = min(max(phend, 0), pooled_height); - pwstart = min(max(pwstart, 0), pooled_width); - pwend = min(max(pwend, 0), pooled_width); - - for (int ph = phstart; ph < phend; ++ph) { - for (int pw = pwstart; pw < pwend; ++pw) { - if (offset_argmax_data[ph * pooled_width + pw] == (h * width + w)) { - gradient += offset_top_diff[ph * pooled_width + pw]; - } - } - } - } - output[index] = gradient; - } - -} - -void RoiPoolingGradKernelLauncher(const Dtype* orig_input, const int* orig_rois, - int mb_size, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - const Dtype* orig_output, const int* orig_argmax_output, - const Dtype* orig_output_grad, - Dtype* output) { - int out_size = mb_size * height * width * channels; - RoiPoolingGradKernel<<>>(orig_input, orig_rois, - mb_size, n_rois, channels, height, width, pooled_height, pooled_width, - orig_output, orig_argmax_output, orig_output_grad, output); -} - -#endif diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py deleted file mode 100644 index 7cf2fb4..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py +++ /dev/null @@ -1,52 +0,0 @@ -import os - -import tensorflow as tf -from tensorflow.python.framework import ops - -module_path = os.path.realpath(__file__) -module_dir = os.path.dirname(module_path) -lib_path = os.path.join(module_dir, 'roi_pooling.so') -roi_pooling_module = tf.load_op_library(lib_path) - - -def roi_pooling(input, rois, pool_height, pool_width): - """ - returns a tensorflow operation for computing the Region of Interest Pooling - - @arg input: feature maps on which to perform the pooling operation - @arg rois: list of regions of interest in the format (feature map index, upper left, bottom right) - @arg pool_width: size of the pooling sections - """ - # TODO(maciek): ops scope - out = roi_pooling_module.roi_pooling(input, rois, pool_height=pool_height, pool_width=pool_width) - output, argmax_output = out[0], out[1] - return output - - -@ops.RegisterGradient("RoiPooling") -def _RoiPoolingGrad(op, *grads): - orig_inputs = op.inputs[0] - orig_rois = op.inputs[1] - orig_output = op.outputs[0] - orig_argmax_output = op.outputs[1] - - orig_output_grad = grads[0] - output_grad = roi_pooling_module.roi_pooling_grad( - orig_inputs, orig_rois, orig_output, orig_argmax_output, orig_output_grad, - pool_height=op.get_attr('pool_height'), pool_width=op.get_attr('pool_width') - ) - return [output_grad, None] - - -@ops.RegisterShape("RoiPooling") -def _RoiPoolingShape(op): - input = op.inputs[0] - rois = op.inputs[1] - - n_rois = rois.get_shape()[0] - n_channels = input.get_shape()[3] - pool_height = op.get_attr('pool_height') - pool_width = op.get_attr('pool_width') - - #TODO: check the width/hegiht order - return [tf.TensorShape([n_rois, n_channels, pool_width, pool_height]), tf.TensorShape(None)] diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py deleted file mode 100644 index 0fb7e5c..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np -import tensorflow as tf - -from roi_pooling_ops 
import roi_pooling - - -class RoiPoolingTest(tf.test.TestCase): - # TODO(maciek): add python, implementation and test outputs - # TODO(maciek): test pool_height != pool_width, height != width - - def test_roi_pooling_grad(self): - # TODO(maciek): corner cases - input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] - input_value = np.asarray(input_value, dtype='float32') - - rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] - rois_value = np.asarray(rois_value, dtype='int32') - - with tf.compat.v1.Session(''): - # NOTE(maciek): looks like we have to use consts here, based on tensorflow/python/ops/nn_test.py - input_const = tf.constant(input_value, tf.float32) - rois_const = tf.constant(rois_value, tf.int32) - y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2) - mean = tf.reduce_mean(input_tensor=y) - - numerical_grad_error_1 = tf.compat.v1.test.compute_gradient_error( - [input_const], [input_value.shape], y, [5, 2, 2, 1] - ) - - numerical_grad_error_2 = tf.compat.v1.test.compute_gradient_error( - [input_const], [input_value.shape], mean, [] - ) - - self.assertLess(numerical_grad_error_1, 1e-4) - self.assertLess(numerical_grad_error_2, 1e-4) - - def test_shape_inference_1(self): - pooled_w, pooled_h = 2, 2 - input_w, input_h = 200, 200 - n_channels = 3 - n_batches = None - input = tf.compat.v1.placeholder(tf.float32, shape=[n_batches, input_w, input_h, n_channels]) - - n_rois = None - single_roi_dimension = 5 - rois = tf.compat.v1.placeholder(tf.int32, shape=[n_rois, single_roi_dimension]) - - y = roi_pooling(input, rois, pool_height=pooled_w, pool_width=pooled_h) - - self.assertEqual(y.get_shape().ndims, 4) - self.assertIs(y.get_shape()[0].value, n_rois) - self.assertIs(y.get_shape()[1].value, n_channels) - self.assertIs(y.get_shape()[2].value, pooled_h) - self.assertIs(y.get_shape()[3].value, pooled_w) - - def test_shape_inference_2(self): - pooled_w, pooled_h = 3, 4 - input_w, input_h = 200, 300 - n_channels = 3 - n_batches = None - input = tf.compat.v1.placeholder(tf.float32, shape=[n_batches, input_w, input_h, n_channels]) - - n_rois = None - single_roi_dimension = 5 - rois = tf.compat.v1.placeholder(tf.int32, shape=[n_rois, single_roi_dimension]) - - y = roi_pooling(input, rois, pool_height=pooled_w, pool_width=pooled_h) - - self.assertEqual(y.get_shape().ndims, 4) - self.assertIs(y.get_shape()[0].value, n_rois) - self.assertIs(y.get_shape()[1].value, n_channels) - self.assertIs(y.get_shape()[2].value, pooled_h) - self.assertIs(y.get_shape()[3].value, pooled_w) - - def test_very_big_output(self): - """ - This test checks whether the layer can handle a corner case - where the number of output pixels is very large, possibly larger - than the number of available GPU threads - """ - - pooled_w, pooled_h = 7, 7 - input_w, input_h = 72, 240 - n_channels = 512 - n_batches = 2 - x_input = np.ones(shape=(n_batches, input_w, input_h, n_channels)) - n_rois = 5000 - rois_input = np.ones(shape=(n_rois, 5)) - - input = tf.compat.v1.placeholder(tf.float32, shape=[n_batches, input_w, input_h, n_channels]) - single_roi_dimension = 5 - rois = tf.compat.v1.placeholder(tf.int32, shape=[n_rois, single_roi_dimension]) - - y = roi_pooling(input, rois, pool_height=pooled_w, pool_width=pooled_h) - - with tf.compat.v1.Session('') as sess: - y_output = sess.run(y, feed_dict={input: x_input, rois: rois_input}) - - self.assertTrue(np.all(y_output == 1)) - - -if __name__ == '__main__': - 
tf.test.main() diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling_animation.gif b/tensorlayer/third_party/roi_pooling/roi_pooling_animation.gif deleted file mode 100644 index 9d35d21..0000000 Binary files a/tensorlayer/third_party/roi_pooling/roi_pooling_animation.gif and /dev/null differ diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py b/tensorlayer/third_party/roi_pooling/roi_pooling_example.py deleted file mode 100644 index 50c497a..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from roi_pooling.roi_pooling_ops import roi_pooling - -# input feature map going into the RoI pooling -input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] -input_value = np.asarray(input_value, dtype='float32') - -# Regions of interest as lists of: -# feature map index, upper left, bottom right coordinates -rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] -rois_value = np.asarray(rois_value, dtype='int32') - -# the pool_height and width are parameters of the ROI layer -pool_height, pool_width = (2, 2) -n_rois = len(rois_value) -y_shape = [n_rois, 1, pool_height, pool_width] - -print('Input: ', input_value, ', shape: ', input_value.shape) -print('ROIs: ', rois_value, ', shape: ', rois_value.shape) - -# precise semantics is now only defined by the kernel, need tests -input = tf.compat.v1.placeholder(tf.float32) -rois = tf.compat.v1.placeholder(tf.int32) - -y = roi_pooling(input, rois, pool_height=2, pool_width=2) -mean = tf.reduce_mean(input_tensor=y) - -grads = tf.gradients(mean, input) -print(type(grads)) -print(len(grads)) -print(grads) -print(input_value.shape) - -with tf.compat.v1.Session('') as sess: - input_const = tf.constant(input_value, tf.float32) - rois_const = tf.constant(rois_value, tf.int32) - y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2) - mean = tf.reduce_mean(input_tensor=y) - - numerical_grad_error_1 = tf.compat.v1.test.compute_gradient_error([input_const], [input_value.shape], y, y_shape) - numerical_grad_error_2 = tf.compat.v1.test.compute_gradient_error([input_const], [input_value.shape], mean, []) - print(numerical_grad_error_1, numerical_grad_error_2) - -with tf.compat.v1.Session('') as sess: - y_output = sess.run(y, feed_dict={input: input_value, rois: rois_value}) - print('y: ', y_output) - grads_output = sess.run(grads, feed_dict={input: input_value, rois: rois_value}) - print('grads: ', grads_output) diff --git a/tensorlayer/third_party/roi_pooling/setup.py b/tensorlayer/third_party/roi_pooling/setup.py deleted file mode 100644 index a0c2d8b..0000000 --- a/tensorlayer/third_party/roi_pooling/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import subprocess -import sys -from distutils.command.install import install as DistutilsInstall -from distutils.core import setup - -try: - import tensorflow -except ImportError: - print("Please install tensorflow 0.12.0 or later") - sys.exit() - - -class MyInstall(DistutilsInstall): - - def run(self): - subprocess.call(['make', '-C', 'roi_pooling', 'build']) - DistutilsInstall.run(self) - - -setup( - name='roi-pooling', version='1.0', description='ROI pooling as a custom TensorFlow operation', - author='deepsense.io', packages=['roi_pooling'], package_data={'roi_pooling': - 
['roi_pooling.so']}, cmdclass={'install': MyInstall} -) diff --git a/tensorlayer/third_party/roi_pooling/test_roi_layer.py b/tensorlayer/third_party/roi_pooling/test_roi_layer.py deleted file mode 100644 index 8a26032..0000000 --- a/tensorlayer/third_party/roi_pooling/test_roi_layer.py +++ /dev/null @@ -1,53 +0,0 @@ -from tensorlayer.layers import * -from tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops import roi_pooling - -# from roi_pooling.roi_pooling_ops import roi_pooling - -# input feature map going into the RoI pooling -input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] -input_value = np.asarray(input_value, dtype='float32') - -# Regions of interest as lists of: -# feature map index, upper left, bottom right coordinates -rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] -rois_value = np.asarray(rois_value, dtype='int32') - -# the pool_height and width are parameters of the ROI layer -pool_height, pool_width = (2, 2) -n_rois = len(rois_value) -y_shape = [n_rois, 1, pool_height, pool_width] - -print('Input: ', input_value, ', shape: ', input_value.shape) -print('ROIs: ', rois_value, ', shape: ', rois_value.shape) - -# precise semantics is now only defined by the kernel, need tests -input = tf.compat.v1.placeholder(tf.float32) -rois = tf.compat.v1.placeholder(tf.int32) - -# y = roi_pooling(input, rois, pool_height=2, pool_width=2) -n = InputLayer(input, name='in') -n = ROIPoolingLayer(n, rois=rois, pool_height=2, pool_width=2, name='roi') -y = n.outputs -mean = tf.reduce_mean(input_tensor=y) - -grads = tf.gradients(mean, input) -print(type(grads)) -print(len(grads)) -print(grads) -print(input_value.shape) - -with tf.compat.v1.Session('') as sess: - input_const = tf.constant(input_value, tf.float32) - rois_const = tf.constant(rois_value, tf.int32) - y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2) - mean = tf.reduce_mean(input_tensor=y) - - numerical_grad_error_1 = tf.compat.v1.test.compute_gradient_error([input_const], [input_value.shape], y, y_shape) - numerical_grad_error_2 = tf.compat.v1.test.compute_gradient_error([input_const], [input_value.shape], mean, []) - print(numerical_grad_error_1, numerical_grad_error_2) - -with tf.compat.v1.Session('') as sess: - y_output = sess.run(y, feed_dict={input: input_value, rois: rois_value}) - print('y: ', y_output) - grads_output = sess.run(grads, feed_dict={input: input_value, rois: rois_value}) - print('grads: ', grads_output) diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py deleted file mode 100644 index 85305a7..0000000 --- a/tensorlayer/utils.py +++ /dev/null @@ -1,656 +0,0 @@ -#! 
/usr/bin/python -# -*- coding: utf-8 -*- - -import os - -import sys -from sys import exit as _exit -from sys import platform as _platform - -import random -import subprocess -import time - -from collections import Counter - -import numpy as np - -from sklearn.metrics import accuracy_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import f1_score - -import tensorflow as tf -import tensorlayer as tl - -__all__ = [ - 'fit', - 'test', - 'predict', - 'evaluation', - 'dict_to_one', - 'flatten_list', - 'class_balancing_oversample', - 'get_random_int', - 'list_string_to_dict', - 'exit_tensorflow', - 'open_tensorboard', - 'clear_all_placeholder_variables', - 'set_gpu_fraction', -] - - -def fit( - sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100, n_epoch=100, print_freq=5, - X_val=None, y_val=None, eval_train=True, tensorboard_dir=None, tensorboard_epoch_freq=5, - tensorboard_weight_histograms=True, tensorboard_graph_vis=True -): - """Training a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. - - - MNIST example click `here `_. - - In order to control the training details, the authors HIGHLY recommend ``tl.iterate`` see two MNIST examples `1 `_, `2 `_. - - Parameters - ---------- - sess : Session - TensorFlow Session. - network : TensorLayer layer - the network to be trained. - train_op : TensorFlow optimizer - The optimizer for training e.g. tf.train.AdamOptimizer. - X_train : numpy.array - The input of training data - y_train : numpy.array - The target of training data - x : placeholder - For inputs. - y_ : placeholder - For targets. - acc : TensorFlow expression or None - Metric for accuracy or others. If None, would not print the information. - batch_size : int - The batch size for training and evaluating. - n_epoch : int - The number of training epochs. - print_freq : int - Print the training information every ``print_freq`` epochs. - X_val : numpy.array or None - The input of validation data. If None, would not perform validation. - y_val : numpy.array or None - The target of validation data. If None, would not perform validation. - eval_train : boolean - Whether to evaluate the model during training. - If X_val and y_val are not None, it reflects whether to evaluate the model on training data. - tensorboard_dir : string - path to log dir, if set, summary data will be stored to the tensorboard_dir/ directory for visualization with tensorboard. (default None) - Also runs `tl.layers.initialize_global_variables(sess)` internally in fit() to setup the summary nodes. - tensorboard_epoch_freq : int - How many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5). - tensorboard_weight_histograms : boolean - If True updates tensorboard data in the logs/ directory for visualization - of the weight histograms every tensorboard_epoch_freq epoch (default True). - tensorboard_graph_vis : boolean - If True stores the graph in the tensorboard summaries saved to log/ (default True). - - Examples - -------- - See `tutorial_mnist_simple.py `_ - - >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, - ... acc=acc, batch_size=500, n_epoch=200, print_freq=5, - ... X_val=X_val, y_val=y_val, eval_train=False) - >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, - ... acc=acc, batch_size=500, n_epoch=200, print_freq=5, - ... X_val=X_val, y_val=y_val, eval_train=False, - ... 
tensorboard=True, tensorboard_weight_histograms=True, tensorboard_graph_vis=True) - - Notes - -------- - If tensorboard_dir not None, the `global_variables_initializer` will be run inside the fit function - in order to initialize the automatically generated summary nodes used for tensorboard visualization, - thus `tf.global_variables_initializer().run()` before the `fit()` call will be undefined. - - """ - if X_train.shape[0] < batch_size: - raise AssertionError("Number of training examples should be bigger than the batch size") - - if tensorboard_dir is not None: - tl.logging.info("Setting up tensorboard ...") - #Set up tensorboard summaries and saver - tl.files.exists_or_mkdir(tensorboard_dir) - - #Only write summaries for more recent TensorFlow versions - if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'): - if tensorboard_graph_vis: - train_writer = tf.summary.FileWriter(tensorboard_dir + '/train', sess.graph) - val_writer = tf.summary.FileWriter(tensorboard_dir + '/validation', sess.graph) - else: - train_writer = tf.summary.FileWriter(tensorboard_dir + '/train') - val_writer = tf.summary.FileWriter(tensorboard_dir + '/validation') - - #Set up summary nodes - if (tensorboard_weight_histograms): - for param in network.all_params: - if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'): - tl.logging.info('Param name %s' % param.name) - tf.summary.histogram(param.name, param) - - if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'): - tf.summary.scalar('cost', cost) - - merged = tf.summary.merge_all() - - #Initalize all variables and summaries - tl.layers.initialize_global_variables(sess) - tl.logging.info("Finished! use `tensorboard --logdir=%s/` to start tensorboard" % tensorboard_dir) - - tl.logging.info("Start training the network ...") - start_time_begin = time.time() - tensorboard_train_index, tensorboard_val_index = 0, 0 - for epoch in range(n_epoch): - start_time = time.time() - loss_ep = 0 - n_step = 0 - for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(network.all_drop) # enable noise layers - loss, _ = sess.run([cost, train_op], feed_dict=feed_dict) - loss_ep += loss - n_step += 1 - loss_ep = loss_ep / n_step - - if tensorboard_dir is not None and hasattr(tf, 'summary'): - if epoch + 1 == 1 or (epoch + 1) % tensorboard_epoch_freq == 0: - for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(dp_dict) - result = sess.run(merged, feed_dict=feed_dict) - train_writer.add_summary(result, tensorboard_train_index) - tensorboard_train_index += 1 - if (X_val is not None) and (y_val is not None): - for X_val_a, y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True): - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = {x: X_val_a, y_: y_val_a} - feed_dict.update(dp_dict) - result = sess.run(merged, feed_dict=feed_dict) - val_writer.add_summary(result, tensorboard_val_index) - tensorboard_val_index += 1 - - if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - if (X_val is not None) and (y_val is not None): - tl.logging.info("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) - if eval_train is True: - train_loss, train_acc, n_batch = 0, 0, 0 - for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, 
-                        dp_dict = dict_to_one(network.all_drop)  # disable noise layers
-                        feed_dict = {x: X_train_a, y_: y_train_a}
-                        feed_dict.update(dp_dict)
-                        if acc is not None:
-                            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
-                            train_acc += ac
-                        else:
-                            err = sess.run(cost, feed_dict=feed_dict)
-                        train_loss += err
-                        n_batch += 1
-                    tl.logging.info("   train loss: %f" % (train_loss / n_batch))
-                    if acc is not None:
-                        tl.logging.info("   train acc: %f" % (train_acc / n_batch))
-                val_loss, val_acc, n_batch = 0, 0, 0
-                for X_val_a, y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True):
-                    dp_dict = dict_to_one(network.all_drop)  # disable noise layers
-                    feed_dict = {x: X_val_a, y_: y_val_a}
-                    feed_dict.update(dp_dict)
-                    if acc is not None:
-                        err, ac = sess.run([cost, acc], feed_dict=feed_dict)
-                        val_acc += ac
-                    else:
-                        err = sess.run(cost, feed_dict=feed_dict)
-                    val_loss += err
-                    n_batch += 1
-
-                tl.logging.info("   val loss: %f" % (val_loss / n_batch))
-
-                if acc is not None:
-                    tl.logging.info("   val acc: %f" % (val_acc / n_batch))
-            else:
-                tl.logging.info(
-                    "Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep)
-                )
-    tl.logging.info("Total training time: %fs" % (time.time() - start_time_begin))
-
-
-def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None):
-    """
-    Test a given non-time-series network with the given test data and metric.
-
-    Parameters
-    ----------
-    sess : Session
-        TensorFlow session.
-    network : TensorLayer layer
-        The network.
-    acc : TensorFlow expression or None
-        Metric for accuracy or others. If None, the metric is not printed.
-    X_test : numpy.array
-        The input of testing data.
-    y_test : numpy.array
-        The target of testing data.
-    x : placeholder
-        For inputs.
-    y_ : placeholder
-        For targets.
-    batch_size : int or None
-        The batch size for testing. When the dataset is large, we should use mini-batches for testing;
-        if the dataset is small, it can be set to None.
-    cost : TensorFlow expression or None
-        Metric for cost or others. If None, the cost is not printed.
-
-    Examples
-    --------
-    See `tutorial_mnist_simple.py `_
-
-    >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)
-
-    """
-    tl.logging.info('Start testing the network ...')
-    if batch_size is None:
-        dp_dict = dict_to_one(network.all_drop)
-        feed_dict = {x: X_test, y_: y_test}
-        feed_dict.update(dp_dict)
-
-        if cost is not None:
-            tl.logging.info("   test loss: %f" % sess.run(cost, feed_dict=feed_dict))
-
-        test_acc = sess.run(acc, feed_dict=feed_dict)
-        tl.logging.info("   test acc: %f" % test_acc)
-        # tl.logging.info("   test acc: %f" % np.mean(y_test == sess.run(y_op,
-        #                           feed_dict=feed_dict)))
-        return test_acc
-    else:
-        test_loss, test_acc, n_batch = 0, 0, 0
-        for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True):
-            dp_dict = dict_to_one(network.all_drop)  # disable noise layers
-            feed_dict = {x: X_test_a, y_: y_test_a}
-            feed_dict.update(dp_dict)
-            if cost is not None:
-                err, ac = sess.run([cost, acc], feed_dict=feed_dict)
-                test_loss += err
-            else:
-                ac = sess.run(acc, feed_dict=feed_dict)
-            test_acc += ac
-            n_batch += 1
-        if cost is not None:
-            tl.logging.info("   test loss: %f" % (test_loss / n_batch))
-        tl.logging.info("   test acc: %f" % (test_acc / n_batch))
-        return test_acc / n_batch
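-
-
-# A usage sketch for `test` (assumes the session, network, placeholders and
-# test arrays from tutorial_mnist_simple.py exist): evaluate on the whole test
-# set at once, then again in mini-batches; both calls return the test accuracy.
-# >>> acc_full = test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)
-# >>> acc_mb = test(sess, network, acc, X_test, y_test, x, y_, batch_size=256, cost=cost)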
-
-
-def predict(sess, network, X, x, y_op, batch_size=None):
-    """
-    Return the prediction results of the given non-time-series network.
-
-    Parameters
-    ----------
-    sess : Session
-        TensorFlow Session.
-    network : TensorLayer layer
-        The network.
-    X : numpy.array
-        The inputs.
-    x : placeholder
-        For inputs.
-    y_op : TensorFlow expression
-        The argmax of the softmax outputs, e.g. ``tf.argmax(tf.nn.softmax(y), 1)``.
-    batch_size : int or None
-        The batch size for prediction. When the dataset is large, we should use mini-batches for prediction;
-        if the dataset is small, it can be set to None.
-
-    Examples
-    --------
-    See `tutorial_mnist_simple.py `_
-
-    >>> y = network.outputs
-    >>> y_op = tf.argmax(tf.nn.softmax(y), 1)
-    >>> print(tl.utils.predict(sess, network, X_test, x, y_op))
-
-    """
-    if batch_size is None:
-        dp_dict = dict_to_one(network.all_drop)  # disable noise layers
-        feed_dict = {
-            x: X,
-        }
-        feed_dict.update(dp_dict)
-        return sess.run(y_op, feed_dict=feed_dict)
-    else:
-        result = None
-        for X_a, _ in tl.iterate.minibatches(X, X, batch_size, shuffle=False):
-            dp_dict = dict_to_one(network.all_drop)
-            feed_dict = {
-                x: X_a,
-            }
-            feed_dict.update(dp_dict)
-            result_a = sess.run(y_op, feed_dict=feed_dict)
-            if result is None:
-                result = result_a
-            else:
-                result = np.concatenate((result, result_a))
-        if result is None:
-            # fewer examples than one batch: predict the remainder directly
-            if len(X) % batch_size != 0:
-                dp_dict = dict_to_one(network.all_drop)
-                feed_dict = {
-                    x: X[-(len(X) % batch_size):, :],
-                }
-                feed_dict.update(dp_dict)
-                result_a = sess.run(y_op, feed_dict=feed_dict)
-                result = result_a
-        else:
-            # the last incomplete batch was skipped by `minibatches`: predict it now
-            if len(X) != len(result) and len(X) % batch_size != 0:
-                dp_dict = dict_to_one(network.all_drop)
-                feed_dict = {
-                    x: X[-(len(X) % batch_size):, :],
-                }
-                feed_dict.update(dp_dict)
-                result_a = sess.run(y_op, feed_dict=feed_dict)
-                result = np.concatenate((result, result_a))
-        return result
-
-
-## Evaluation
-def evaluation(y_test=None, y_predict=None, n_classes=None):
-    """
-    Input the predicted results, the target results and the number of classes;
-    return the confusion matrix, the F1-score of each class,
-    the accuracy and the macro F1-score.
-
-    Parameters
-    ----------
-    y_test : list
-        The target results
-    y_predict : list
-        The predicted results
-    n_classes : int
-        The number of classes
-
-    Examples
-    --------
-    >>> c_mat, f1, acc, f1_macro = tl.utils.evaluation(y_test, y_predict, n_classes)
-
-    """
-    c_mat = confusion_matrix(y_test, y_predict, labels=[x for x in range(n_classes)])
-    f1 = f1_score(y_test, y_predict, average=None, labels=[x for x in range(n_classes)])
-    f1_macro = f1_score(y_test, y_predict, average='macro')
-    acc = accuracy_score(y_test, y_predict)
-    tl.logging.info('confusion matrix: \n%s' % c_mat)
-    tl.logging.info('f1-score        : %s' % f1)
-    tl.logging.info('f1-score(macro) : %f' % f1_macro)  # same output as f1_score(y_true, y_pred, average='macro')
-    tl.logging.info('accuracy-score  : %f' % acc)
-    return c_mat, f1, acc, f1_macro
-
-
-def dict_to_one(dp_dict):
-    """Input a dictionary, return a dictionary with all values set to one.
-
-    Used to disable dropout, dropconnect layers and so on.
-
-    Parameters
-    ----------
-    dp_dict : dictionary
-        A dictionary mapping keys to numbers, e.g. keeping probabilities.
-
-    Examples
-    --------
-    >>> dp_dict = dict_to_one( network.all_drop )
-    >>> feed_dict.update(dp_dict)
-
-    """
-    return {x: 1 for x in dp_dict}
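-
-
-# A sketch of the intended inference pattern for `dict_to_one`, assuming
-# `network` registers its dropout keep-probabilities in `network.all_drop`:
-# >>> dp_dict = dict_to_one(network.all_drop)   # every keep-probability -> 1
-# >>> feed_dict = {x: X_test, y_: y_test}
-# >>> feed_dict.update(dp_dict)                 # dropout disabled for evaluation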
-
-
-def flatten_list(list_of_list):
-    """Input a list of lists, return a single flat list containing all items.
-
-    Parameters
-    ----------
-    list_of_list : list of list
-        A list of lists.
-
-    Examples
-    --------
-    >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]])
-    [1, 2, 3, 4, 5, 6]
-
-    """
-    return sum(list_of_list, [])
-
-
-def class_balancing_oversample(X_train=None, y_train=None, printable=True):
-    """Input the features and labels, return the features and labels after oversampling.
-
-    Parameters
-    ----------
-    X_train : numpy.array
-        The inputs.
-    y_train : numpy.array
-        The targets.
-    printable : boolean
-        Whether to print the balancing information.
-
-    Examples
-    --------
-    One X
-
-    >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True)
-
-    Two X
-
-    >>> X, y = tl.utils.class_balancing_oversample(X_train=np.hstack((X1, X2)), y_train=y, printable=False)
-    >>> X1 = X[:, 0:5]
-    >>> X2 = X[:, 5:]
-
-    """
-    # ======== Classes balancing
-    if printable:
-        tl.logging.info("Classes balancing for training examples...")
-
-    c = Counter(y_train)
-
-    if printable:
-        tl.logging.info('the occurrence number of each class: %s' % c.most_common())
-        tl.logging.info('the rarest class is Label %s with %s instances' % c.most_common()[-1])
-        tl.logging.info('the most common class is Label %s with %s instances' % c.most_common(1)[0])
-
-    most_num = c.most_common(1)[0][1]
-
-    if printable:
-        tl.logging.info('most_num is %d; every class will be oversampled to this count' % most_num)
-
-    locations = {}
-    number = {}
-
-    for lab, num in c.most_common():  # find the indices for each label in y_train
-        number[lab] = num
-        locations[lab] = np.where(np.array(y_train) == lab)[0]
-    if printable:
-        tl.logging.info('convert list(np.array) to dict format')
-    X = {}  # convert list to dict
-    for lab, num in number.items():
-        X[lab] = X_train[locations[lab]]
-
-    # oversampling: duplicate each minority class until it reaches most_num
-    if printable:
-        tl.logging.info('start oversampling')
-    for key in X:
-        temp = X[key]
-        while True:
-            if len(X[key]) >= most_num:
-                break
-            X[key] = np.vstack((X[key], temp))
-    if printable:
-        tl.logging.info('feature dimension (label 0): %d' % len(X[0][0]))
-        tl.logging.info('the occurrence number of each class after oversampling')
-        for key in X:
-            tl.logging.info("%s %d" % (key, len(X[key])))
-    if printable:
-        tl.logging.info('truncate each class to the same number of instances')
-    for key in X:
-        X[key] = X[key][0:most_num, :]
-        if printable:
-            tl.logging.info("%s %d" % (key, len(X[key])))
-
-    # convert dict to list
-    if printable:
-        tl.logging.info('convert from dict to list format')
-    y_train = []
-    X_train = np.empty(shape=(0, len(X[0][0])))
-    for key in X:
-        X_train = np.vstack((X_train, X[key]))
-        y_train.extend([key for i in range(len(X[key]))])
-    # tl.logging.info(len(X_train), len(y_train))
-    c = Counter(y_train)
-    if printable:
-        tl.logging.info('the occurrence number of each class after oversampling: %s' % c.most_common())
-    # ================ End of Classes balancing
-    return X_train, y_train
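-
-
-# A toy sketch of the balancing above: with two samples of class 0 and one of
-# class 1, the minority class is duplicated until both classes hold two rows,
-# so X grows from 3 to 4 rows.
-# >>> X = np.array([[1., 1.], [2., 2.], [3., 3.]])
-# >>> y = [0, 0, 1]
-# >>> X_bal, y_bal = class_balancing_oversample(X, y, printable=False)
-# >>> len(y_bal)
-# 4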
-
-
-## Random
-def get_random_int(min_v=0, max_v=10, number=5, seed=None):
-    """Return a list of random integers within the given range.
-
-    Parameters
-    -----------
-    min_v : number
-        The minimum value.
-    max_v : number
-        The maximum value.
-    number : int
-        The number of values to generate.
-    seed : int or None
-        The seed for the random generator.
-
-    Examples
-    ---------
-    >>> r = get_random_int(min_v=0, max_v=10, number=5)
-    [10, 2, 3, 3, 7]
-
-    """
-    rnd = random.Random()
-    if seed is not None:
-        rnd = random.Random(seed)
-    # return [random.randint(min, max) for p in range(0, number)]
-    return [rnd.randint(min_v, max_v) for p in range(0, number)]
-
-
-def list_string_to_dict(string):
-    """Inputs ``['a', 'b', 'c']``, returns ``{'a': 0, 'b': 1, 'c': 2}``."""
-    dictionary = {}
-    for idx, c in enumerate(string):
-        dictionary.update({c: idx})
-    return dictionary
-
-
-def exit_tensorflow(sess=None, port=6006):
-    """Close the TensorFlow session, TensorBoard and the Nvidia process if available.
-
-    Parameters
-    ----------
-    sess : Session
-        TensorFlow Session.
-    port : int
-        The TensorBoard port you want to close, `6006` by default.
-
-    """
-    text = "[TL] Close tensorboard and nvidia-process if available"
-    text2 = "[TL] Close tensorboard and nvidia-process not yet supported by this function (tl.ops.exit_tf) on "
-
-    if sess is not None:
-        sess.close()
-
-    if _platform == "linux" or _platform == "linux2":
-        tl.logging.info('linux: %s' % text)
-        os.system('nvidia-smi')
-        os.system('fuser ' + str(port) + '/tcp -k')  # kill tensorboard on the given port
-        os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill")  # kill all nvidia-smi python processes
-        _exit()
-
-    elif _platform == "darwin":
-        tl.logging.info('OS X: %s' % text)
-        subprocess.Popen(
-            "lsof -i tcp:" + str(port) + "  | grep -v PID | awk '{print $2}' | xargs kill", shell=True
-        )  # kill tensorboard
-    elif _platform == "win32":
-        raise NotImplementedError("this function is not supported on the Windows platform")
-
-    else:
-        tl.logging.info(text2 + _platform)
-
-
-def open_tensorboard(log_dir='/tmp/tensorflow', port=6006):
-    """Open TensorBoard.
-
-    Parameters
-    ----------
-    log_dir : str
-        Directory where your TensorBoard logs are saved.
-    port : int
-        The TensorBoard port you want to open; 6006 is the TensorBoard default.
-
-    """
-    text = "[TL] Open tensorboard, go to localhost:" + str(port) + " to access"
-    text2 = " not yet supported by this function (tl.ops.open_tb)"
-
-    if not tl.files.exists_or_mkdir(log_dir, verbose=False):
-        tl.logging.info("[TL] Log directory was created at %s" % log_dir)
-
-    if _platform == "linux" or _platform == "linux2":
-        raise NotImplementedError()
-    elif _platform == "darwin":
-        tl.logging.info('OS X: %s' % text)
-        subprocess.Popen(
-            "tensorboard --logdir=" + log_dir + " --port=" + str(port), shell=True
-        )  # open tensorboard at localhost:6006/ or whatever port you chose
-    elif _platform == "win32":
-        raise NotImplementedError("this function is not supported on the Windows platform")
-    else:
-        tl.logging.info(_platform + text2)
-
-
-def clear_all_placeholder_variables(printable=True):
-    """Clear all placeholder variables of keep prob,
-    including the keeping probabilities of all dropout, denoising and dropconnect layers.
-
-    Parameters
-    ----------
-    printable : boolean
-        If True, print all deleted variables.
-
-    """
-    tl.logging.info('clear all .....................................')
-    gl = globals().copy()
-    for var in gl:
-        if var[0] == '_': continue
-        if 'func' in str(globals()[var]): continue
-        if 'module' in str(globals()[var]): continue
-        if 'class' in str(globals()[var]): continue
-
-        if printable:
-            tl.logging.info(" clear_all ------- %s" % str(globals()[var]))
-
-        del globals()[var]
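-
-
-# A usage sketch for the TensorBoard helpers above, assuming summaries were
-# written to /tmp/tensorflow and `sess` is an open session:
-# >>> open_tensorboard(log_dir='/tmp/tensorflow', port=6006)
-# >>> exit_tensorflow(sess=sess, port=6006)  # close the session and TensorBoard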
-
-
-def set_gpu_fraction(gpu_fraction=0.3):
-    """Set the GPU memory fraction for the application.
-
-    Parameters
-    ----------
-    gpu_fraction : float
-        Fraction of GPU memory, (0 ~ 1]
-
-    References
-    ----------
-    - `TensorFlow using GPU `__
-
-    """
-    tl.logging.info("[TL]: GPU MEM Fraction %f" % gpu_fraction)
-    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
-    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
-    return sess
diff --git a/tensorlayer/visualize.py b/tensorlayer/visualize.py
deleted file mode 100644
index 9fe27c7..0000000
--- a/tensorlayer/visualize.py
+++ /dev/null
@@ -1,664 +0,0 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-import os
-
-import imageio
-
-import numpy as np
-
-import tensorlayer as tl
-from tensorlayer.lazy_imports import LazyImport
-cv2 = LazyImport("cv2")
-
-# Uncomment the following line if you got: _tkinter.TclError: no display name and no $DISPLAY environment variable
-# import matplotlib
-# matplotlib.use('Agg')
-
-__all__ = [
-    'read_image',
-    'read_images',
-    'save_image',
-    'save_images',
-    'draw_boxes_and_labels_to_image',
-    'draw_mpii_people_to_image',
-    'frame',
-    'CNN2d',
-    'images2d',
-    'tsne_embedding',
-    'draw_weights',
-    'W',
-]
-
-
-def read_image(image, path=''):
-    """Read one image.
-
-    Parameters
-    -----------
-    image : str
-        The image file name.
-    path : str
-        The image folder path.
-
-    Returns
-    -------
-    numpy.array
-        The image.
-
-    """
-    return imageio.imread(os.path.join(path, image))
-
-
-def read_images(img_list, path='', n_threads=10, printable=True):
-    """Return all images in the list, given the folder path and the name of each image file.
-
-    Parameters
-    -------------
-    img_list : list of str
-        The image file names.
-    path : str
-        The image folder path.
-    n_threads : int
-        The number of threads used to read the images.
-    printable : boolean
-        Whether to print information when reading images.
-
-    Returns
-    -------
-    list of numpy.array
-        The images.
-
-    """
-    imgs = []
-    for idx in range(0, len(img_list), n_threads):
-        b_imgs_list = img_list[idx:idx + n_threads]
-        b_imgs = tl.prepro.threading_data(b_imgs_list, fn=read_image, path=path)
-        # tl.logging.info(b_imgs.shape)
-        imgs.extend(b_imgs)
-        if printable:
-            tl.logging.info('read %d from %s' % (len(imgs), path))
-    return imgs
-
-
-def save_image(image, image_path='_temp.png'):
-    """Save an image.
-
-    Parameters
-    -----------
-    image : numpy array
-        [h, w, c]
-    image_path : str
-        The save path.
-
-    """
-    try:  # RGB
-        imageio.imwrite(image_path, image)
-    except Exception:  # Greyscale
-        imageio.imwrite(image_path, image[:, :, 0])
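-
-
-# A quick sketch for `save_image`, assuming a random RGB array in [0, 1]:
-# >>> img = np.random.rand(64, 64, 3)
-# >>> save_image(img, '_temp.png')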
-
-
-def save_images(images, size, image_path='_temp.png'):
-    """Save multiple images into one single image.
-
-    Parameters
-    -----------
-    images : numpy array
-        (batch, h, w, c)
-    size : list of 2 ints
-        Row and column number.
-        The number of images should be equal to or less than size[0] * size[1].
-    image_path : str
-        The save path.
-
-    Examples
-    ---------
-    >>> import numpy as np
-    >>> import tensorlayer as tl
-    >>> images = np.random.rand(64, 100, 100, 3)
-    >>> tl.visualize.save_images(images, [8, 8], 'temp.png')
-
-    """
-    if len(images.shape) == 3:  # Greyscale [batch, h, w] --> [batch, h, w, 1]
-        images = images[:, :, :, np.newaxis]
-
-    def merge(images, size):
-        h, w = images.shape[1], images.shape[2]
-        img = np.zeros((h * size[0], w * size[1], 3), dtype=images.dtype)
-        for idx, image in enumerate(images):
-            i = idx % size[1]
-            j = idx // size[1]
-            img[j * h:j * h + h, i * w:i * w + w, :] = image
-        return img
-
-    def imsave(images, size, path):
-        # rescale [-1, 1] or [0, 1] float images to uint8 [0, 255] before writing
-        if np.max(images) <= 1 and (-1 <= np.min(images) < 0):
-            images = ((images + 1) * 127.5).astype(np.uint8)
-        elif np.max(images) <= 1 and np.min(images) >= 0:
-            images = (images * 255).astype(np.uint8)
-
-        return imageio.imwrite(path, merge(images, size))
-
-    if len(images) > size[0] * size[1]:
-        raise AssertionError("number of images ({}) should be equal to or less than size[0] * size[1]".format(len(images)))
-
-    return imsave(images, size, image_path)
-
-
-def draw_boxes_and_labels_to_image(
-        image, classes, coords, scores, classes_list, is_center=True, is_rescale=True, save_name=None
-):
-    """Draw bboxes and class labels on an image. Return or save the image with bboxes; an example is in the docs of ``tl.prepro``.
-
-    Parameters
-    -----------
-    image : numpy.array
-        The RGB image [height, width, channel].
-    classes : list of int
-        A list of class IDs (int).
-    coords : list of list of int
-        A list of coordinates.
-        - Should be [x, y, x2, y2] (up-left and bottom-right format).
-        - If [x_center, y_center, w, h], set is_center to True.
-    scores : list of float
-        A list of scores (float). (Optional)
-    classes_list : list of str
-        For converting class IDs to strings on the image.
-    is_center : boolean
-        Whether the coordinates are [x_center, y_center, w, h].
-        - If coordinates are [x_center, y_center, w, h], set it to True for converting them to [x, y, x2, y2] (up-left and bottom-right) internally.
-        - If coordinates are [x1, y1, x2, y2], set it to False.
-    is_rescale : boolean
-        Whether to rescale the coordinates from pixel-unit format to ratio format.
-        - If True, the input coordinates are the portion of width and height; this API will scale the coordinates to pixel units internally.
-        - If False, feed the coordinates in pixel-unit format.
-    save_name : None or str
-        The name of the image file (e.g. image.png); if None, the image is not saved.
-
-    Returns
-    -------
-    numpy.array
-        The saved image.
-
-    References
-    -----------
-    - OpenCV rectangle and putText.
-    - `scikit-image `__.
-
-    """
-    if len(coords) != len(classes):
-        raise AssertionError("number of coordinates and classes must be equal")
-
-    if len(scores) > 0 and len(scores) != len(classes):
-        raise AssertionError("number of scores and classes must be equal")
-
-    # don't change the original image, and avoid error https://stackoverflow.com/questions/30249053/python-opencv-drawing-errors-after-manipulating-array-with-numpy
-    image = image.copy()
-
-    imh, imw = image.shape[0:2]
-    thick = int((imh + imw) // 430)
-
-    for i, _v in enumerate(coords):
-        if is_center:
-            x, y, x2, y2 = tl.prepro.obj_box_coord_centroid_to_upleft_butright(coords[i])
-        else:
-            x, y, x2, y2 = coords[i]
-
-        if is_rescale:  # scale back to pixel units if the coords are the portion of width and height
-            x, y, x2, y2 = tl.prepro.obj_box_coord_scale_to_pixelunit([x, y, x2, y2], (imh, imw))
-
-        cv2.rectangle(
-            image,
-            (int(x), int(y)),
-            (int(x2), int(y2)),  # up-left and bottom-right
-            [0, 255, 0],
-            thick
-        )
-
-        cv2.putText(
-            image,
-            classes_list[classes[i]] + ((" %.2f" % (scores[i])) if (len(scores) != 0) else " "),
-            (int(x), int(y)),  # bottom left of the text
-            0,
-            1.5e-3 * imh,  # bigger = larger font
-            [0, 0, 255],  # self.meta['colors'][max_indx],
-            int(thick / 2) + 1
-        )  # bold
-
-    if save_name is not None:
-        # cv2.imwrite('_my.png', image)
-        save_image(image, save_name)
-    # if len(coords) == 0:
-    #     tl.logging.info("draw_boxes_and_labels_to_image: no bboxes exist, cannot draw !")
-    return image
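-
-
-# A minimal sketch for `draw_boxes_and_labels_to_image`, assuming a single
-# centred box in ratio format and a hypothetical one-class label list:
-# >>> image = (np.random.rand(300, 400, 3) * 255).astype(np.uint8)
-# >>> out = draw_boxes_and_labels_to_image(
-# ...     image, classes=[0], coords=[[0.5, 0.5, 0.3, 0.3]], scores=[0.9],
-# ...     classes_list=['person'], is_center=True, is_rescale=True, save_name=None)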
-
-
-def draw_mpii_pose_to_image(image, poses, save_name='image.png'):
-    """Draw one or more people into an image using the MPII dataset format as input; return or save the result image.
-
-    This is an experimental API and may change in the future.
-
-    Parameters
-    -----------
-    image : numpy.array
-        The RGB image [height, width, channel].
-    poses : list of dict
-        The annotations of the people in MPII format, see ``tl.files.load_mpii_pose_dataset``.
-    save_name : None or str
-        The name of the image file (e.g. image.png); if None, the image is not saved.
-
-    Returns
-    --------
-    numpy.array
-        The saved image.
-
-    Examples
-    --------
-    >>> import pprint
-    >>> import tensorlayer as tl
-    >>> img_train_list, ann_train_list, img_test_list, ann_test_list = tl.files.load_mpii_pose_dataset()
-    >>> image = tl.vis.read_image(img_train_list[0])
-    >>> tl.vis.draw_mpii_pose_to_image(image, ann_train_list[0], 'image.png')
-    >>> pprint.pprint(ann_train_list[0])
-
-    References
-    -----------
-    - `MPII Keypoints and ID `__
-    """
-    # import skimage
-    # don't change the original image, and avoid error https://stackoverflow.com/questions/30249053/python-opencv-drawing-errors-after-manipulating-array-with-numpy
-    image = image.copy()
-
-    imh, imw = image.shape[0:2]
-    thick = int((imh + imw) // 430)
-    # radius = int(image.shape[1] / 500) + 1
-    radius = int(thick * 1.5)
-
-    if image.max() < 1:
-        image = image * 255
-
-    for people in poses:
-        # Pose Keypoints
-        joint_pos = people['joint_pos']
-        # draw sketch
-        # joint id (0 - r ankle, 1 - r knee, 2 - r hip, 3 - l hip, 4 - l knee,
-        #           5 - l ankle, 6 - pelvis, 7 - thorax, 8 - upper neck,
-        #           9 - head top, 10 - r wrist, 11 - r elbow, 12 - r shoulder,
-        #           13 - l shoulder, 14 - l elbow, 15 - l wrist)
-        #
-        #               9
-        #               8
-        #         12 ** 7 ** 13
-        #        *      *      *
-        #       11      *       14
-        #      *        *         *
-        #     10    2 * 6 * 3      15
-        #           *       *
-        #           1       4
-        #           *       *
-        #           0       5
-
-        lines = [
-            [(0, 1), [100, 255, 100]],
-            [(1, 2), [50, 255, 50]],
-            [(2, 6), [0, 255, 0]],  # right leg
-            [(3, 4), [100, 100, 255]],
-            [(4, 5), [50, 50, 255]],
-            [(6, 3), [0, 0, 255]],  # left leg
-            [(6, 7), [255, 255, 100]],
-            [(7, 8), [255, 150, 50]],  # body
-            [(8, 9), [255, 200, 100]],  # head
-            [(10, 11), [255, 100, 255]],
-            [(11, 12), [255, 50, 255]],
-            [(12, 8), [255, 0, 255]],  # right hand
-            [(8, 13), [0, 255, 255]],
-            [(13, 14), [100, 255, 255]],
-            [(14, 15), [200, 255, 255]]  # left hand
-        ]
-        for line in lines:
-            start, end = line[0]
-            if (start in joint_pos) and (end in joint_pos):
-                cv2.line(
-                    image,
-                    (int(joint_pos[start][0]), int(joint_pos[start][1])),
-                    (int(joint_pos[end][0]), int(joint_pos[end][1])),  # start and end joints
-                    line[1],
-                    thick
-                )
-                # rr, cc, val = skimage.draw.line_aa(int(joint_pos[start][1]), int(joint_pos[start][0]), int(joint_pos[end][1]), int(joint_pos[end][0]))
-                # image[rr, cc] = line[1]
-        # draw circles
-        for pos in joint_pos.items():
-            _, pos_loc = pos  # pos_id, pos_loc
-            pos_loc = (int(pos_loc[0]), int(pos_loc[1]))
-            cv2.circle(image, center=pos_loc, radius=radius, color=(200, 200, 200), thickness=-1)
-            # rr, cc = skimage.draw.circle(int(pos_loc[1]), int(pos_loc[0]), radius)
-            # image[rr, cc] = [0, 255, 0]
-
-        # Head
-        head_rect = people['head_rect']
-        if head_rect:  # if head exists
-            cv2.rectangle(
-                image,
-                (int(head_rect[0]), int(head_rect[1])),
-                (int(head_rect[2]), int(head_rect[3])),  # up-left and bottom-right
-                [0, 180, 0],
-                thick
-            )
-
-    if save_name is not None:
-        # cv2.imwrite(save_name, image)
-        save_image(image, save_name)
-    return image
-
-
-draw_mpii_people_to_image = draw_mpii_pose_to_image
-
-
-def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=12836):
-    """Display a frame. Make sure OpenAI Gym render() is disabled before using it.
-
-    Parameters
-    ----------
-    I : numpy.array
-        The image.
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : str
-        A name to save the image, if saveable is True.
-    cmap : None or str
-        'gray' for greyscale, None for default, etc.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> env = gym.make("Pong-v0")
-    >>> observation = env.reset()
-    >>> tl.visualize.frame(observation)
-
-    """
-    import matplotlib.pyplot as plt
-    if saveable is False:
-        plt.ion()
-    plt.figure(fig_idx)  # show all feature images
-
-    if len(I.shape) == 3 and I.shape[-1] == 1:  # (10, 10, 1) --> (10, 10)
-        I = I[:, :, 0]
-
-    plt.imshow(I, cmap)
-    plt.title(name)
-    # plt.gca().xaxis.set_major_locator(plt.NullLocator())  # disable ticks
-    # plt.gca().yaxis.set_major_locator(plt.NullLocator())
-
-    if saveable:
-        plt.savefig(name + '.pdf', format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
-
-
-def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362):
-    """Display a group of RGB or greyscale CNN masks.
-
-    Parameters
-    ----------
-    CNN : numpy.array
-        The kernels, e.g. 64 RGB 5x5 kernels have shape (5, 5, 3, 64).
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : str
-        A name to save the image, if saveable is True.
-    fig_idx : int
-        The matplotlib figure index.
-
-    Examples
-    --------
-    >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012)
-
-    """
-    import matplotlib.pyplot as plt
-    # tl.logging.info(CNN.shape)  # (5, 5, 3, 64)
-    n_mask = CNN.shape[3]
-    n_row = CNN.shape[0]
-    n_col = CNN.shape[1]
-    n_color = CNN.shape[2]
-    row = int(np.sqrt(n_mask))
-    col = int(np.ceil(n_mask / row))
-    plt.ion()  # interactive mode
-    fig = plt.figure(fig_idx)
-    count = 1
-    for _ir in range(1, row + 1):
-        for _ic in range(1, col + 1):
-            if count > n_mask:
-                break
-            fig.add_subplot(col, row, count)
-            # tl.logging.info(CNN[:,:,:,count-1].shape, n_row, n_col)  # (5, 1, 32) 5 5
-            # plt.imshow(
-            #     np.reshape(CNN[count-1,:,:,:], (n_row, n_col)),
-            #     cmap='gray', interpolation="nearest")  # theano
-            if n_color == 1:
-                plt.imshow(np.reshape(CNN[:, :, :, count - 1], (n_row, n_col)), cmap='gray', interpolation="nearest")
-            elif n_color == 3:
-                plt.imshow(
-                    np.reshape(CNN[:, :, :, count - 1], (n_row, n_col, n_color)), cmap='gray', interpolation="nearest"
-                )
-            else:
-                raise Exception("Unknown n_color")
-            plt.gca().xaxis.set_major_locator(plt.NullLocator())  # disable ticks
-            plt.gca().yaxis.set_major_locator(plt.NullLocator())
-            count = count + 1
-    if saveable:
-        plt.savefig(name + '.pdf', format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
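-
-
-# A sketch for `CNN2d`, assuming 64 random 5x5 RGB kernels shaped (5, 5, 3, 64):
-# >>> W_conv = np.random.rand(5, 5, 3, 64)
-# >>> CNN2d(W_conv, second=10, saveable=True, name='cnn_filters')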
-
-
-def images2d(images=None, second=10, saveable=True, name='images', dtype=None, fig_idx=3119362):
-    """Display a group of RGB or greyscale images.
-
-    Parameters
-    ----------
-    images : numpy.array
-        The images.
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : str
-        A name to save the image, if saveable is True.
-    dtype : None or numpy data type
-        The data type for displaying the images.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
-    >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
-
-    """
-    import matplotlib.pyplot as plt
-    # tl.logging.info(images.shape)  # (50000, 32, 32, 3)
-    if dtype:
-        images = np.asarray(images, dtype=dtype)
-    n_mask = images.shape[0]
-    n_row = images.shape[1]
-    n_col = images.shape[2]
-    n_color = images.shape[3]
-    row = int(np.sqrt(n_mask))
-    col = int(np.ceil(n_mask / row))
-    plt.ion()  # interactive mode
-    fig = plt.figure(fig_idx)
-    count = 1
-    for _ir in range(1, row + 1):
-        for _ic in range(1, col + 1):
-            if count > n_mask:
-                break
-            fig.add_subplot(col, row, count)
-            # tl.logging.info(images[:,:,:,count-1].shape, n_row, n_col)  # (5, 1, 32) 5 5
-            # plt.imshow(
-            #     np.reshape(images[count-1,:,:,:], (n_row, n_col)),
-            #     cmap='gray', interpolation="nearest")  # theano
-            if n_color == 1:
-                plt.imshow(np.reshape(images[count - 1, :, :], (n_row, n_col)), cmap='gray', interpolation="nearest")
-                # plt.title(name)
-            elif n_color == 3:
-                plt.imshow(images[count - 1, :, :], cmap='gray', interpolation="nearest")
-                # plt.title(name)
-            else:
-                raise Exception("Unknown n_color")
-            plt.gca().xaxis.set_major_locator(plt.NullLocator())  # disable ticks
-            plt.gca().yaxis.set_major_locator(plt.NullLocator())
-            count = count + 1
-    if saveable:
-        plt.savefig(name + '.pdf', format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
-
-
-def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, second=5, saveable=False, name='tsne', fig_idx=9862):
-    """Visualize the embeddings by using t-SNE.
-
-    Parameters
-    ----------
-    embeddings : numpy.array
-        The embedding matrix.
-    reverse_dictionary : dictionary
-        id_to_word, mapping id to unique word.
-    plot_only : int
-        The number of examples to plot, choosing the most common words.
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    name : str
-        A name to save the image, if saveable is True.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    See `tutorial_word2vec_basic.py`.
-
-    >>> final_embeddings = normalized_embeddings.eval()
-    >>> tl.visualize.tsne_embedding(final_embeddings, reverse_dictionary,
-    ...                  plot_only=500, second=5, saveable=False, name='tsne')
-
-    """
-    import matplotlib.pyplot as plt
-
-    def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, saveable=True, name='tsne', fig_idx=9862):
-
-        if low_dim_embs.shape[0] < len(labels):
-            raise AssertionError("More labels than embeddings")
-
-        if saveable is False:
-            plt.ion()
-
-        plt.figure(fig_idx, figsize=figsize)  # in inches
-
-        for i, label in enumerate(labels):
-            x, y = low_dim_embs[i, :]
-            plt.scatter(x, y)
-            plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
-
-        if saveable:
-            plt.savefig(name + '.pdf', format='pdf')
-        else:
-            plt.draw()
-            plt.pause(second)
-
-    try:
-        from sklearn.manifold import TSNE
-        from six.moves import xrange
-
-        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
-        # plot_only = 500
-        low_dim_embs = tsne.fit_transform(embeddings[:plot_only, :])
-        labels = [reverse_dictionary[i] for i in xrange(plot_only)]
-        plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable, name=name, fig_idx=fig_idx)
-
-    except ImportError:
-        _err = "Please install sklearn and matplotlib to visualize embeddings."
-        tl.logging.error(_err)
-        raise ImportError(_err)
-
-
-def draw_weights(W=None, second=10, saveable=True, shape=None, name='mnist', fig_idx=2396512):
-    """Visualize every column of the weight matrix as a group of greyscale images.
-
-    Parameters
-    ----------
-    W : numpy.array
-        The weight matrix
-    second : int
-        The display second(s) for the image(s), if saveable is False.
-    saveable : boolean
-        Save or plot the figure.
-    shape : a list with 2 int or None
-        The shape of the feature image; for MNIST it is [28, 28].
-    name : str
-        A name to save the image, if saveable is True.
-    fig_idx : int
-        matplotlib figure index.
-
-    Examples
-    --------
-    >>> tl.visualize.draw_weights(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012)
-
-    """
-    if shape is None:
-        shape = [28, 28]
-
-    import matplotlib.pyplot as plt
-    if saveable is False:
-        plt.ion()
-    fig = plt.figure(fig_idx)  # show all feature images
-    n_units = W.shape[1]
-
-    num_r = int(np.sqrt(n_units))  # units shown per row, e.g. 25 hidden units -> 5 per row
-    num_c = int(np.ceil(n_units / num_r))
-    count = 1
-    for _row in range(1, num_r + 1):
-        for _col in range(1, num_c + 1):
-            if count > n_units:
-                break
-            fig.add_subplot(num_r, num_c, count)
-            # ------------------------------------------------------------
-            # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray')
-            # ------------------------------------------------------------
-            feature = W[:, count - 1] / np.sqrt((W[:, count - 1]**2).sum())  # L2-normalize each column
-            # feature[feature<0.0001] = 0   # value threshold
-            # if count == 1 or count == 2:
-            #     print(np.mean(feature))
-            # if np.std(feature) < 0.03:      # condition threshold
-            #     feature = np.zeros_like(feature)
-            # if np.mean(feature) < -0.015:      # condition threshold
-            #     feature = np.zeros_like(feature)
-            plt.imshow(
-                np.reshape(feature, (shape[0], shape[1])), cmap='gray', interpolation="nearest"
-            )  # , vmin=np.min(feature), vmax=np.max(feature))
-            # plt.title(name)
-            # ------------------------------------------------------------
-            # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest")
-            plt.gca().xaxis.set_major_locator(plt.NullLocator())  # disable ticks
-            plt.gca().yaxis.set_major_locator(plt.NullLocator())
-            count = count + 1
-    if saveable:
-        plt.savefig(name + '.pdf', format='pdf')
-    else:
-        plt.draw()
-        plt.pause(second)
-
-
-W = draw_weights
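-
-# A sketch for `draw_weights`, assuming MNIST-shaped dense weights (784 x 25):
-# >>> W_dense = np.random.randn(784, 25)
-# >>> draw_weights(W_dense, second=10, saveable=True, shape=[28, 28], name='w1')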
diff --git a/main_eager_mode.py b/train.py
similarity index 58%
rename from main_eager_mode.py
rename to train.py
index 5119b3e..3c8fc60 100755
--- a/main_eager_mode.py
+++ b/train.py
@@ -1,35 +1,31 @@
-"""Eager mode, single GPU
-"""
-
 import os, time, multiprocessing
 import numpy as np
 import tensorflow as tf
-tf.enable_eager_execution()  # for TF 1.13
 import tensorlayer as tl
 from glob import glob
-from utils import get_celebA, flags  # get_image
+from data import get_celebA, flags
 from model import get_generator, get_discriminator
 
-FLAGS = flags.FLAGS
-num_tiles = int(np.sqrt(FLAGS.sample_size))
+num_tiles = int(np.sqrt(flags.sample_size))
 
 def train():
-    images, images_path = get_celebA(FLAGS.output_size, FLAGS.n_epoch, FLAGS.batch_size)
-    G = get_generator([None, FLAGS.z_dim])
-    D = get_discriminator([None, FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim])
+    images, images_path = get_celebA(flags.output_size, flags.n_epoch, flags.batch_size)
+    G = get_generator([None, flags.z_dim])
+    D = get_discriminator([None, flags.output_size, flags.output_size, flags.c_dim])
 
     G.train()
     D.train()
 
-    d_optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1)
-    g_optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1)
+    d_optimizer = tf.optimizers.Adam(flags.learning_rate, beta_1=flags.beta1)
+    g_optimizer = tf.optimizers.Adam(flags.learning_rate, beta_1=flags.beta1)
 
-    n_step_epoch = int(len(images_path) // FLAGS.batch_size)
+    n_step_epoch = int(len(images_path) // flags.batch_size)
 
     for step, batch_images in enumerate(images):
         step_time = time.time()
         with tf.GradientTape(persistent=True) as tape:
-            z = tf.contrib.distributions.Normal(0., 1.).sample([FLAGS.batch_size, FLAGS.z_dim])  #tf.placeholder(tf.float32, [None, z_dim], name='z_noise')
+            # z = tf.distributions.Normal(0., 1.).sample([flags.batch_size, flags.z_dim])  #tf.placeholder(tf.float32, [None, z_dim], name='z_noise')
+            z = np.random.normal(loc=0.0, scale=1.0, size=[flags.batch_size, flags.z_dim]).astype(np.float32)
             d_logits = D(G(z))
             d2_logits = D(batch_images)
             # discriminator: real images are labelled as 1
@@ -47,12 +43,14 @@ def train():
             d_optimizer.apply_gradients(zip(grad, D.weights))
         del tape
 
-        print("Epoch: [{}/{}] [{}/{}] took: {:3f}, d_loss: {:5f}, g_loss: {:5f}".format(step//n_step_epoch, FLAGS.n_epoch, step, n_step_epoch, time.time()-step_time, d_loss, g_loss))
-        if np.mod(step, FLAGS.save_step) == 0:
-            G.save_weights('{}/G.npz'.format(FLAGS.checkpoint_dir), format='npz')
-            D.save_weights('{}/D.npz'.format(FLAGS.checkpoint_dir), format='npz')
+        print("Epoch: [{}/{}] [{}/{}] took: {:.3f}s, d_loss: {:.5f}, g_loss: {:.5f}".format(step//n_step_epoch, flags.n_epoch, step, n_step_epoch, time.time()-step_time, d_loss, g_loss))
+        if np.mod(step, flags.save_step) == 0:
+            G.save_weights('{}/G.npz'.format(flags.checkpoint_dir), format='npz')
+            D.save_weights('{}/D.npz'.format(flags.checkpoint_dir), format='npz')
+            G.eval()
             result = G(z)
-            tl.visualize.save_images(result.numpy(), [num_tiles, num_tiles], '{}/train_{:02d}_{:04d}.png'.format(FLAGS.sample_dir, step//n_step_epoch, step))
+            G.train()
+            tl.visualize.save_images(result.numpy(), [num_tiles, num_tiles], '{}/train_{:02d}_{:04d}.png'.format(flags.sample_dir, step//n_step_epoch, step))
 
 if __name__ == '__main__':
     train()
diff --git a/utils.py b/utils.py
deleted file mode 100755
index 9b8d6ca..0000000
--- a/utils.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import numpy as np
-import tensorflow as tf
-import tensorlayer as tl
-## enable debug logging
-tl.logging.set_verbosity(tl.logging.DEBUG)
-
-# Define TF Flags
-flags = tf.app.flags
-flags.DEFINE_integer("n_epoch", 25, "Epoch to train [25]")
-flags.DEFINE_integer("z_dim", 100, "Number of noise values [100]")
-flags.DEFINE_float("learning_rate", 0.0002, "Learning rate for adam [0.0002]")
-flags.DEFINE_float("beta1", 0.5, "Momentum term of adam [0.5]")
-flags.DEFINE_float("train_size", np.inf, "The size of train images [np.inf]")
-flags.DEFINE_integer("batch_size", 64, "The number of batch images [64]")
-flags.DEFINE_integer("image_size", 108, "The size of image to use (will be center cropped) [108]")
-flags.DEFINE_integer("output_size", 64, "The size of the output images to produce [64]")
-flags.DEFINE_integer("sample_size", 64, "The number of sample images [64]")
-# flags.DEFINE_integer("c_dim", 3, "Number of image channels. [3]")
-flags.DEFINE_integer("sample_step", 500, "The interval of generating samples. [500]")
-flags.DEFINE_integer("save_step", 500, "The interval of saving checkpoints. [500]")
[500]") -flags.DEFINE_string("dataset", "celebA", "The name of dataset [celebA, mnist, lsun]") -flags.DEFINE_string("checkpoint_dir", "checkpoint", "Directory name to save the checkpoints [checkpoint]") -flags.DEFINE_string("sample_dir", "samples", "Directory name to save the image samples [samples]") -flags.DEFINE_boolean("is_train", False, "True for training, False for testing [False]") -flags.DEFINE_boolean("is_crop", True, "True for training, False for testing [False]") -assert np.sqrt(flags.FLAGS.sample_size) % 1 == 0., 'Flag `sample_size` needs to be a perfect square' -tl.files.exists_or_mkdir(flags.FLAGS.checkpoint_dir) # save model -tl.files.exists_or_mkdir(flags.FLAGS.sample_dir) # save generated image - -# model_dir = "%s_%s_%s" % (FLAGS.dataset, FLAGS.batch_size, FLAGS.output_size) -# save_dir = os.path.join(FLAGS.checkpoint_dir, model_dir) -# tl.files.exists_or_mkdir(save_dir) - -def get_celebA(output_size, n_epoch, batch_size): - # dataset API and augmentation - images_path = tl.files.load_file_list(path='data', regx='.*.jpg', keep_prefix=True, printable=False) - def generator_train(): - for image_path in images_path: - yield image_path.encode('utf-8') - def _map_fn(image_path): - image = tf.read_file(image_path) - image = tf.image.decode_jpeg(image, channels=3) # get RGB with 0~1 - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # image = tf.image.crop_central(image, [FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim]) - # image = tf.image.resize_images(image, FLAGS.output_size]) - image = image[45:173, 25:153, :] - image = tf.image.resize_bicubic([image], (output_size, output_size))[0] - # image = tf.image.crop_and_resize(image, boxes=[[]], crop_size=[64, 64]) - # image = tf.image.resize_image_with_crop_or_pad(image, FLAGS.output_size, FLAGS.output_size) # central crop - image = tf.image.random_flip_left_right(image) - image = image * 2 - 1 - return image - train_ds = tf.data.Dataset.from_generator(generator_train, output_types=tf.string) - ds = train_ds.shuffle(buffer_size=4096) - # ds = ds.shard(num_shards=hvd.size(), index=hvd.rank()) - ds = ds.repeat(n_epoch) - ds = ds.map(_map_fn, num_parallel_calls=4) - ds = ds.batch(batch_size) - ds = ds.prefetch(buffer_size=2) - return ds, images_path - # for batch_images in train_ds: - # print(batch_images.shape) - # value = ds.make_one_shot_iterator().get_next() - - -## old code -# import scipy.misc -# import imageio as io -# import numpy as np -# -# def center_crop(x, crop_h, crop_w=None, resize_w=64): -# if crop_w is None: -# crop_w = crop_h -# h, w = x.shape[:2] -# j = int(round((h - crop_h)/2.)) -# i = int(round((w - crop_w)/2.)) -# return scipy.misc.imresize(x[j:j+crop_h, i:i+crop_w], -# [resize_w, resize_w]) -# -# def merge(images, size): -# h, w = images.shape[1], images.shape[2] -# img = np.zeros((h * size[0], w * size[1], 3)) -# for idx, image in enumerate(images): -# i = idx % size[1] -# j = idx // size[1] -# img[j * h: j * h + h, i * w: i * w + w, :] = image -# return img -# -# def transform(image, npx=64, is_crop=True, resize_w=64): -# if is_crop: -# cropped_image = center_crop(image, npx, resize_w=resize_w) -# else: -# cropped_image = image -# return (np.array(cropped_image) / 127.5) - 1. -# -# def inverse_transform(images): -# return (images + 1.) / 2. 
-# -# def imread(path, is_grayscale = False): -# if (is_grayscale): -# return io.imread(path).astype(np.float).flatten() -# else: -# return io.imread(path).astype(np.float) -# -# def imsave(images, size, path): -# return io.imsave(path, merge(images, size)) -# -# def get_image(image_path, image_size, is_crop=True, resize_w=64, is_grayscale = False): -# return transform(imread(image_path, is_grayscale), image_size, is_crop, resize_w) -# -# def save_images(images, size, image_path): -# return imsave(inverse_transform(images), size, image_path)