diff --git a/.gitignore b/.gitignore index 6da6370..e330b9b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,7 @@ dist docs/_build tensorlayer.egg-info tensorlayer/__pacache__ + +.vscode/* +data/* +samples/* \ No newline at end of file diff --git a/README.md b/README.md index fc4008b..1729248 100755 --- a/README.md +++ b/README.md @@ -13,8 +13,8 @@ Looking for Text to Image Synthesis ? [click here](https://github.com/zsdonghao/ ## Prerequisites - Python 2.7 or Python 3.3+ -- [TensorFlow==1.0+](https://www.tensorflow.org/) -- [TensorLayer==1.4+](https://github.com/zsdonghao/tensorlayer) +- [TensorFlow==1.10.0+](https://www.tensorflow.org/) +- [TensorLayer==1.10.1+](https://github.com/tensorlayer/tensorlayer) ## Usage diff --git a/main.py b/main.py index ecc132c..b7e6985 100755 --- a/main.py +++ b/main.py @@ -1,25 +1,37 @@ -import os, pprint, time +""" TensorLayer implementation of Deep Convolutional Generative Adversarial Network (DCGAN). +Using deep convolutional generative adversarial networks (DCGAN) +to generate face images from a noise distribution. +References: + -Generative Adversarial Nets. + Goodfellow et al. arXiv: 1406.2661. + - Unsupervised Representation Learning with Deep Convolutional + Generative Adversarial Networks. A Radford, L Metz, S Chintala. + arXiv: 1511.06434. +Links: + - [GAN Paper](https://arxiv.org/pdf/1406.2661.pdf) + - [DCGAN Paper](https://arxiv.org/abs/1511.06434) +Usage: + - See README.md +""" +import os +import time + import numpy as np import tensorflow as tf import tensorlayer as tl -from tensorlayer.layers import * + from glob import glob from random import shuffle -from model import * -from utils import * - -pp = pprint.PrettyPrinter() -""" -TensorLayer implementation of DCGAN to generate face image. 
+from model import generator_simplified_api, discriminator_simplified_api +from utils import get_image -Usage : see README.md -""" +# Define TF Flags flags = tf.app.flags flags.DEFINE_integer("epoch", 25, "Epoch to train [25]") flags.DEFINE_float("learning_rate", 0.0002, "Learning rate of for adam [0.0002]") flags.DEFINE_float("beta1", 0.5, "Momentum term of adam [0.5]") -flags.DEFINE_integer("train_size", np.inf, "The size of train images [np.inf]") +flags.DEFINE_float("train_size", np.inf, "The size of train images [np.inf]") flags.DEFINE_integer("batch_size", 64, "The number of batch images [64]") flags.DEFINE_integer("image_size", 108, "The size of image to use (will be center cropped) [108]") flags.DEFINE_integer("output_size", 64, "The size of the output images to produce [64]") @@ -36,57 +48,65 @@ FLAGS = flags.FLAGS def main(_): - pp.pprint(flags.FLAGS.__flags) + # Print flags + for flag, _ in FLAGS.__flags.items(): + print('"{}": {}'.format(flag, getattr(FLAGS, flag))) + print("--------------------") + # Configure checkpoint/samples dir tl.files.exists_or_mkdir(FLAGS.checkpoint_dir) tl.files.exists_or_mkdir(FLAGS.sample_dir) - z_dim = 100 + z_dim = 100 # noise dim + + # Construct graph on GPU with tf.device("/gpu:0"): - ##========================= DEFINE MODEL ===========================## + + """ Define Models """ z = tf.placeholder(tf.float32, [FLAGS.batch_size, z_dim], name='z_noise') real_images = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim], name='real_images') - # z --> generator for training + # Input noise into generator for training net_g, g_logits = generator_simplified_api(z, is_train=True, reuse=False) - # generated fake images --> discriminator + + # Input real and generated fake images into discriminator for training net_d, d_logits = discriminator_simplified_api(net_g.outputs, is_train=True, reuse=False) - # real images --> discriminator net_d2, d2_logits = 
discriminator_simplified_api(real_images, is_train=True, reuse=True) - # sample_z --> generator for evaluation, set is_train to False - # so that BatchNormLayer behave differently + + # Input noise into generator for evaluation + # set is_train to False so that BatchNormLayer behaves differently net_g2, g2_logits = generator_simplified_api(z, is_train=False, reuse=True) - ##========================= DEFINE TRAIN OPS =======================## + """ Define Training Operations """ # cost for updating discriminator and generator # discriminator: real images are labelled as 1 d_loss_real = tl.cost.sigmoid_cross_entropy(d2_logits, tf.ones_like(d2_logits), name='dreal') + # discriminator: images from generator (fake) are labelled as 0 d_loss_fake = tl.cost.sigmoid_cross_entropy(d_logits, tf.zeros_like(d_logits), name='dfake') d_loss = d_loss_real + d_loss_fake + # generator: try to make the the fake images look real (1) g_loss = tl.cost.sigmoid_cross_entropy(d_logits, tf.ones_like(d_logits), name='gfake') g_vars = tl.layers.get_variables_with_name('generator', True, True) d_vars = tl.layers.get_variables_with_name('discriminator', True, True) - net_g.print_params(False) - print("---------------") - net_d.print_params(False) - - # optimizers for updating discriminator and generator + # Define optimizers for updating discriminator and generator d_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1) \ .minimize(d_loss, var_list=d_vars) g_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1) \ .minimize(g_loss, var_list=g_vars) + # Init Session sess = tf.InteractiveSession() - tl.layers.initialize_global_variables(sess) + sess.run(tf.global_variables_initializer()) model_dir = "%s_%s_%s" % (FLAGS.dataset, FLAGS.batch_size, FLAGS.output_size) save_dir = os.path.join(FLAGS.checkpoint_dir, model_dir) tl.files.exists_or_mkdir(FLAGS.sample_dir) tl.files.exists_or_mkdir(save_dir) + # load the latest checkpoints net_g_name = os.path.join(save_dir, 
'net_g.npz') net_d_name = os.path.join(save_dir, 'net_d.npz') @@ -95,32 +115,36 @@ def main(_): sample_seed = np.random.normal(loc=0.0, scale=1.0, size=(FLAGS.sample_size, z_dim)).astype(np.float32)# sample_seed = np.random.uniform(low=-1, high=1, size=(FLAGS.sample_size, z_dim)).astype(np.float32) - ##========================= TRAIN MODELS ================================## + """ Training models """ iter_counter = 0 for epoch in range(FLAGS.epoch): - ## shuffle data + + # Shuffle data shuffle(data_files) - ## update sample files based on shuffled data + # Update sample files based on shuffled data sample_files = data_files[0:FLAGS.sample_size] sample = [get_image(sample_file, FLAGS.image_size, is_crop=FLAGS.is_crop, resize_w=FLAGS.output_size, is_grayscale = 0) for sample_file in sample_files] sample_images = np.array(sample).astype(np.float32) print("[*] Sample images updated!") - ## load image data + # Load image data batch_idxs = min(len(data_files), FLAGS.train_size) // FLAGS.batch_size for idx in range(0, batch_idxs): - batch_files = data_files[idx*FLAGS.batch_size:(idx+1)*FLAGS.batch_size] - ## get real images - # more image augmentation functions in http://tensorlayer.readthedocs.io/en/latest/modules/prepro.html + batch_files = data_files[idx*FLAGS.batch_size:(idx + 1) * FLAGS.batch_size] + + # Get real images (more image augmentation functions at [http://tensorlayer.readthedocs.io/en/latest/modules/prepro.html]) batch = [get_image(batch_file, FLAGS.image_size, is_crop=FLAGS.is_crop, resize_w=FLAGS.output_size, is_grayscale = 0) for batch_file in batch_files] batch_images = np.array(batch).astype(np.float32) - batch_z = np.random.normal(loc=0.0, scale=1.0, size=(FLAGS.sample_size, z_dim)).astype(np.float32) # batch_z = np.random.uniform(low=-1, high=1, size=(FLAGS.batch_size, z_dim)).astype(np.float32) + batch_z = np.random.normal(loc=0.0, scale=1.0, size=(FLAGS.sample_size, z_dim)).astype(np.float32) start_time = time.time() - # updates the discriminator + 
+ # Updates the Discriminator(D) errD, _ = sess.run([d_loss, d_optim], feed_dict={z: batch_z, real_images: batch_images }) - # updates the generator, run generator twice to make sure that d_loss does not go to zero (difference from paper) + + # Updates the Generator(G) + # run generator twice to make sure that d_loss does not go to zero (different from paper) for _ in range(2): errG, _ = sess.run([g_loss, g_optim], feed_dict={z: batch_z}) print("Epoch: [%2d/%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \ @@ -128,17 +152,20 @@ def main(_): iter_counter += 1 if np.mod(iter_counter, FLAGS.sample_step) == 0: - # generate and visualize generated images + # Generate images img, errD, errG = sess.run([net_g2.outputs, d_loss, g_loss], feed_dict={z : sample_seed, real_images: sample_images}) + # Visualize generated images tl.visualize.save_images(img, [8, 8], './{}/train_{:02d}_{:04d}.png'.format(FLAGS.sample_dir, epoch, idx)) print("[Sample] d_loss: %.8f, g_loss: %.8f" % (errD, errG)) if np.mod(iter_counter, FLAGS.save_step) == 0: - # save current network parameters + # Save current network parameters print("[*] Saving checkpoints...") tl.files.save_npz(net_g.all_params, name=net_g_name, sess=sess) tl.files.save_npz(net_d.all_params, name=net_d_name, sess=sess) print("[*] Saving checkpoints SUCCESS!") + + sess.close() if __name__ == '__main__': tf.app.run() diff --git a/model.py b/model.py index b1fd554..5970d80 100755 --- a/model.py +++ b/model.py @@ -1,7 +1,14 @@ - import tensorflow as tf import tensorlayer as tl -from tensorlayer.layers import * +from tensorlayer.layers import ( + InputLayer, + DenseLayer, + DeConv2d, + ReshapeLayer, + BatchNormLayer, + Conv2d, + FlattenLayer +) flags = tf.app.flags FLAGS = flags.FLAGS @@ -11,11 +18,10 @@ def generator_simplified_api(inputs, is_train=True, reuse=False): s2, s4, s8, s16 = int(image_size/2), int(image_size/4), int(image_size/8), int(image_size/16) gf_dim = 64 # Dimension of gen filters in first conv layer. 
[64] c_dim = FLAGS.c_dim # n_color 3 - batch_size = FLAGS.batch_size # 64 w_init = tf.random_normal_initializer(stddev=0.02) gamma_init = tf.random_normal_initializer(1., 0.02) + with tf.variable_scope("generator", reuse=reuse): - tl.layers.set_name_reuse(reuse) net_in = InputLayer(inputs, name='g/in') net_h0 = DenseLayer(net_in, n_units=gf_dim*8*s16*s16, W_init=w_init, @@ -24,23 +30,23 @@ def generator_simplified_api(inputs, is_train=True, reuse=False): net_h0 = BatchNormLayer(net_h0, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h0/batch_norm') - net_h1 = DeConv2d(net_h0, gf_dim*4, (5, 5), out_size=(s8, s8), strides=(2, 2), - padding='SAME', batch_size=batch_size, act=None, W_init=w_init, name='g/h1/decon2d') + net_h1 = DeConv2d(net_h0, gf_dim*4, (5, 5), strides=(2, 2), + padding='SAME', act=None, W_init=w_init, name='g/h1/decon2d') net_h1 = BatchNormLayer(net_h1, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h1/batch_norm') - net_h2 = DeConv2d(net_h1, gf_dim*2, (5, 5), out_size=(s4, s4), strides=(2, 2), - padding='SAME', batch_size=batch_size, act=None, W_init=w_init, name='g/h2/decon2d') + net_h2 = DeConv2d(net_h1, gf_dim*2, (5, 5), strides=(2, 2), + padding='SAME', act=None, W_init=w_init, name='g/h2/decon2d') net_h2 = BatchNormLayer(net_h2, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h2/batch_norm') - net_h3 = DeConv2d(net_h2, gf_dim, (5, 5), out_size=(s2, s2), strides=(2, 2), - padding='SAME', batch_size=batch_size, act=None, W_init=w_init, name='g/h3/decon2d') + net_h3 = DeConv2d(net_h2, gf_dim, (5, 5), strides=(2, 2), + padding='SAME', act=None, W_init=w_init, name='g/h3/decon2d') net_h3 = BatchNormLayer(net_h3, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h3/batch_norm') - net_h4 = DeConv2d(net_h3, c_dim, (5, 5), out_size=(image_size, image_size), strides=(2, 2), - padding='SAME', batch_size=batch_size, act=None, W_init=w_init, name='g/h4/decon2d') + net_h4 = 
DeConv2d(net_h3, c_dim, (5, 5), strides=(2, 2), + padding='SAME', act=None, W_init=w_init, name='g/h4/decon2d') logits = net_h4.outputs net_h4.outputs = tf.nn.tanh(net_h4.outputs) return net_h4, logits @@ -48,29 +54,28 @@ def generator_simplified_api(inputs, is_train=True, reuse=False): def discriminator_simplified_api(inputs, is_train=True, reuse=False): df_dim = 64 # Dimension of discrim filters in first conv layer. [64] c_dim = FLAGS.c_dim # n_color 3 - batch_size = FLAGS.batch_size # 64 w_init = tf.random_normal_initializer(stddev=0.02) gamma_init = tf.random_normal_initializer(1., 0.02) + with tf.variable_scope("discriminator", reuse=reuse): - tl.layers.set_name_reuse(reuse) net_in = InputLayer(inputs, name='d/in') - net_h0 = Conv2d(net_in, df_dim, (5, 5), (2, 2), act=lambda x: tl.act.lrelu(x, 0.2), + net_h0 = Conv2d(net_in, df_dim, (5, 5), (2, 2), act=tf.nn.leaky_relu, padding='SAME', W_init=w_init, name='d/h0/conv2d') net_h1 = Conv2d(net_h0, df_dim*2, (5, 5), (2, 2), act=None, padding='SAME', W_init=w_init, name='d/h1/conv2d') - net_h1 = BatchNormLayer(net_h1, act=lambda x: tl.act.lrelu(x, 0.2), + net_h1 = BatchNormLayer(net_h1, act=tf.nn.leaky_relu, is_train=is_train, gamma_init=gamma_init, name='d/h1/batch_norm') net_h2 = Conv2d(net_h1, df_dim*4, (5, 5), (2, 2), act=None, padding='SAME', W_init=w_init, name='d/h2/conv2d') - net_h2 = BatchNormLayer(net_h2, act=lambda x: tl.act.lrelu(x, 0.2), + net_h2 = BatchNormLayer(net_h2, act=tf.nn.leaky_relu, is_train=is_train, gamma_init=gamma_init, name='d/h2/batch_norm') net_h3 = Conv2d(net_h2, df_dim*8, (5, 5), (2, 2), act=None, padding='SAME', W_init=w_init, name='d/h3/conv2d') - net_h3 = BatchNormLayer(net_h3, act=lambda x: tl.act.lrelu(x, 0.2), + net_h3 = BatchNormLayer(net_h3, act=tf.nn.leaky_relu, is_train=is_train, gamma_init=gamma_init, name='d/h3/batch_norm') net_h4 = FlattenLayer(net_h3, name='d/h4/flatten') diff --git a/tensorlayer/__init__.py b/tensorlayer/__init__.py deleted file mode 100644 index 
799353f..0000000 --- a/tensorlayer/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Deep learning and Reinforcement learning library for Researchers and Engineers""" -from __future__ import absolute_import - -try: - install_instr = "Please make sure you install a recent enough version of TensorFlow." - import tensorflow -except ImportError: - raise ImportError("__init__.py : Could not import TensorFlow." + install_instr) - -from . import activation -from . import cost -from . import files -from . import iterate -from . import layers -from . import utils -from . import visualize -from . import prepro -from . import nlp -from . import rein -from . import distributed - -# alias -act = activation -vis = visualize - -__version__ = "1.8.1" - -global_flag = {} -global_dict = {} diff --git a/tensorlayer/__pycache__/__init__.cpython-34.pyc b/tensorlayer/__pycache__/__init__.cpython-34.pyc deleted file mode 100644 index a1a6c73..0000000 Binary files a/tensorlayer/__pycache__/__init__.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/__init__.cpython-35.pyc b/tensorlayer/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index ddcf5bd..0000000 Binary files a/tensorlayer/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/_logging.cpython-34.pyc b/tensorlayer/__pycache__/_logging.cpython-34.pyc deleted file mode 100644 index 30d46c3..0000000 Binary files a/tensorlayer/__pycache__/_logging.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/_logging.cpython-35.pyc b/tensorlayer/__pycache__/_logging.cpython-35.pyc deleted file mode 100644 index 2529067..0000000 Binary files a/tensorlayer/__pycache__/_logging.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/activation.cpython-34.pyc b/tensorlayer/__pycache__/activation.cpython-34.pyc deleted file mode 100644 index 253f0e0..0000000 Binary files a/tensorlayer/__pycache__/activation.cpython-34.pyc and /dev/null 
differ diff --git a/tensorlayer/__pycache__/activation.cpython-35.pyc b/tensorlayer/__pycache__/activation.cpython-35.pyc deleted file mode 100644 index 523166e..0000000 Binary files a/tensorlayer/__pycache__/activation.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/cost.cpython-34.pyc b/tensorlayer/__pycache__/cost.cpython-34.pyc deleted file mode 100644 index 19d4473..0000000 Binary files a/tensorlayer/__pycache__/cost.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/cost.cpython-35.pyc b/tensorlayer/__pycache__/cost.cpython-35.pyc deleted file mode 100644 index 8237488..0000000 Binary files a/tensorlayer/__pycache__/cost.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/distributed.cpython-34.pyc b/tensorlayer/__pycache__/distributed.cpython-34.pyc deleted file mode 100644 index 7caded1..0000000 Binary files a/tensorlayer/__pycache__/distributed.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/distributed.cpython-35.pyc b/tensorlayer/__pycache__/distributed.cpython-35.pyc deleted file mode 100644 index 9554c12..0000000 Binary files a/tensorlayer/__pycache__/distributed.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/files.cpython-34.pyc b/tensorlayer/__pycache__/files.cpython-34.pyc deleted file mode 100644 index fa187d7..0000000 Binary files a/tensorlayer/__pycache__/files.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/files.cpython-35.pyc b/tensorlayer/__pycache__/files.cpython-35.pyc deleted file mode 100644 index f959b9f..0000000 Binary files a/tensorlayer/__pycache__/files.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/iterate.cpython-34.pyc b/tensorlayer/__pycache__/iterate.cpython-34.pyc deleted file mode 100644 index 30fe0f5..0000000 Binary files a/tensorlayer/__pycache__/iterate.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/iterate.cpython-35.pyc 
b/tensorlayer/__pycache__/iterate.cpython-35.pyc deleted file mode 100644 index bb4ff9f..0000000 Binary files a/tensorlayer/__pycache__/iterate.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/nlp.cpython-34.pyc b/tensorlayer/__pycache__/nlp.cpython-34.pyc deleted file mode 100644 index 27af78f..0000000 Binary files a/tensorlayer/__pycache__/nlp.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/nlp.cpython-35.pyc b/tensorlayer/__pycache__/nlp.cpython-35.pyc deleted file mode 100644 index 33c08fc..0000000 Binary files a/tensorlayer/__pycache__/nlp.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/prepro.cpython-34.pyc b/tensorlayer/__pycache__/prepro.cpython-34.pyc deleted file mode 100644 index e957015..0000000 Binary files a/tensorlayer/__pycache__/prepro.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/prepro.cpython-35.pyc b/tensorlayer/__pycache__/prepro.cpython-35.pyc deleted file mode 100644 index e73f952..0000000 Binary files a/tensorlayer/__pycache__/prepro.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/rein.cpython-34.pyc b/tensorlayer/__pycache__/rein.cpython-34.pyc deleted file mode 100644 index 96dbac1..0000000 Binary files a/tensorlayer/__pycache__/rein.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/rein.cpython-35.pyc b/tensorlayer/__pycache__/rein.cpython-35.pyc deleted file mode 100644 index ff2ff56..0000000 Binary files a/tensorlayer/__pycache__/rein.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/utils.cpython-34.pyc b/tensorlayer/__pycache__/utils.cpython-34.pyc deleted file mode 100644 index d89f31f..0000000 Binary files a/tensorlayer/__pycache__/utils.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/utils.cpython-35.pyc b/tensorlayer/__pycache__/utils.cpython-35.pyc deleted file mode 100644 index 8f8b9c5..0000000 Binary files 
a/tensorlayer/__pycache__/utils.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/visualize.cpython-34.pyc b/tensorlayer/__pycache__/visualize.cpython-34.pyc deleted file mode 100644 index 5df167e..0000000 Binary files a/tensorlayer/__pycache__/visualize.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/__pycache__/visualize.cpython-35.pyc b/tensorlayer/__pycache__/visualize.cpython-35.pyc deleted file mode 100644 index 6e4aaf9..0000000 Binary files a/tensorlayer/__pycache__/visualize.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/_logging.py b/tensorlayer/_logging.py deleted file mode 100644 index be075bb..0000000 --- a/tensorlayer/_logging.py +++ /dev/null @@ -1,16 +0,0 @@ -import logging as _logger - -logging = _logger.getLogger('tensorlayer') -logging.setLevel(_logger.INFO) -_hander = _logger.StreamHandler() -formatter = _logger.Formatter('[TL] %(message)s') -_hander.setFormatter(formatter) -logging.addHandler(_hander) - - -def info(fmt, *args): - logging.info(fmt, *args) - - -def warning(fmt, *args): - logging.warning(fmt, *args) diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py deleted file mode 100644 index 9c812d1..0000000 --- a/tensorlayer/activation.py +++ /dev/null @@ -1,154 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import tensorflow as tf -from tensorflow.python.util.deprecation import deprecated - -__all__ = [ - 'identity', - 'ramp', - 'leaky_relu', - 'swish', - 'pixel_wise_softmax', - 'linear', - 'lrelu', -] - - -@deprecated("2018-06-30", "This API will be deprecated soon as tf.identity can do the same thing.") -def identity(x): - """The identity activation function. - Shortcut is ``linear``. - - Parameters - ---------- - x : Tensor - input. - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - """ - return x - - -def ramp(x, v_min=0, v_max=1, name=None): - """The ramp activation function. - - Parameters - ---------- - x : Tensor - input. 
- v_min : float - cap input to v_min as a lower bound. - v_max : float - cap input to v_max as a upper bound. - name : str - The function name (optional). - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - """ - return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name) - - -def leaky_relu(x, alpha=0.1, name="lrelu"): - """The LeakyReLU, Shortcut is ``lrelu``. - - Modified version of ReLU, introducing a nonzero gradient for negative input. - - Parameters - ---------- - x : Tensor - Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, - ``int16``, or ``int8``. - alpha : float - Slope. - name : str - The function name (optional). - - Examples - -------- - >>> net = tl.layers.DenseLayer(net, 100, act=lambda x : tl.act.lrelu(x, 0.2), name='dense') - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - References - ------------ - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) `__ - - """ - # with tf.name_scope(name) as scope: - # x = tf.nn.relu(x) - # m_x = tf.nn.relu(-x) - # x -= alpha * m_x - x = tf.maximum(x, alpha * x, name=name) - return x - - -def swish(x, name='swish'): - """The Swish function. - See `Swish: a Self-Gated Activation Function `__. - - Parameters - ---------- - x : Tensor - input. - name: str - function name (optional). - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - """ - with tf.name_scope(name): - x = tf.nn.sigmoid(x) * x - return x - - -@deprecated("2018-06-30", "This API will be deprecated soon as tf.nn.softmax can do the same thing.") -def pixel_wise_softmax(x, name='pixel_wise_softmax'): - """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1. - Usually be used for image segmentation. - - Parameters - ---------- - x : Tensor - input. - - For 2d image, 4D tensor (batch_size, height, weight, channel), where channel >= 2. 
- - For 3d image, 5D tensor (batch_size, depth, height, weight, channel), where channel >= 2. - name : str - function name (optional) - - Returns - ------- - Tensor - A ``Tensor`` in the same type as ``x``. - - Examples - -------- - >>> outputs = pixel_wise_softmax(network.outputs) - >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5) - - References - ---------- - - `tf.reverse `__ - - """ - with tf.name_scope(name): - return tf.nn.softmax(x) - - -# Alias -linear = identity -lrelu = leaky_relu diff --git a/tensorlayer/cli/__init__.py b/tensorlayer/cli/__init__.py deleted file mode 100644 index 1857582..0000000 --- a/tensorlayer/cli/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""The tensorlayer.cli module provides a command-line tool for some common tasks.""" diff --git a/tensorlayer/cli/__main__.py b/tensorlayer/cli/__main__.py deleted file mode 100644 index 1a65b3d..0000000 --- a/tensorlayer/cli/__main__.py +++ /dev/null @@ -1,14 +0,0 @@ -import argparse - -from tensorlayer.cli import train - -if __name__ == "__main__": - parser = argparse.ArgumentParser(prog='tl') - subparsers = parser.add_subparsers(dest='cmd') - train_parser = subparsers.add_parser('train', help='train a model using multiple local GPUs or CPUs.') - train.build_arg_parser(train_parser) - args = parser.parse_args() - if args.cmd == 'train': - train.main(args) - else: - parser.print_help() diff --git a/tensorlayer/cli/train.py b/tensorlayer/cli/train.py deleted file mode 100644 index 24744a4..0000000 --- a/tensorlayer/cli/train.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -tl train -======== - -(Alpha release - usage might change later) - -The tensorlayer.cli.train module provides the ``tl train`` subcommand. -It helps the user bootstrap a TensorFlow/TensorLayer program for distributed training -using multiple GPU cards or CPUs on a computer. - -You need to first setup the `CUDA_VISIBLE_DEVICES `_ -to tell ``tl train`` which GPUs are available. 
If the CUDA_VISIBLE_DEVICES is not given, -``tl train`` would try best to discover all available GPUs. - -In distribute training, each TensorFlow program needs a TF_CONFIG environment variable to describe -the cluster. It also needs a master daemon to -monitor all trainers. ``tl train`` is responsible -for automatically managing these two tasks. - -Usage ------ - -tl train [-h] [-p NUM_PSS] [-c CPU_TRAINERS] [args [args ...]] - -.. code-block:: bash - - # example of using GPU 0 and 1 for training mnist - CUDA_VISIBLE_DEVICES="0,1" - tl train example/tutorial_mnist_distributed.py - - # example of using CPU trainers for inception v3 - tl train -c 16 example/tutorial_imagenet_inceptionV3_distributed.py - - # example of using GPU trainers for inception v3 with customized arguments - # as CUDA_VISIBLE_DEVICES is not given, tl would try to discover all available GPUs - tl train example/tutorial_imagenet_inceptionV3_distributed.py -- --batch_size 16 - - -Command-line Arguments ----------------------- - -- ``file``: python file path. - -- ``NUM_PSS`` : The number of parameter servers. - -- ``CPU_TRAINERS``: The number of CPU trainers. - - It is recommended that ``NUM_PSS + CPU_TRAINERS <= cpu count`` - -- ``args``: Any parameter after ``--`` would be passed to the python program. - - -Notes ------ -A parallel training program would require multiple parameter servers -to help parallel trainers to exchange intermediate gradients. -The best number of parameter servers is often proportional to the -size of your model as well as the number of CPUs available. -You can control the number of parameter servers using the ``-p`` parameter. - -If you have a single computer with massive CPUs, you can use the ``-c`` parameter -to enable CPU-only parallel training. -The reason we are not supporting GPU-CPU co-training is because GPU and -CPU are running at different speeds. Using them together in training would -incur stragglers. 
- -""" - -import argparse -import json -import multiprocessing -import os -import platform -import re -import subprocess -import sys - -PORT_BASE = 10000 - - -def _get_gpu_ids(): - if 'CUDA_VISIBLE_DEVICES' in os.environ: - return [int(x) for x in os.environ.get('CUDA_VISIBLE_DEVICES', '').split(',')] - if platform.system() in ['Darwin', 'Linux']: - return [int(d.replace('nvidia', '')) for d in os.listdir('/dev') if re.match('^nvidia\d+$', d)] - else: - print('Please set CUDA_VISIBLE_DEVICES (see http://acceleware.com/blog/cudavisibledevices-masking-gpus)') - return [] - - -GPU_IDS = _get_gpu_ids() - - -def create_tf_config(cluster_spec, task_type, task_index): - return { - 'cluster': cluster_spec, - 'task': { - 'type': task_type, - 'index': task_index - }, - } - - -def create_tf_jobs(cluster_spec, prog, args): - gpu_assignment = dict((('worker', idx), gpu_idx) for (idx, gpu_idx) in enumerate(GPU_IDS)) - for job_type in cluster_spec: - for task_index in range(len(cluster_spec[job_type])): - new_env = os.environ.copy() - new_env.update({ - 'CUDA_VISIBLE_DEVICES': str(gpu_assignment.get((job_type, task_index), '')), - 'TF_CONFIG': json.dumps(create_tf_config(cluster_spec, job_type, task_index)), - }) - yield subprocess.Popen(['python3', prog] + args, env=new_env) - - -def validate_arguments(args): - if args.num_pss < 1: - print('Value error: must have ore than one parameter servers.') - exit(1) - - if not GPU_IDS: - num_cpus = multiprocessing.cpu_count() - if args.cpu_trainers > num_cpus: - print('Value error: there are %s available CPUs but you are requiring %s.' % (num_cpus, args.cpu_trainers)) - exit(1) - - if not os.path.isfile(args.file): - print('Value error: model trainning file does not exist') - exit(1) - - -def main(args): - validate_arguments(args) - num_workers = len(GPU_IDS) if GPU_IDS else args.cpu_trainers - print('Using program %s with args %s' % (args.file, ' '.join(args.args))) - print('Using %d workers, %d parameter servers, %d GPUs.' 
% (num_workers, args.num_pss, len(GPU_IDS))) - cluster_spec = { - 'ps': ['localhost:%d' % (PORT_BASE + i) for i in range(args.num_pss)], - 'worker': ['localhost:%d' % (PORT_BASE + args.num_pss + i) for i in range(num_workers)] - } - processes = list(create_tf_jobs(cluster_spec, args.file, args.args)) - try: - print('Press ENTER to exit the training ...') - sys.stdin.readline() - except KeyboardInterrupt: # https://docs.python.org/3/library/exceptions.html#KeyboardInterrupt - print('Keyboard interrupt received') - finally: - print('stopping all subprocesses ...') - for p in processes: - p.kill() - for p in processes: - p.wait() - print('END') - - -def build_arg_parser(parser): - parser.add_argument('-p', '--pss', dest='num_pss', type=int, default=1, help='number of parameter servers') - parser.add_argument('-c', '--cpu_trainers', dest='cpu_trainers', type=int, default=1, help='number of CPU trainers') - parser.add_argument('file', help='model trainning file path') - parser.add_argument('args', nargs='*', type=str, help='arguments to ') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - build_arg_parser(parser) - args = parser.parse_args() - main(args) diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py deleted file mode 100644 index 3934afd..0000000 --- a/tensorlayer/cost.py +++ /dev/null @@ -1,739 +0,0 @@ -# -*- coding: utf-8 -*- - -import logging -import tensorflow as tf - -__all__ = [ - 'cross_entropy', - 'sigmoid_cross_entropy', - 'binary_cross_entropy', - 'mean_squared_error', - 'normalized_mean_square_error', - 'absolute_difference_error', - 'dice_coe', - 'dice_hard_coe', - 'iou_coe', - 'cross_entropy_seq', - 'cross_entropy_seq_with_mask', - 'cosine_similarity', - 'li_regularizer', - 'lo_regularizer', - 'maxnorm_regularizer', - 'maxnorm_o_regularizer', - 'maxnorm_i_regularizer', -] - - -def cross_entropy(output, target, name=None): - """Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two 
distributions, it implements - softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``. - - Parameters - ---------- - output : Tensor - A batch of distribution with shape: [batch_size, num of classes]. - target : Tensor - A batch of index with shape: [batch_size, ]. - name : string - Name of this loss. - - Examples - -------- - >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits, 'my_loss') - - References - ----------- - - About cross-entropy: ``__. - - The code is borrowed from: ``__. - - """ - # try: # old - # return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, targets=target)) - # except: # TF 1.0 - if name is None: - raise Exception("Please give a unique name to tl.cost.cross_entropy for TF1.0+") - return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output, name=name)) - - -def sigmoid_cross_entropy(output, target, name=None): - """Sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``. - - Parameters - ---------- - output : Tensor - A batch of distribution with shape: [batch_size, num of classes]. - target : Tensor - A batch of index with shape: [batch_size, ]. - name : string - Name of this loss. - - """ - # try: # TF 1.0 - return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output, name=name)) - # except: - # return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, targets=target)) - - -def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'): - """Binary cross entropy operation. - - Parameters - ---------- - output : Tensor - Tensor with type of `float32` or `float64`. - target : Tensor - The target distribution, format the same with `output`. - epsilon : float - A small value to avoid output to be zero. - name : str - An optional name to attach to this function. 
- - References - ----------- - - `ericjang-DRAW `__ - - """ - # from tensorflow.python.framework import ops - # with ops.op_scope([output, target], name, "bce_loss") as name: - # output = ops.convert_to_tensor(output, name="preds") - # target = ops.convert_to_tensor(targets, name="target") - with tf.name_scope(name): - return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) + (1. - target) * tf.log(1. - output + epsilon)), axis=1)) - - # For brevity, let `x = output`, `z = target`. The binary cross entropy loss is - # - # loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) - - -def mean_squared_error(output, target, is_mean=False, name="mean_squared_error"): - """Return the TensorFlow expression of mean-square-error (L2) of two batch of data. - - Parameters - ---------- - output : Tensor - 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel]. - target : Tensor - The target distribution, format the same with `output`. - is_mean : boolean - Whether compute the mean or sum for each example. - - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. - - If False, use ``tf.reduce_sum`` (default). 
- - References - ------------ - - `Wiki Mean Squared Error `__ - - """ - with tf.name_scope(name): - if output.get_shape().ndims == 2: # [batch_size, n_feature] - if is_mean: - mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), 1)) - else: - mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), 1)) - elif output.get_shape().ndims == 3: # [batch_size, w, h] - if is_mean: - mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2])) - else: - mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), [1, 2])) - elif output.get_shape().ndims == 4: # [batch_size, w, h, c] - if is_mean: - mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2, 3])) - else: - mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), [1, 2, 3])) - else: - raise Exception("Unknow dimension") - return mse - - -def normalized_mean_square_error(output, target): - """Return the TensorFlow expression of normalized mean-square-error of two distributions. - - Parameters - ---------- - output : Tensor - 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel]. - target : Tensor - The target distribution, format the same with `output`. 
- - """ - with tf.name_scope("mean_squared_error_loss"): - if output.get_shape().ndims == 2: # [batch_size, n_feature] - nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=1)) - nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=1)) - elif output.get_shape().ndims == 3: # [batch_size, w, h] - nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1, 2])) - nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1, 2])) - elif output.get_shape().ndims == 4: # [batch_size, w, h, c] - nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1, 2, 3])) - nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1, 2, 3])) - nmse = tf.reduce_mean(nmse_a / nmse_b) - return nmse - - -def absolute_difference_error(output, target, is_mean=False): - """Return the TensorFlow expression of absolute difference error (L1) of two batch of data. - - Parameters - ---------- - output : Tensor - 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel]. - target : Tensor - The target distribution, format the same with `output`. - is_mean : boolean - Whether compute the mean or sum for each example. - - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. - - If False, use ``tf.reduce_sum`` (default). 
- - """ - with tf.name_scope("mean_squared_error_loss"): - if output.get_shape().ndims == 2: # [batch_size, n_feature] - if is_mean: - loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), 1)) - else: - loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), 1)) - elif output.get_shape().ndims == 3: # [batch_size, w, h] - if is_mean: - loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), [1, 2])) - else: - loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), [1, 2])) - elif output.get_shape().ndims == 4: # [batch_size, w, h, c] - if is_mean: - loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), [1, 2, 3])) - else: - loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), [1, 2, 3])) - else: - raise Exception("Unknow dimension") - return loss - - -def dice_coe(output, target, loss_type='jaccard', axis=(1, 2, 3), smooth=1e-5): - """Soft dice (Sørensen or Jaccard) coefficient for comparing the similarity - of two batch of data, usually be used for binary image segmentation - i.e. labels are binary. The coefficient between 0 to 1, 1 means totally match. - - Parameters - ----------- - output : Tensor - A distribution with shape: [batch_size, ....], (any dimensions). - target : Tensor - The target distribution, format the same with `output`. - loss_type : str - ``jaccard`` or ``sorensen``, default is ``jaccard``. - axis : tuple of int - All dimensions are reduced, default ``[1,2,3]``. - smooth : float - This small value will be added to the numerator and denominator. - - If both output and target are empty, it makes sure dice is 1. - - If either output or target are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``, then if smooth is very small, dice close to 0 (even the image values lower than the threshold), so in this case, higher smooth can have a higher dice. 
- - Examples - --------- - >>> outputs = tl.act.pixel_wise_softmax(network.outputs) - >>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_) - - References - ----------- - - `Wiki-Dice `__ - - """ - inse = tf.reduce_sum(output * target, axis=axis) - if loss_type == 'jaccard': - l = tf.reduce_sum(output * output, axis=axis) - r = tf.reduce_sum(target * target, axis=axis) - elif loss_type == 'sorensen': - l = tf.reduce_sum(output, axis=axis) - r = tf.reduce_sum(target, axis=axis) - else: - raise Exception("Unknow loss_type") - ## old axis=[0,1,2,3] - # dice = 2 * (inse) / (l + r) - # epsilon = 1e-5 - # dice = tf.clip_by_value(dice, 0, 1.0-epsilon) # if all empty, dice = 1 - ## new haodong - dice = (2. * inse + smooth) / (l + r + smooth) - ## - dice = tf.reduce_mean(dice) - return dice - - -def dice_hard_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): - """Non-differentiable Sørensen–Dice coefficient for comparing the similarity - of two batch of data, usually be used for binary image segmentation i.e. labels are binary. - The coefficient between 0 to 1, 1 if totally match. - - Parameters - ----------- - output : tensor - A distribution with shape: [batch_size, ....], (any dimensions). - target : tensor - The target distribution, format the same with `output`. - threshold : float - The threshold value to be true. - axis : tuple of integer - All dimensions are reduced, default ``(1,2,3)``. - smooth : float - This small value will be added to the numerator and denominator, see ``dice_coe``. 
- - References - ----------- - - `Wiki-Dice `__ - - """ - output = tf.cast(output > threshold, dtype=tf.float32) - target = tf.cast(target > threshold, dtype=tf.float32) - inse = tf.reduce_sum(tf.multiply(output, target), axis=axis) - l = tf.reduce_sum(output, axis=axis) - r = tf.reduce_sum(target, axis=axis) - ## old axis=[0,1,2,3] - # hard_dice = 2 * (inse) / (l + r) - # epsilon = 1e-5 - # hard_dice = tf.clip_by_value(hard_dice, 0, 1.0-epsilon) - ## new haodong - hard_dice = (2. * inse + smooth) / (l + r + smooth) - ## - hard_dice = tf.reduce_mean(hard_dice) - return hard_dice - - -def iou_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): - """Non-differentiable Intersection over Union (IoU) for comparing the - similarity of two batch of data, usually be used for evaluating binary image segmentation. - The coefficient between 0 to 1, and 1 means totally match. - - Parameters - ----------- - output : tensor - A batch of distribution with shape: [batch_size, ....], (any dimensions). - target : tensor - The target distribution, format the same with `output`. - threshold : float - The threshold value to be true. - axis : tuple of integer - All dimensions are reduced, default ``(1,2,3)``. - smooth : float - This small value will be added to the numerator and denominator, see ``dice_coe``. - - Notes - ------ - - IoU cannot be used as training loss, people usually use dice coefficient for training, IoU and hard-dice for evaluating. 
- - """ - pre = tf.cast(output > threshold, dtype=tf.float32) - truth = tf.cast(target > threshold, dtype=tf.float32) - inse = tf.reduce_sum(tf.multiply(pre, truth), axis=axis) # AND - union = tf.reduce_sum(tf.cast(tf.add(pre, truth) >= 1, dtype=tf.float32), axis=axis) # OR - ## old axis=[0,1,2,3] - # epsilon = 1e-5 - # batch_iou = inse / (union + epsilon) - ## new haodong - batch_iou = (inse + smooth) / (union + smooth) - iou = tf.reduce_mean(batch_iou) - return iou #, pre, truth, inse, union - - -# ## test soft/hard dice and iou -# import numpy as np -# y = np.zeros((1,10,10,1)) -# # y[0,0:5,0:5]=1.0 -# o = np.zeros((1,10,10,1)) -# # o[:,:,:,:] = 0 # what we want: dice=0 iou=0 OK -# # o[0,0:2,0:2]=0.3 # what we want: dice larger iou=0 OK -# # o[0,0:2,0:2]=0.6 # what we want: dice larger iou small OK -# # o[0,0:3,0:3]=0.6 # what we want: dice larger iou larger OK -# # o[0,0:3,0:3]=1 # what we want: dice larger iou same OK -# # o[0,0:5,0:5]=1 # what we want: dice=1 iou=1 OK -# # o[0,0:5,0:5]=0.3 # what we want: dice smaller iou=0 OK -# # o[0,0:5,0:5]=1e-2 # what we want: dice≈0 iou=0 OK -# # o[0,8:10,8:10]=1.0 # what we want: dice=0 iou=0 OK -# # o[0,8:10,8:10]=1e-10 # what we want: dice=0 iou=0 OK -# # y[:,:,:,:] = o[:,:,:,:] = 0 # what we want: dice=1 iou=1 OK -# ## why in u-net, dice=1 hard-dice=1 iou=1 exist?? print bug? -# -# d = dice_coe(o, y, 'jaccard', smooth=1.) 
-# hd = dice_hard_coe(o, y, smooth=1e-5) -# i = iou_coe(o, y, smooth=1e-5) -# sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) -# # sess.run(tf.local_variables_initializer()) -# print(sess.run([d,hd,i])) -# # p, t, i, u = sess.run([pre, truth, inse, union]) -# # import pprint -# # pprint.pprint(((y>0.5)*(o>0.5)).astype(int).tolist()) -# # pprint.pprint(p.tolist()) -# # pprint.pprint(t.tolist()) -# # pprint.pprint(i) -# # pprint.pprint(u) -# exit() - - -def cross_entropy_seq(logits, target_seqs, batch_size=None): #, batch_size=1, num_steps=None): - """Returns the expression of cross-entropy of two sequences, implement - softmax internally. Normally be used for fixed length RNN outputs, see `PTB example `__. - - Parameters - ---------- - logits : Tensor - 2D tensor with shape of `[batch_size * n_steps, n_classes]`. - target_seqs : Tensor - The target sequence, 2D tensor `[batch_size, n_steps]`, if the number of step is dynamic, please use ``tl.cost.cross_entropy_seq_with_mask`` instead. - batch_size : None or int. - Whether to divide the cost by batch size. - - If integer, the return cost will be divided by `batch_size`. - - If None (default), the return cost will not be divided by anything. - - Examples - -------- - >>> see `PTB example `__.for more details - >>> input_data = tf.placeholder(tf.int32, [batch_size, n_steps]) - >>> targets = tf.placeholder(tf.int32, [batch_size, n_steps]) - >>> # build the network - >>> print(net.outputs) - ... 
(batch_size * n_steps, n_classes) - >>> cost = tl.cost.cross_entropy_seq(network.outputs, targets) - - """ - # try: # TF 1.0 - sequence_loss_by_example_fn = tf.contrib.legacy_seq2seq.sequence_loss_by_example - # except: - # sequence_loss_by_example_fn = tf.nn.seq2seq.sequence_loss_by_example - - loss = sequence_loss_by_example_fn([logits], [tf.reshape(target_seqs, [-1])], [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)]) - # [tf.ones([batch_size * num_steps])]) - cost = tf.reduce_sum(loss) #/ batch_size - if batch_size is not None: - cost = cost / batch_size - return cost - - -def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False, name=None): - """Returns the expression of cross-entropy of two sequences, implement - softmax internally. Normally be used for Dynamic RNN with Synced sequence input and output. - - Parameters - ----------- - logits : Tensor - 2D tensor with shape of [batch_size * ?, n_classes], `?` means dynamic IDs for each example. - - Can be get from `DynamicRNNLayer` by setting ``return_seq_2d`` to `True`. - target_seqs : Tensor - int of tensor, like word ID. [batch_size, ?], `?` means dynamic IDs for each example. - input_mask : Tensor - The mask to compute loss, it has the same size with `target_seqs`, normally 0 or 1. - return_details : boolean - Whether to return detailed losses. - - If False (default), only returns the loss. - - If True, returns the loss, losses, weights and targets (see source code). - - Examples - -------- - >>> batch_size = 64 - >>> vocab_size = 10000 - >>> embedding_size = 256 - >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input") - >>> target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target") - >>> input_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="mask") - >>> net = tl.layers.EmbeddingInputlayer( - ... inputs = input_seqs, - ... vocabulary_size = vocab_size, - ... 
embedding_size = embedding_size, - ... name = 'seq_embedding') - >>> net = tl.layers.DynamicRNNLayer(net, - ... cell_fn = tf.contrib.rnn.BasicLSTMCell, - ... n_hidden = embedding_size, - ... dropout = (0.7 if is_train else None), - ... sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs), - ... return_seq_2d = True, - ... name = 'dynamicrnn') - >>> print(net.outputs) - ... (?, 256) - >>> net = tl.layers.DenseLayer(net, n_units=vocab_size, name="output") - >>> print(net.outputs) - ... (?, 10000) - >>> loss = tl.cost.cross_entropy_seq_with_mask(net.outputs, target_seqs, input_mask) - - """ - targets = tf.reshape(target_seqs, [-1]) # to one vector - weights = tf.to_float(tf.reshape(input_mask, [-1])) # to one vector like targets - losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name) * weights - #losses = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name)) # for TF1.0 and others - - # try: ## TF1.0 - loss = tf.divide( - tf.reduce_sum(losses), # loss from mask. reduce_sum before element-wise mul with mask !! - tf.reduce_sum(weights), - name="seq_loss_with_mask") - # except: ## TF0.12 - # loss = tf.div(tf.reduce_sum(losses), # loss from mask. reduce_sum before element-wise mul with mask !! - # tf.reduce_sum(weights), - # name="seq_loss_with_mask") - if return_details: - return loss, losses, weights, targets - else: - return loss - - -def cosine_similarity(v1, v2): - """Cosine similarity [-1, 1]. - - Parameters - ---------- - v1, v2 : Tensor - Tensor with the same shape [batch_size, n_feature]. - - Returns - ------- - Tensor - a tensor of shape [batch_size]. - - References - ---------- - - ``__. 
- - """ - # try: ## TF1.0 - cost = tf.reduce_sum(tf.multiply(v1, v2), 1) / (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) * tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1))) - # except: ## TF0.12 - # cost = tf.reduce_sum(tf.mul(v1, v2), reduction_indices=1) / (tf.sqrt(tf.reduce_sum(tf.mul(v1, v1), reduction_indices=1)) * tf.sqrt(tf.reduce_sum(tf.mul(v2, v2), reduction_indices=1))) - return cost - - -## Regularization Functions -def li_regularizer(scale, scope=None): - """Li regularization removes the neurons of previous layer. The `i` represents `inputs`. - Returns a function that can be used to apply group li regularization to weights. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - scope: str - An optional scope name for this function. - - Returns - -------- - A function with signature `li(weights, name=None)` that apply Li regularization. - - Raises - ------ - ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float. 
- - """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - # from tensorflow.python.platform import tf_logging as logging - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - if scale >= 1.: - raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def li(weights): - """Applies li regularization to weights.""" - with tf.name_scope('li_regularizer') as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - # if tf.__version__ <= '0.12': - # standard_ops_fn = standard_ops.mul - # else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), name=scope) - - return li - - -def lo_regularizer(scale): - """Lo regularization removes the neurons of current layer. The `o` represents `outputs` - Returns a function that can be used to apply group lo regularization to weights. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - - Returns - ------- - A function with signature `lo(weights, name=None)` that apply Lo regularization. - - Raises - ------ - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
- - """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - # from tensorflow.python.platform import tf_logging as logging - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - if scale >= 1.: - raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def lo(weights, name='lo_regularizer'): - """Applies group column regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - # if tf.__version__ <= '0.12': - # standard_ops_fn = standard_ops.mul - # else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))), name=scope) - - return lo - - -def maxnorm_regularizer(scale=1.0): - """Max-norm regularization returns a function that can be used to apply max-norm regularization to weights. - - More about max-norm, see `wiki-max norm `_. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - - Returns - --------- - A function with signature `mn(weights, name=None)` that apply Lo regularization. - - Raises - -------- - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
- - """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn(weights, name='max_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - # if tf.__version__ <= '0.12': - # standard_ops_fn = standard_ops.mul - # else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope) - - return mn - - -def maxnorm_o_regularizer(scale): - """Max-norm output regularization removes the neurons of current layer. - Returns a function that can be used to apply max-norm regularization to each column of weight matrix. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - - Returns - --------- - A function with signature `mn_o(weights, name=None)` that apply Lo regularization. - - Raises - --------- - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
- - """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn_o(weights, name='maxnorm_o_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - if tf.__version__ <= '0.12': - standard_ops_fn = standard_ops.mul - else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope) - - return mn_o - - -def maxnorm_i_regularizer(scale): - """Max-norm input regularization removes the neurons of previous layer. - Returns a function that can be used to apply max-norm regularization to each row of weight matrix. - The implementation follows `TensorFlow contrib `__. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - - Returns - --------- - A function with signature `mn_i(weights, name=None)` that apply Lo regularization. - - Raises - --------- - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
- - """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn_i(weights, name='maxnorm_i_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') - if tf.__version__ <= '0.12': - standard_ops_fn = standard_ops.mul - else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope) - - return mn_i diff --git a/tensorlayer/db.py b/tensorlayer/db.py deleted file mode 100644 index 3ffe2f9..0000000 --- a/tensorlayer/db.py +++ /dev/null @@ -1,449 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- -""" -Experimental Database Management System. - -Latest Version -""" - -import inspect -import pickle -import time -import uuid -from datetime import datetime - -import gridfs -from pymongo import MongoClient - - -def AutoFill(func): - def func_wrapper(self, *args, **kwargs): - d = inspect.getcallargs(func, self, *args, **kwargs) - d['args'].update({"studyID": self.studyID}) - return func(**d) - - return func_wrapper - - -class TensorDB(object): - """TensorDB is a MongoDB based manager that help you to manage data, network topology, parameters and logging. - - Parameters - ------------- - ip : str - Localhost or IP address. - port : int - Port number. - db_name : str - Database name. - user_name : str - User name. 
Set to None if it donnot need authentication. - password : str - Password - - Attributes - ------------ - db : ``pymongo.MongoClient[db_name]``, xxxxxx - datafs : ``gridfs.GridFS(self.db, collection="datafs")``, xxxxxxxxxx - modelfs : ``gridfs.GridFS(self.db, collection="modelfs")``, - paramsfs : ``gridfs.GridFS(self.db, collection="paramsfs")``, - db.Params : Collection for - db.TrainLog : Collection for - db.ValidLog : Collection for - db.TestLog : Collection for - studyID : string, unique ID, if None random generate one. - - Notes - ------------- - - MongoDB, as TensorDB is based on MongoDB, you need to install it in your local machine or remote machine. - - pip install pymongo, for MongoDB python API. - - You may like to install MongoChef or Mongo Management Studo APP for visualizing or testing your MongoDB. - """ - - def __init__(self, ip='localhost', port=27017, db_name='db_name', user_name=None, password='password', studyID=None): - ## connect mongodb - client = MongoClient(ip, port) - self.db = client[db_name] - if user_name != None: - self.db.authenticate(user_name, password) - - if studyID is None: - self.studyID = str(uuid.uuid1()) - else: - self.studyID = studyID - - ## define file system (Buckets) - self.datafs = gridfs.GridFS(self.db, collection="datafs") - self.modelfs = gridfs.GridFS(self.db, collection="modelfs") - self.paramsfs = gridfs.GridFS(self.db, collection="paramsfs") - self.archfs = gridfs.GridFS(self.db, collection="ModelArchitecture") - ## - print("[TensorDB] Connect SUCCESS {}:{} {} {} {}".format(ip, port, db_name, user_name, studyID)) - - self.ip = ip - self.port = port - self.db_name = db_name - self.user_name = user_name - - @classmethod - def __autofill(self, args): - return args.update({'studyID': self.studyID}) - - @staticmethod - def __serialization(ps): - return pickle.dumps(ps, protocol=2) - - @staticmethod - def __deserialization(ps): - return pickle.loads(ps) - - def save_params(self, params=None, args=None): #, 
file_name='parameters'): - """ Save parameters into MongoDB Buckets, and save the file ID into Params Collections. - - Parameters - ---------- - params : a list of parameters - args : dictionary, item meta data. - - Returns - --------- - f_id : the Buckets ID of the parameters. - """ - if params is None: - params = [] - if args is None: - args = {} - self.__autofill(args) - s = time.time() - f_id = self.paramsfs.put(self.__serialization(params)) #, file_name=file_name) - args.update({'f_id': f_id, 'time': datetime.utcnow()}) - self.db.Params.insert_one(args) - # print("[TensorDB] Save params: {} SUCCESS, took: {}s".format(file_name, round(time.time()-s, 2))) - print("[TensorDB] Save params: SUCCESS, took: {}s".format(round(time.time() - s, 2))) - return f_id - - @AutoFill - def find_one_params(self, args=None, sort=None): - """ Find one parameter from MongoDB Buckets. - - Parameters - ---------- - args : dictionary - For finding items. - - Returns - -------- - params : the parameters, return False if nothing found. - f_id : the Buckets ID of the parameters, return False if nothing found. - """ - if args is None: - args = {} - s = time.time() - # print(args) - d = self.db.Params.find_one(filter=args, sort=sort) - - if d is not None: - f_id = d['f_id'] - else: - print("[TensorDB] FAIL! Cannot find: {}".format(args)) - return False, False - try: - params = self.__deserialization(self.paramsfs.get(f_id).read()) - print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time() - s, 2))) - return params, f_id - except Exception: - return False, False - - @AutoFill - def find_all_params(self, args=None): - """ Find all parameter from MongoDB Buckets - - Parameters - ---------- - args : dictionary, find items - - Returns - -------- - params : the parameters, return False if nothing found. 
- - """ - if args is None: - args = {} - s = time.time() - pc = self.db.Params.find(args) - - if pc is not None: - f_id_list = pc.distinct('f_id') - params = [] - for f_id in f_id_list: # you may have multiple Buckets files - tmp = self.paramsfs.get(f_id).read() - params.append(self.__deserialization(tmp)) - else: - print("[TensorDB] FAIL! Cannot find any: {}".format(args)) - return False - - print("[TensorDB] Find all params SUCCESS, took: {}s".format(round(time.time() - s, 2))) - return params - - @AutoFill - def del_params(self, args=None): - """ Delete params in MongoDB uckets. - - Parameters - ----------- - args : dictionary, find items to delete, leave it empty to delete all parameters. - """ - if args is None: - args = {} - pc = self.db.Params.find(args) - f_id_list = pc.distinct('f_id') - # remove from Buckets - for f in f_id_list: - self.paramsfs.delete(f) - # remove from Collections - self.db.Params.remove(args) - - print("[TensorDB] Delete params SUCCESS: {}".format(args)) - - @staticmethod - def _print_dict(args): - # return " / ".join(str(key) + ": "+ str(value) for key, value in args.items()) - - string = '' - for key, value in args.items(): - if key is not '_id': - string += str(key) + ": " + str(value) + " / " - return string - - ## =========================== LOG =================================== ## - @AutoFill - def train_log(self, args=None): - """Save the training log. - - Parameters - ----------- - args : dictionary, items to save. - - Examples - --------- - >>> db.train_log(time=time.time(), {'loss': loss, 'acc': acc}) - """ - if args is None: - args = {} - _result = self.db.TrainLog.insert_one(args) - _log = self._print_dict(args) - #print("[TensorDB] TrainLog: " +_log) - return _result - - @AutoFill - def del_train_log(self, args=None): - """ Delete train log. - - Parameters - ----------- - args : dictionary, find items to delete, leave it empty to delete all log. 
- """ - if args is None: - args = {} - self.db.TrainLog.delete_many(args) - print("[TensorDB] Delete TrainLog SUCCESS") - - @AutoFill - def valid_log(self, args=None): - """Save the validating log. - - Parameters - ----------- - args : dictionary, items to save. - - Examples - --------- - >>> db.valid_log(time=time.time(), {'loss': loss, 'acc': acc}) - """ - if args is None: - args = {} - _result = self.db.ValidLog.insert_one(args) - # _log = "".join(str(key) + ": " + str(value) for key, value in args.items()) - _log = self._print_dict(args) - print("[TensorDB] ValidLog: " + _log) - return _result - - @AutoFill - def del_valid_log(self, args=None): - """ Delete validation log. - - Parameters - ----------- - args : dictionary, find items to delete, leave it empty to delete all log. - """ - if args is None: - args = {} - self.db.ValidLog.delete_many(args) - print("[TensorDB] Delete ValidLog SUCCESS") - - @AutoFill - def test_log(self, args=None): - """Save the testing log. - - Parameters - ----------- - args : dictionary, items to save. - - Examples - --------- - >>> db.test_log(time=time.time(), {'loss': loss, 'acc': acc}) - """ - if args is None: - args = {} - _result = self.db.TestLog.insert_one(args) - # _log = "".join(str(key) + str(value) for key, value in args.items()) - _log = self._print_dict(args) - print("[TensorDB] TestLog: " + _log) - return _result - - @AutoFill - def del_test_log(self, args=None): - """ Delete test log. - - Parameters - ----------- - args : dictionary, find items to delete, leave it empty to delete all log. 
- """ - if args is None: - args = {} - - self.db.TestLog.delete_many(args) - print("[TensorDB] Delete TestLog SUCCESS") - - # =========================== Network Architecture ================== ## - @AutoFill - def save_model_architecture(self, s, args=None): - if args is None: - args = {} - - self.__autofill(args) - fid = self.archfs.put(s, filename="modelarchitecture") - args.update({"fid": fid}) - self.db.march.insert_one(args) - - @AutoFill - def load_model_architecture(self, args=None): - - if args is None: - args = {} - - d = self.db.march.find_one(args) - if d is not None: - fid = d['fid'] - print(d) - print(fid) - # "print find" - else: - print("[TensorDB] FAIL! Cannot find: {}".format(args)) - print("no idtem") - return False, False - try: - archs = self.archfs.get(fid).read() - return archs, fid - except Exception as e: - print("exception") - print(e) - return False, False - - @AutoFill - def save_job(self, script=None, args=None): - """Save the job. - - Parameters - ----------- - script : a script file name or None. - args : dictionary, items to save. - - Examples - --------- - >>> # Save your job - >>> db.save_job('your_script.py', {'job_id': 1, 'learning_rate': 0.01, 'n_units': 100}) - >>> # Run your job - >>> temp = db.find_one_job(args={'job_id': 1}) - >>> print(temp['learning_rate']) - ... 0.01 - >>> import _your_script - ... running your script - """ - - if args is None: - args = {} - - self.__autofill(args) - if script is not None: - _script = open(script, 'rb').read() - args.update({'script': _script, 'script_name': script}) - # _result = self.db.Job.insert_one(args) - _result = self.db.Job.replace_one(args, args, upsert=True) - _log = self._print_dict(args) - print("[TensorDB] Save Job: script={}, args={}".format(script, args)) - return _result - - @AutoFill - def find_one_job(self, args=None): - """ Find one job from MongoDB Job Collections. - - Parameters - ---------- - args : dictionary, find items. 
- - Returns - -------- - dictionary : contains all meta data and script. - """ - - if args is None: - args = {} - - temp = self.db.Job.find_one(args) - - if temp is not None: - if 'script_name' in temp.keys(): - f = open('_' + temp['script_name'], 'wb') - f.write(temp['script']) - f.close() - print("[TensorDB] Find Job: {}".format(args)) - else: - print("[TensorDB] FAIL! Cannot find any: {}".format(args)) - return False - - return temp - - def push_job(self, margs, wargs, dargs, epoch): - - _ms, mid = self.load_model_architecture(margs) - _weight, wid = self.find_one_params(wargs) - args = {"weight": wid, "model": mid, "dargs": dargs, "epoch": epoch, "time": datetime.utcnow(), "Running": False} - self.__autofill(args) - self.db.JOBS.insert_one(args) - - def peek_job(self): - args = {'Running': False} - self.__autofill(args) - m = self.db.JOBS.find_one(args) - print(m) - if m is None: - return False - - s = self.paramsfs.get(m['weight']).read() - w = self.__deserialization(s) - - ach = self.archfs.get(m['model']).read() - - return m['_id'], ach, w, m["dargs"], m['epoch'] - - def run_job(self, jid): - self.db.JOBS.find_one_and_update({'_id': jid}, {'$set': {'Running': True, "Since": datetime.utcnow()}}) - - def del_job(self, jid): - self.db.JOBS.find_one_and_update({'_id': jid}, {'$set': {'Running': True, "Finished": datetime.utcnow()}}) - - def __str__(self): - _s = "[TensorDB] Info:\n" - _t = _s + " " + str(self.db) - return _t diff --git a/tensorlayer/distributed.py b/tensorlayer/distributed.py deleted file mode 100644 index e9d0335..0000000 --- a/tensorlayer/distributed.py +++ /dev/null @@ -1,327 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import json, os, time -import tensorflow as tf -from tensorflow.python.training import session_run_hook - -__all__ = ['TaskSpecDef', 'TaskSpec', 'DistributedSession', 'StopAtTimeHook', 'LoadCheckpoint'] - - -class TaskSpecDef(object): - """Specification for a distributed task. 
- - It contains the job name, index of the task, - the parameter servers and the worker servers. If you want to use the last worker - for continuous evaluation you can call the method `use_last_worker_as_evaluator` - which returns a new :class:`TaskSpecDef` object without the last worker in the - cluster specification. - - Parameters - ---------- - task_type : str - Task type. One of `master`, `worker` or `ps`. - index : int - The zero-based index of the task. Distributed training jobs will have a single - master task, one or more parameter servers, and one or more workers. - trial : int - The identifier of the trial being run. - ps_hosts : str OR list of str - A string with a coma separate list of hosts for the parameter servers - or a list of hosts. - worker_hosts : str OR list of str - A string with a coma separate list of hosts for the worker servers - or a list of hosts. - master : str - A string with the master hosts - - Notes - ---------- - master might not be included in TF_CONFIG and can be None. The shard_index is adjusted - in any case to assign 0 to master and >= 1 to workers. - This implementation doesn't support sparse arrays in the `TF_CONFIG` variable as the - official TensorFlow documentation shows, as it is not a supported by the json - definition. 
- - References - ---------- - - `ML-engine trainer considerations `__ - - """ - - def __init__(self, task_type='master', index=0, trial=None, ps_hosts=None, worker_hosts=None, master=None): - self.type = task_type - self._index = int(index) - self._cluster_spec = None - self.num_workers = 1 - self.num_ps = 0 - self.shard_index = int(index) - self._master = True - self.trial = trial - self.ps_hosts = ps_hosts - self.worker_hosts = worker_hosts - self.master = master - self._server = None - - if ps_hosts and worker_hosts: - self.ps_hosts = ps_hosts if isinstance(ps_hosts, list) else ps_hosts.split(',') - self.num_ps = len(self.ps_hosts) - self.worker_hosts = worker_hosts if isinstance(worker_hosts, list) else worker_hosts.split(',') - if master is not None and len(master) > 0: - self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts, 'master': master}) - # master is a worker too - self.num_workers = len(self.worker_hosts) + 1 - if self.type == 'worker': - self.shard_index = self._index + 1 - self._master = self.type == 'master' - else: - self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts}) - self.num_workers = len(self.worker_hosts) - if self.type == 'worker': - self.shard_index = self._index - self._master = self.type == 'worker' and self._index == 0 - - def is_ps(self): - """Returns true if this server is a parameter server""" - return self.type == 'ps' - - def is_worker(self): - """Returns true if this server is a worker server""" - return self.type == 'worker' - - def is_master(self): - """Returns true if this server is the master server""" - return self._master - - def is_evaluator(self): - """Returns true if this server is the evaluator server""" - return self.type == 'worker' and self.num_workers == self._index - - def device_fn(self): - """Returns the function with the specification to create the graph in this server""" - current_device = '/job:{}/task:{}'.format(self.type, 
self._index) - ps_devices = '/job:ps' - return tf.train.replica_device_setter(ps_device=ps_devices, worker_device=current_device, cluster=self._cluster_spec) - - def create_server(self): - if self._server is None and self.ps_hosts and self.worker_hosts and not self.is_evaluator(): - # create server and join if it is a parameter server - self._server = tf.train.Server(self._cluster_spec, job_name=self.type, task_index=self._index) - if self.is_ps(): - self._server.join() - - def target(self): - if self._server is None: - self.create_server() - if self._server is not None: - return self._server.target - else: - return None - - def use_last_worker_as_evaluator(self): - """Returns a new :class:`TaskSpecDef` where the last worker has been removed from - the list of worker_hosts, so it is not used for training anymore. You can call - is_evaluator to know whether this server is the evaluator one or not. - In case there is only one server for training this method raises an exception, as - you cannot use any server for evaluation. - - """ - if self.num_workers <= 1: - raise Exception('You need more than one worker instance to use one as evaluator') - return TaskSpecDef( - task_type=self.type, index=self._index, trial=self.trial, ps_hosts=self.ps_hosts, worker_hosts=self.worker_hosts[:-1], master=self.master) - - -def create_task_spec_def(): - """Returns the a :class:`TaskSpecDef` based on the environment variables for distributed training. 
- - References - ---------- - - `ML-engine trainer considerations `__ - - `TensorPort Distributed Computing `__ - - """ - if 'TF_CONFIG' in os.environ: - # TF_CONFIG is used in ML-engine - env = json.loads(os.environ.get('TF_CONFIG', '{}')) - task_data = env.get('task', None) or {'type': 'master', 'index': 0} - cluster_data = env.get('cluster', None) or {'ps': None, 'worker': None, 'master': None} - return TaskSpecDef( - task_type=task_data['type'], - index=task_data['index'], - trial=task_data['trial'] if 'trial' in task_data else None, - ps_hosts=cluster_data['ps'], - worker_hosts=cluster_data['worker'], - master=cluster_data['master'] if 'master' in cluster_data else None) - elif 'JOB_NAME' in os.environ: - # JOB_NAME, TASK_INDEX, PS_HOSTS, WORKER_HOSTS and MASTER_HOST are used in TensorPort - return TaskSpecDef( - task_type=os.environ['JOB_NAME'], - index=os.environ['TASK_INDEX'], - ps_hosts=os.environ.get('PS_HOSTS', None), - worker_hosts=os.environ.get('WORKER_HOSTS', None), - master=os.environ.get('MASTER_HOST', None)) - else: - raise Exception('You need to setup TF_CONFIG or JOB_NAME to define the task.') - - -def create_distributed_session(task_spec=None, - checkpoint_dir=None, - scaffold=None, - hooks=None, - chief_only_hooks=None, - save_checkpoint_secs=600, - save_summaries_steps=object(), - save_summaries_secs=object(), - config=None, - stop_grace_period_secs=120, - log_step_count_steps=100): - """Creates a distributed session. - - It calls `MonitoredTrainingSession` to create a :class:`MonitoredSession` for distributed training. - - Parameters - ---------- - task_spec : :class:`TaskSpecDef`. - The task spec definition from create_task_spec_def() - checkpoint_dir : str. - Optional path to a directory where to restore variables. - scaffold : ``Scaffold`` - A `Scaffold` used for gathering or building supportive ops. - If not specified, a default one is created. It's used to finalize the graph. - hooks : list of ``SessionRunHook`` objects. 
- Optional - chief_only_hooks : list of ``SessionRunHook`` objects. - Activate these hooks if `is_chief==True`, ignore otherwise. - save_checkpoint_secs : int - The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. - save_summaries_steps : int - The frequency, in number of global steps, that the - summaries are written to disk using a default summary saver. If both - `save_summaries_steps` and `save_summaries_secs` are set to `None`, then - the default summary saver isn't used. Default 100. - save_summaries_secs : int - The frequency, in secs, that the summaries are written - to disk using a default summary saver. If both `save_summaries_steps` and - `save_summaries_secs` are set to `None`, then the default summary saver - isn't used. Default not enabled. - config : ``tf.ConfigProto`` - an instance of `tf.ConfigProto` proto used to configure the session. - It's the `config` argument of constructor of `tf.Session`. - stop_grace_period_secs : int - Number of seconds given to threads to stop after - `close()` has been called. - log_step_count_steps : int - The frequency, in number of global steps, that the - global step/sec is logged. - - Examples - -------- - A simple example for distributed training where all the workers use the same dataset: - - >>> task_spec = TaskSpec() - >>> with tf.device(task_spec.device_fn()): - >>> tensors = create_graph() - >>> with tl.DistributedSession(task_spec=task_spec, - ... 
checkpoint_dir='/tmp/ckpt') as session: - >>> while not session.should_stop(): - >>> session.run(tensors) - - An example where the dataset is shared among the workers - (see https://www.tensorflow.org/programmers_guide/datasets): - - >>> task_spec = TaskSpec() - >>> # dataset is a :class:`tf.data.Dataset` with the raw data - >>> dataset = create_dataset() - >>> if task_spec is not None: - >>> dataset = dataset.shard(task_spec.num_workers, task_spec.shard_index) - >>> # shuffle or apply a map function to the new sharded dataset, for example: - >>> dataset = dataset.shuffle(buffer_size=10000) - >>> dataset = dataset.batch(batch_size) - >>> dataset = dataset.repeat(num_epochs) - >>> # create the iterator for the dataset and the input tensor - >>> iterator = dataset.make_one_shot_iterator() - >>> next_element = iterator.get_next() - >>> with tf.device(task_spec.device_fn()): - >>> # next_element is the input for the graph - >>> tensors = create_graph(next_element) - >>> with tl.DistributedSession(task_spec=task_spec, - ... checkpoint_dir='/tmp/ckpt') as session: - >>> while not session.should_stop(): - >>> session.run(tensors) - - References - ---------- - - `MonitoredTrainingSession `__ - - """ - target = task_spec.target() if task_spec is not None else None - is_chief = task_spec.is_master() if task_spec is not None else True - return tf.train.MonitoredTrainingSession( - master=target, - is_chief=is_chief, - checkpoint_dir=checkpoint_dir, - scaffold=scaffold, - save_checkpoint_secs=save_checkpoint_secs, - save_summaries_steps=save_summaries_steps, - save_summaries_secs=save_summaries_secs, - log_step_count_steps=log_step_count_steps, - stop_grace_period_secs=stop_grace_period_secs, - config=config, - hooks=hooks, - chief_only_hooks=chief_only_hooks) - - -class StopAtTimeHook(session_run_hook.SessionRunHook): - """Hook that requests stop after a specified time. 
- - Parameters - ---------- - time_running: int - Maximum time running in seconds - - """ - - def __init__(self, time_running): - self._time_running = time_running - self._end_time = 0 - - def begin(self): - self._end_time = time.time() + self._time_running - - def after_run(self, run_context, run_values): - if time.time() > self._end_time: - run_context.request_stop() - - -class LoadCheckpoint(session_run_hook.SessionRunHook): - """Hook that loads a checkpoint after the session is created. - - >>> from tensorflow.python.ops import variables as tf_variables - >>> from tensorflow.python.training.monitored_session import SingularMonitoredSession - >>> - >>> tensors = create_graph() - >>> saver = tf.train.Saver(var_list=tf_variables.trainable_variables()) - >>> checkpoint_hook = LoadCheckpoint(saver, my_checkpoint_file) - >>> with tf.SingularMonitoredSession(hooks=[checkpoint_hook]) as session: - >>> while not session.should_stop(): - >>> session.run(tensors) - - """ - - def __init__(self, saver, checkpoint): - self._saver = saver - self._checkpoint = checkpoint - self._loaded = False - - def after_create_session(self, session, coord): - if not self._loaded: - self._loaded = True - self._saver.restore(self._checkpoint) - - -# Alias -TaskSpec = create_task_spec_def -DistributedSession = create_distributed_session diff --git a/tensorlayer/files.py b/tensorlayer/files.py deleted file mode 100644 index 0f51400..0000000 --- a/tensorlayer/files.py +++ /dev/null @@ -1,1817 +0,0 @@ -# -*- coding: utf-8 -*- -""" -A collections of helper functions to work with dataset. - -Load benchmark dataset, save and restore model, save and load variables. -TensorFlow provides ``.ckpt`` file format to save and restore the models, while -we suggest to use standard python file format ``.npz`` to save models for the -sake of cross-platform. - -.. 
code-block:: python - - ## save model as .ckpt - saver = tf.train.Saver() - save_path = saver.save(sess, "model.ckpt") - # restore model from .ckpt - saver = tf.train.Saver() - saver.restore(sess, "model.ckpt") - - ## save model as .npz - tl.files.save_npz(network.all_params , name='model.npz') - # restore model from .npz (method 1) - load_params = tl.files.load_npz(name='model.npz') - tl.files.assign_params(sess, load_params, network) - # restore model from .npz (method 2) - tl.files.load_and_assign_npz(sess=sess, name='model.npz', network=network) - - ## you can assign the pre-trained parameters as follow - # 1st parameter - tl.files.assign_params(sess, [load_params[0]], network) - # the first three parameters - tl.files.assign_params(sess, load_params[:3], network) - -""" - -import gzip -import os -import pickle -import re -import sys -import tarfile -import zipfile - -import numpy as np -import tensorflow as tf -from six.moves import cPickle, zip -from tensorflow.python.platform import gfile - -from . import _logging as logging -from . 
import nlp, utils, visualize - -__all__ = [ - 'load_mnist_dataset', - 'load_fashion_mnist_dataset', - 'load_cifar10_dataset', - 'load_ptb_dataset', - 'load_matt_mahoney_text8_dataset', - 'load_imdb_dataset', - 'load_nietzsche_dataset', - 'load_wmt_en_fr_dataset', - 'load_flickr25k_dataset', - 'load_flickr1M_dataset', - 'load_cyclegan_dataset', - 'download_file_from_google_drive', - 'load_celebA_dataset', - 'load_voc_dataset', - 'save_npz', - 'load_npz', - 'assign_params', - 'load_and_assign_npz', - 'save_npz_dict', - 'load_and_assign_npz_dict', - 'save_ckpt', - 'load_ckpt', - 'save_any_to_npy', - 'load_npy_to_any', - 'file_exists', - 'folder_exists', - 'del_file', - 'del_folder', - 'read_file', - 'load_file_list', - 'load_folder_list', - 'exists_or_mkdir', - 'maybe_download_and_extract', - 'natural_keys', - 'npz_to_W_pdf', -] - - -## Load dataset functions -def load_mnist_dataset(shape=(-1, 784), path='data'): - """Load the original mnist. - - Automatically download MNIST dataset and return the training, validation and test set with 50000, 10000 and 10000 digit images respectively. - - Parameters - ---------- - shape : tuple - The shape of digit images (the default is (-1, 784), alternatively (-1, 28, 28, 1)). - path : str - The path that the data is downloaded to. - - Returns - ------- - X_train, y_train, X_val, y_val, X_test, y_test: tuple - Return splitted training/validation/test set respectively. - - Examples - -------- - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1,784), path='datasets') - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1)) - """ - return _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/') - - -def load_fashion_mnist_dataset(shape=(-1, 784), path='data'): - """Load the fashion mnist. 
- - Automatically download fashion-MNIST dataset and return the training, validation and test set with 50000, 10000 and 10000 fashion images respectively, `examples `__. - - Parameters - ---------- - shape : tuple - The shape of digit images (the default is (-1, 784), alternatively (-1, 28, 28, 1)). - path : str - The path that the data is downloaded to. - - Returns - ------- - X_train, y_train, X_val, y_val, X_test, y_test: tuple - Return splitted training/validation/test set respectively. - - Examples - -------- - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1,784), path='datasets') - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_fashion_mnist_dataset(shape=(-1, 28, 28, 1)) - """ - return _load_mnist_dataset(shape, path, name='fashion_mnist', url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/') - - -def _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/'): - """A generic function to load mnist-like dataset. - - Parameters: - ---------- - shape : tuple - The shape of digit images. - path : str - The path that the data is downloaded to. - name : str - The dataset name you want to use(the default is 'mnist'). - url : str - The url of dataset(the default is 'http://yann.lecun.com/exdb/mnist/'). - """ - path = os.path.join(path, name) - - # Define functions for loading mnist-like data's images and labels. - # For convenience, they also download the requested files if needed. - def load_mnist_images(path, filename): - filepath = maybe_download_and_extract(filename, path, url) - - logging.info(filepath) - # Read the inputs in Yann LeCun's binary format. 
- with gzip.open(filepath, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=16) - # The inputs are vectors now, we reshape them to monochrome 2D images, - # following the shape convention: (examples, channels, rows, columns) - data = data.reshape(shape) - # The inputs come as bytes, we convert them to float32 in range [0,1]. - # (Actually to range [0, 255/256], for compatibility to the version - # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.) - return data / np.float32(256) - - def load_mnist_labels(path, filename): - filepath = maybe_download_and_extract(filename, path, url) - # Read the labels in Yann LeCun's binary format. - with gzip.open(filepath, 'rb') as f: - data = np.frombuffer(f.read(), np.uint8, offset=8) - # The labels are vectors of integers now, that's exactly what we want. - return data - - # Download and read the training and test set images and labels. - logging.info("Load or Download {0} > {1}".format(name.upper(), path)) - X_train = load_mnist_images(path, 'train-images-idx3-ubyte.gz') - y_train = load_mnist_labels(path, 'train-labels-idx1-ubyte.gz') - X_test = load_mnist_images(path, 't10k-images-idx3-ubyte.gz') - y_test = load_mnist_labels(path, 't10k-labels-idx1-ubyte.gz') - - # We reserve the last 10000 training examples for validation. - X_train, X_val = X_train[:-10000], X_train[-10000:] - y_train, y_val = y_train[:-10000], y_train[-10000:] - - # We just return all the arrays in order, as expected in main(). - # (It doesn't matter how we do this as long as we can read them again.) 
- X_train = np.asarray(X_train, dtype=np.float32) - y_train = np.asarray(y_train, dtype=np.int32) - X_val = np.asarray(X_val, dtype=np.float32) - y_val = np.asarray(y_val, dtype=np.int32) - X_test = np.asarray(X_test, dtype=np.float32) - y_test = np.asarray(y_test, dtype=np.int32) - return X_train, y_train, X_val, y_val, X_test, y_test - - -def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False): - """Load CIFAR-10 dataset. - - It consists of 60000 32x32 colour images in 10 classes, with - 6000 images per class. There are 50000 training images and 10000 test images. - - The dataset is divided into five training batches and one test batch, each with - 10000 images. The test batch contains exactly 1000 randomly-selected images from - each class. The training batches contain the remaining images in random order, - but some training batches may contain more images from one class than another. - Between them, the training batches contain exactly 5000 images from each class. - - Parameters - ---------- - shape : tupe - The shape of digit images e.g. (-1, 3, 32, 32) and (-1, 32, 32, 3). - path : str - The path that the data is downloaded to, defaults is ``data/cifar10/``. - plotable : boolean - Whether to plot some image examples, False as default. 
- - Examples - -------- - >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3)) - - References - ---------- - - `CIFAR website `__ - - `Data download link `__ - - ``__ - - """ - path = os.path.join(path, 'cifar10') - logging.info("Load or Download cifar10 > {}".format(path)) - - #Helper function to unpickle the data - def unpickle(file): - fp = open(file, 'rb') - if sys.version_info.major == 2: - data = pickle.load(fp) - elif sys.version_info.major == 3: - data = pickle.load(fp, encoding='latin-1') - fp.close() - return data - - filename = 'cifar-10-python.tar.gz' - url = 'https://www.cs.toronto.edu/~kriz/' - #Download and uncompress file - maybe_download_and_extract(filename, path, url, extract=True) - - #Unpickle file and fill in data - X_train = None - y_train = [] - for i in range(1, 6): - data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "data_batch_{}".format(i))) - if i == 1: - X_train = data_dic['data'] - else: - X_train = np.vstack((X_train, data_dic['data'])) - y_train += data_dic['labels'] - - test_data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "test_batch")) - X_test = test_data_dic['data'] - y_test = np.array(test_data_dic['labels']) - - if shape == (-1, 3, 32, 32): - X_test = X_test.reshape(shape) - X_train = X_train.reshape(shape) - elif shape == (-1, 32, 32, 3): - X_test = X_test.reshape(shape, order='F') - X_train = X_train.reshape(shape, order='F') - X_test = np.transpose(X_test, (0, 2, 1, 3)) - X_train = np.transpose(X_train, (0, 2, 1, 3)) - else: - X_test = X_test.reshape(shape) - X_train = X_train.reshape(shape) - - y_train = np.array(y_train) - - if plotable: - logging.info('\nCIFAR-10') - import matplotlib.pyplot as plt - fig = plt.figure(1) - - logging.info('Shape of a training image: X_train[0] %s' % X_train[0].shape) - - plt.ion() # interactive mode - count = 1 - for _ in range(10): # each row - for _ in range(10): # each column - _ = fig.add_subplot(10, 10, count) - if 
shape == (-1, 3, 32, 32): - # plt.imshow(X_train[count-1], interpolation='nearest') - plt.imshow(np.transpose(X_train[count - 1], (1, 2, 0)), interpolation='nearest') - # plt.imshow(np.transpose(X_train[count-1], (2, 1, 0)), interpolation='nearest') - elif shape == (-1, 32, 32, 3): - plt.imshow(X_train[count - 1], interpolation='nearest') - # plt.imshow(np.transpose(X_train[count-1], (1, 0, 2)), interpolation='nearest') - else: - raise Exception("Do not support the given 'shape' to plot the image examples") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # 不显示刻度(tick) - plt.gca().yaxis.set_major_locator(plt.NullLocator()) - count = count + 1 - plt.draw() # interactive mode - plt.pause(3) # interactive mode - - logging.info("X_train: %s" % X_train.shape) - logging.info("y_train: %s" % y_train.shape) - logging.info("X_test: %s" % X_test.shape) - logging.info("y_test: %s" % y_test.shape) - - X_train = np.asarray(X_train, dtype=np.float32) - X_test = np.asarray(X_test, dtype=np.float32) - y_train = np.asarray(y_train, dtype=np.int32) - y_test = np.asarray(y_test, dtype=np.int32) - - return X_train, y_train, X_test, y_test - - -def load_ptb_dataset(path='data'): - """Load Penn TreeBank (PTB) dataset. - - It is used in many LANGUAGE MODELING papers, - including "Empirical Evaluation and Combination of Advanced Language - Modeling Techniques", "Recurrent Neural Network Regularization". - It consists of 929k training words, 73k validation words, and 82k test - words. It has 10k words in its vocabulary. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/ptb/``. - - Returns - -------- - train_data, valid_data, test_data : list of int - The training, validating and testing data in integer format. - vocab_size : int - The vocabulary size. 
- - Examples - -------- - >>> train_data, valid_data, test_data, vocab_size = tl.files.load_ptb_dataset() - - References - --------------- - - ``tensorflow.models.rnn.ptb import reader`` - - `Manual download `__ - - Notes - ------ - - If you want to get the raw data, see the source code. - - """ - path = os.path.join(path, 'ptb') - logging.info("Load or Download Penn TreeBank (PTB) dataset > {}".format(path)) - - #Maybe dowload and uncompress tar, or load exsisting files - filename = 'simple-examples.tgz' - url = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/' - maybe_download_and_extract(filename, path, url, extract=True) - - data_path = os.path.join(path, 'simple-examples', 'data') - train_path = os.path.join(data_path, "ptb.train.txt") - valid_path = os.path.join(data_path, "ptb.valid.txt") - test_path = os.path.join(data_path, "ptb.test.txt") - - word_to_id = nlp.build_vocab(nlp.read_words(train_path)) - - train_data = nlp.words_to_word_ids(nlp.read_words(train_path), word_to_id) - valid_data = nlp.words_to_word_ids(nlp.read_words(valid_path), word_to_id) - test_data = nlp.words_to_word_ids(nlp.read_words(test_path), word_to_id) - vocab_size = len(word_to_id) - - # logging.info(nlp.read_words(train_path)) # ... 'according', 'to', 'mr.', '', ''] - # logging.info(train_data) # ... 214, 5, 23, 1, 2] - # logging.info(word_to_id) # ... 'beyond': 1295, 'anti-nuclear': 9599, 'trouble': 1520, '': 2 ... } - # logging.info(vocabulary) # 10000 - # exit() - return train_data, valid_data, test_data, vocab_size - - -def load_matt_mahoney_text8_dataset(path='data'): - """Load Matt Mahoney's dataset. - - Download a text file from Matt Mahoney's website - if not present, and make sure it's the right size. - Extract the first file enclosed in a zip file as a list of words. - This dataset can be used for Word Embedding. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/mm_test8/``. 
- - Returns - -------- - list of str - The raw text data e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...] - - Examples - -------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> print('Data size', len(words)) - - """ - path = os.path.join(path, 'mm_test8') - logging.info("Load or Download matt_mahoney_text8 Dataset> {}".format(path)) - - filename = 'text8.zip' - url = 'http://mattmahoney.net/dc/' - maybe_download_and_extract(filename, path, url, expected_bytes=31344016) - - with zipfile.ZipFile(os.path.join(path, filename)) as f: - word_list = f.read(f.namelist()[0]).split() - for idx, _ in enumerate(word_list): - word_list[idx] = word_list[idx].decode() - return word_list - - -def load_imdb_dataset(path='data', nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3): - """Load IMDB dataset. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/imdb/``. - nb_words : int - Number of words to get. - skip_top : int - Top most frequent words to ignore (they will appear as oov_char value in the sequence data). - maxlen : int - Maximum sequence length. Any longer sequence will be truncated. - seed : int - Seed for reproducible data shuffling. - start_char : int - The start of a sequence will be marked with this character. Set to 1 because 0 is usually the padding character. - oov_char : int - Words that were cut out because of the num_words or skip_top limit will be replaced with this character. - index_from : int - Index actual words with this index and higher. - - Examples - -------- - >>> X_train, y_train, X_test, y_test = tl.files.load_imdb_dataset( - ... nb_words=20000, test_split=0.2) - >>> print('X_train.shape', X_train.shape) - ... (20000,) [[1, 62, 74, ... 1033, 507, 27],[1, 60, 33, ... 13, 1053, 7]..] - >>> print('y_train.shape', y_train.shape) - ... 
(20000,) [1 0 0 ..., 1 0 1] - - References - ----------- - - `Modified from keras. `__ - - """ - path = os.path.join(path, 'imdb') - - filename = "imdb.pkl" - url = 'https://s3.amazonaws.com/text-datasets/' - maybe_download_and_extract(filename, path, url) - - if filename.endswith(".gz"): - f = gzip.open(os.path.join(path, filename), 'rb') - else: - f = open(os.path.join(path, filename), 'rb') - - X, labels = cPickle.load(f) - f.close() - - np.random.seed(seed) - np.random.shuffle(X) - np.random.seed(seed) - np.random.shuffle(labels) - - if start_char is not None: - X = [[start_char] + [w + index_from for w in x] for x in X] - elif index_from: - X = [[w + index_from for w in x] for x in X] - - if maxlen: - new_X = [] - new_labels = [] - for x, y in zip(X, labels): - if len(x) < maxlen: - new_X.append(x) - new_labels.append(y) - X = new_X - labels = new_labels - if not X: - raise Exception('After filtering for sequences shorter than maxlen=' + str(maxlen) + ', no sequence was kept. ' 'Increase maxlen.') - if not nb_words: - nb_words = max([max(x) for x in X]) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] - else: - nX = [] - for x in X: - nx = [] - for w in x: - if (w >= nb_words or w < skip_top): - nx.append(w) - nX.append(nx) - X = nX - - X_train = np.array(X[:int(len(X) * (1 - test_split))]) - y_train = np.array(labels[:int(len(X) * (1 - test_split))]) - - X_test = np.array(X[int(len(X) * (1 - test_split)):]) - y_test = np.array(labels[int(len(X) * (1 - test_split)):]) - - return X_train, y_train, X_test, y_test - - -def load_nietzsche_dataset(path='data'): - """Load Nietzsche dataset. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/nietzsche/``. - - Returns - -------- - str - The content. 
- - Examples - -------- - >>> see tutorial_generate_text.py - >>> words = tl.files.load_nietzsche_dataset() - >>> words = basic_clean_str(words) - >>> words = words.split() - - """ - logging.info("Load or Download nietzsche dataset > {}".format(path)) - path = os.path.join(path, 'nietzsche') - - filename = "nietzsche.txt" - url = 'https://s3.amazonaws.com/text-datasets/' - filepath = maybe_download_and_extract(filename, path, url) - - with open(filepath, "r") as f: - words = f.read() - return words - - -def load_wmt_en_fr_dataset(path='data'): - """Load WMT'15 English-to-French translation dataset. - - It will download the data from the WMT'15 Website (10^9-French-English corpus), and the 2013 news test from the same site as development set. - Returns the directories of training data and test data. - - Parameters - ---------- - path : str - The path that the data is downloaded to, defaults is ``data/wmt_en_fr/``. - - References - ---------- - - Code modified from /tensorflow/models/rnn/translation/data_utils.py - - Notes - ----- - Usually, it will take a long time to download this dataset. - - """ - path = os.path.join(path, 'wmt_en_fr') - # URLs for WMT data. 
- _WMT_ENFR_TRAIN_URL = "http://www.statmt.org/wmt10/" - _WMT_ENFR_DEV_URL = "http://www.statmt.org/wmt15/" - - def gunzip_file(gz_path, new_path): - """Unzips from gz_path into new_path.""" - logging.info("Unpacking %s to %s" % (gz_path, new_path)) - with gzip.open(gz_path, "rb") as gz_file: - with open(new_path, "wb") as new_file: - for line in gz_file: - new_file.write(line) - - def get_wmt_enfr_train_set(path): - """Download the WMT en-fr training corpus to directory unless it's there.""" - filename = "training-giga-fren.tar" - maybe_download_and_extract(filename, path, _WMT_ENFR_TRAIN_URL, extract=True) - train_path = os.path.join(path, "giga-fren.release2.fixed") - gunzip_file(train_path + ".fr.gz", train_path + ".fr") - gunzip_file(train_path + ".en.gz", train_path + ".en") - return train_path - - def get_wmt_enfr_dev_set(path): - """Download the WMT en-fr training corpus to directory unless it's there.""" - filename = "dev-v2.tgz" - dev_file = maybe_download_and_extract(filename, path, _WMT_ENFR_DEV_URL, extract=False) - dev_name = "newstest2013" - dev_path = os.path.join(path, "newstest2013") - if not (gfile.Exists(dev_path + ".fr") and gfile.Exists(dev_path + ".en")): - logging.info("Extracting tgz file %s" % dev_file) - with tarfile.open(dev_file, "r:gz") as dev_tar: - fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr") - en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en") - fr_dev_file.name = dev_name + ".fr" # Extract without "dev/" prefix. - en_dev_file.name = dev_name + ".en" - dev_tar.extract(fr_dev_file, path) - dev_tar.extract(en_dev_file, path) - return dev_path - - logging.info("Load or Download WMT English-to-French translation > {}".format(path)) - - train_path = get_wmt_enfr_train_set(path) - dev_path = get_wmt_enfr_dev_set(path) - - return train_path, dev_path - - -def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False): - """Load Flickr25K dataset. 
- - Returns a list of images by a given tag from Flick25k dataset, - it will download Flickr25k from `the official website `__ - at the first time you use it. - - Parameters - ------------ - tag : str or None - What images to return. - - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. - - If you want to get all images, set to ``None``. - - path : str - The path that the data is downloaded to, defaults is ``data/flickr25k/``. - n_threads : int - The number of thread to read image. - printable : boolean - Whether to print infomation when reading images, default is ``False``. - - Examples - ----------- - Get images with tag of sky - - >>> images = tl.files.load_flickr25k_dataset(tag='sky') - - Get all images - - >>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True) - - """ - path = os.path.join(path, 'flickr25k') - - filename = 'mirflickr25k.zip' - url = 'http://press.liacs.nl/mirflickr/mirflickr25k/' - - # download dataset - if folder_exists(path + "/mirflickr") is False: - logging.info("[*] Flickr25k is nonexistent in {}".format(path)) - maybe_download_and_extract(filename, path, url, extract=True) - del_file(path + '/' + filename) - - # return images by the given tag. - # 1. image path list - folder_imgs = path + "/mirflickr" - path_imgs = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False) - path_imgs.sort(key=natural_keys) - - # 2. tag path list - folder_tags = path + "/mirflickr/meta/tags" - path_tags = load_file_list(path=folder_tags, regx='\\.txt', printable=False) - path_tags.sort(key=natural_keys) - - # 3. 
select images - if tag is None: - logging.info("[Flickr25k] reading all images") - else: - logging.info("[Flickr25k] reading images with tag: {}".format(tag)) - images_list = [] - for idx, _v in enumerate(path_tags): - tags = read_file(folder_tags + '/' + path_tags[idx]).split('\n') - # logging.info(idx+1, tags) - if tag is None or tag in tags: - images_list.append(path_imgs[idx]) - - images = visualize.read_images(images_list, folder_imgs, n_threads=n_threads, printable=printable) - return images - - -def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printable=False): - """Load Flick1M dataset. - - Returns a list of images by a given tag from Flickr1M dataset, - it will download Flickr1M from `the official website `__ - at the first time you use it. - - Parameters - ------------ - tag : str or None - What images to return. - - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__. - - If you want to get all images, set to ``None``. - - size : int - integer between 1 to 10. 1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10. - path : str - The path that the data is downloaded to, defaults is ``data/flickr25k/``. - n_threads : int - The number of thread to read image. - printable : boolean - Whether to print infomation when reading images, default is ``False``. 
- - Examples - ---------- - Use 200k images - - >>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=2) - - Use 1 Million images - - >>> images = tl.files.load_flickr1M_dataset(tag='zebra') - - """ - path = os.path.join(path, 'flickr1M') - logging.info("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000)) - images_zip = [ - 'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip', 'images7.zip', 'images8.zip', 'images9.zip' - ] - tag_zip = 'tags.zip' - url = 'http://press.liacs.nl/mirflickr/mirflickr1m/' - - # download dataset - for image_zip in images_zip[0:size]: - image_folder = image_zip.split(".")[0] - # logging.info(path+"/"+image_folder) - if folder_exists(path + "/" + image_folder) is False: - # logging.info(image_zip) - logging.info("[Flickr1M] {} is missing in {}".format(image_folder, path)) - maybe_download_and_extract(image_zip, path, url, extract=True) - del_file(path + '/' + image_zip) - os.system("mv {} {}".format(path + '/images', path + '/' + image_folder)) - else: - logging.info("[Flickr1M] {} exists in {}".format(image_folder, path)) - - # download tag - if folder_exists(path + "/tags") is False: - logging.info("[Flickr1M] tag files is nonexistent in {}".format(path)) - maybe_download_and_extract(tag_zip, path, url, extract=True) - del_file(path + '/' + tag_zip) - else: - logging.info("[Flickr1M] tags exists in {}".format(path)) - - # 1. image path list - images_list = [] - images_folder_list = [] - for i in range(0, size): - images_folder_list += load_folder_list(path=os.path.join(path, 'images%d' % i)) - images_folder_list.sort(key=lambda s: int(s.split('/')[-1])) # folder/images/ddd - - for folder in images_folder_list[0:size * 10]: - tmp = load_file_list(path=folder, regx='\\.jpg', printable=False) - tmp.sort(key=lambda s: int(s.split('.')[-2])) # ddd.jpg - images_list.extend([folder + '/' + x for x in tmp]) - - # 2. 
tag path list - tag_list = [] - tag_folder_list = load_folder_list(path + "/tags") - tag_folder_list.sort(key=lambda s: int(s.split('/')[-1])) # folder/images/ddd - - for folder in tag_folder_list[0:size * 10]: - tmp = load_file_list(path=folder, regx='\\.txt', printable=False) - tmp.sort(key=lambda s: int(s.split('.')[-2])) # ddd.txt - tmp = [folder + '/' + s for s in tmp] - tag_list += tmp - - # 3. select images - logging.info("[Flickr1M] searching tag: {}".format(tag)) - select_images_list = [] - for idx, _val in enumerate(tag_list): - tags = read_file(tag_list[idx]).split('\n') - if tag in tags: - select_images_list.append(images_list[idx]) - - logging.info("[Flickr1M] reading images with tag: {}".format(tag)) - images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable) - return images - - -def load_cyclegan_dataset(filename='summer2winter_yosemite', path='data'): - """Load images from CycleGAN's database, see `this link `__. - - Parameters - ------------ - filename : str - The dataset you want, see `this link `__. 
- path : str - The path that the data is downloaded to, defaults is `data/cyclegan` - - Examples - --------- - >>> im_train_A, im_train_B, im_test_A, im_test_B = load_cyclegan_dataset(filename='summer2winter_yosemite') - - """ - path = os.path.join(path, 'cyclegan') - url = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/' - - if folder_exists(os.path.join(path, filename)) is False: - logging.info("[*] {} is nonexistent in {}".format(filename, path)) - maybe_download_and_extract(filename + '.zip', path, url, extract=True) - del_file(os.path.join(path, filename + '.zip')) - - def load_image_from_folder(path): - path_imgs = load_file_list(path=path, regx='\\.jpg', printable=False) - return visualize.read_images(path_imgs, path=path, n_threads=10, printable=False) - - im_train_A = load_image_from_folder(os.path.join(path, filename, "trainA")) - im_train_B = load_image_from_folder(os.path.join(path, filename, "trainB")) - im_test_A = load_image_from_folder(os.path.join(path, filename, "testA")) - im_test_B = load_image_from_folder(os.path.join(path, filename, "testB")) - - def if_2d_to_3d(images): # [h, w] --> [h, w, 3] - for i, _v in enumerate(images): - if len(images[i].shape) == 2: - images[i] = images[i][:, :, np.newaxis] - images[i] = np.tile(images[i], (1, 1, 3)) - return images - - im_train_A = if_2d_to_3d(im_train_A) - im_train_B = if_2d_to_3d(im_train_B) - im_test_A = if_2d_to_3d(im_test_A) - im_test_B = if_2d_to_3d(im_test_B) - - return im_train_A, im_train_B, im_test_A, im_test_B - - -def download_file_from_google_drive(ID, destination): - """Download file from Google Drive. - - See ``tl.files.load_celebA_dataset`` for example. - - Parameters - -------------- - ID : str - The driver ID. - destination : str - The destination for save file. 
- - """ - from tqdm import tqdm - import requests - - def save_response_content(response, destination, chunk_size=32 * 1024): - total_size = int(response.headers.get('content-length', 0)) - with open(destination, "wb") as f: - for chunk in tqdm(response.iter_content(chunk_size), total=total_size, unit='B', unit_scale=True, desc=destination): - if chunk: # filter out keep-alive new chunks - f.write(chunk) - - def get_confirm_token(response): - for key, value in response.cookies.items(): - if key.startswith('download_warning'): - return value - return None - - URL = "https://docs.google.com/uc?export=download" - session = requests.Session() - - response = session.get(URL, params={'id': ID}, stream=True) - token = get_confirm_token(response) - - if token: - params = {'id': ID, 'confirm': token} - response = session.get(URL, params=params, stream=True) - save_response_content(response, destination) - - -def load_celebA_dataset(path='data'): - """Load CelebA dataset - - Return a list of image path. - - Parameters - ----------- - path : str - The path that the data is downloaded to, defaults is ``data/celebA/``. 
- - """ - data_dir = 'celebA' - filename, drive_id = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM" - save_path = os.path.join(path, filename) - image_path = os.path.join(path, data_dir) - if os.path.exists(image_path): - logging.info('[*] {} already exists'.format(save_path)) - else: - exists_or_mkdir(path) - download_file_from_google_drive(drive_id, save_path) - zip_dir = '' - with zipfile.ZipFile(save_path) as zf: - zip_dir = zf.namelist()[0] - zf.extractall(path) - os.remove(save_path) - os.rename(os.path.join(path, zip_dir), image_path) - - data_files = load_file_list(path=image_path, regx='\\.jpg', printable=False) - for i, _v in enumerate(data_files): - data_files[i] = os.path.join(image_path, data_files[i]) - return data_files - - -def load_voc_dataset(path='data', dataset='2012', contain_classes_in_person=False): - """Pascal VOC 2007/2012 Dataset. - - It has 20 objects: - aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor - and additional 3 classes : head, hand, foot for person. - - Parameters - ----------- - path : str - The path that the data is downloaded to, defaults is ``data/VOC``. - dataset : str - The VOC dataset version, `2012`, `2007`, `2007test` or `2012test`. We usually train model on `2007+2012` and test it on `2007test`. - contain_classes_in_person : boolean - Whether include head, hand and foot annotation, default is False. - - Returns - --------- - imgs_file_list : list of str - Full paths of all images. - imgs_semseg_file_list : list of str - Full paths of all maps for semantic segmentation. Note that not all images have this map! - imgs_insseg_file_list : list of str - Full paths of all maps for instance segmentation. Note that not all images have this map! - imgs_ann_file_list : list of str - Full paths of all annotations for bounding box and object class, all images have this annotations. 
- classes : list of str - Classes in order. - classes_in_person : list of str - Classes in person. - classes_dict : dictionary - Class label to integer. - n_objs_list : list of int - Number of objects in all images in ``imgs_file_list`` in order. - objs_info_list : list of str - Darknet format for the annotation of all images in ``imgs_file_list`` in order. ``[class_id x_centre y_centre width height]`` in ratio format. - objs_info_dicts : dictionary - The annotation of all images in ``imgs_file_list``, ``{imgs_file_list : dictionary for annotation}``, - format from `TensorFlow/Models/object-detection `__. - - Examples - ---------- - >>> imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, - >>> classes, classes_in_person, classes_dict, - >>> n_objs_list, objs_info_list, objs_info_dicts = tl.files.load_voc_dataset(dataset="2012", contain_classes_in_person=False) - >>> idx = 26 - >>> print(classes) - ... ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] - >>> print(classes_dict) - ... {'sheep': 16, 'horse': 12, 'bicycle': 1, 'bottle': 4, 'cow': 9, 'sofa': 17, 'car': 6, 'dog': 11, 'cat': 7, 'person': 14, 'train': 18, 'diningtable': 10, 'aeroplane': 0, 'bus': 5, 'pottedplant': 15, 'tvmonitor': 19, 'chair': 8, 'bird': 2, 'boat': 3, 'motorbike': 13} - >>> print(imgs_file_list[idx]) - ... data/VOC/VOC2012/JPEGImages/2007_000423.jpg - >>> print(n_objs_list[idx]) - ... 2 - >>> print(imgs_ann_file_list[idx]) - ... data/VOC/VOC2012/Annotations/2007_000423.xml - >>> print(objs_info_list[idx]) - ... 14 0.173 0.461333333333 0.142 0.496 - ... 14 0.828 0.542666666667 0.188 0.594666666667 - >>> ann = tl.prepro.parse_darknet_ann_str_to_list(objs_info_list[idx]) - >>> print(ann) - ... 
[[14, 0.173, 0.461333333333, 0.142, 0.496], [14, 0.828, 0.542666666667, 0.188, 0.594666666667]] - >>> c, b = tl.prepro.parse_darknet_ann_list_to_cls_box(ann) - >>> print(c, b) - ... [14, 14] [[0.173, 0.461333333333, 0.142, 0.496], [0.828, 0.542666666667, 0.188, 0.594666666667]] - - References - ------------- - - `Pascal VOC2012 Website `__. - - `Pascal VOC2007 Website `__. - - """ - path = os.path.join(path, 'VOC') - - def _recursive_parse_xml_to_dict(xml): - """Recursively parses XML contents to python dict. - - We assume that `object` tags are the only ones that can appear - multiple times at the same level of a tree. - - Args: - xml: xml tree obtained by parsing XML file contents using lxml.etree - - Returns: - Python dictionary holding XML contents. - - """ - if not xml: - # if xml is not None: - return {xml.tag: xml.text} - result = {} - for child in xml: - child_result = _recursive_parse_xml_to_dict(child) - if child.tag != 'object': - result[child.tag] = child_result[child.tag] - else: - if child.tag not in result: - result[child.tag] = [] - result[child.tag].append(child_result[child.tag]) - return {xml.tag: result} - - from lxml import etree # pip install lxml - import xml.etree.ElementTree as ET - - if dataset == "2012": - url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/" - tar_filename = "VOCtrainval_11-May-2012.tar" - extracted_filename = "VOC2012" #"VOCdevkit/VOC2012" - logging.info(" [============= VOC 2012 =============]") - elif dataset == "2012test": - extracted_filename = "VOC2012test" #"VOCdevkit/VOC2012" - logging.info(" [============= VOC 2012 Test Set =============]") - logging.info(" \nAuthor: 2012test only have person annotation, so 2007test is highly recommended for testing !\n") - import time - time.sleep(3) - if os.path.isdir(os.path.join(path, extracted_filename)) is False: - logging.info("For VOC 2012 Test data - online registration required") - logging.info( - " Please download VOC2012test.tar from: \n register: 
http://host.robots.ox.ac.uk:8080 \n voc2012 : http://host.robots.ox.ac.uk:8080/eval/challenges/voc2012/ \ndownload: http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar" - ) - logging.info(" unzip VOC2012test.tar,rename the folder to VOC2012test and put it into %s" % path) - exit() - # # http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar - # url = "http://host.robots.ox.ac.uk:8080/eval/downloads/" - # tar_filename = "VOC2012test.tar" - elif dataset == "2007": - url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/" - tar_filename = "VOCtrainval_06-Nov-2007.tar" - extracted_filename = "VOC2007" - logging.info(" [============= VOC 2007 =============]") - elif dataset == "2007test": - # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/index.html#testdata - # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar - url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/" - tar_filename = "VOCtest_06-Nov-2007.tar" - extracted_filename = "VOC2007test" - logging.info(" [============= VOC 2007 Test Set =============]") - else: - raise Exception("Please set the dataset aug to 2012, 2012test or 2007.") - - # download dataset - if dataset != "2012test": - from sys import platform as _platform - if folder_exists(os.path.join(path, extracted_filename)) is False: - logging.info("[VOC] {} is nonexistent in {}".format(extracted_filename, path)) - maybe_download_and_extract(tar_filename, path, url, extract=True) - del_file(os.path.join(path, tar_filename)) - if dataset == "2012": - if _platform == "win32": - os.system("mv {}\VOCdevkit\VOC2012 {}\VOC2012".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2012 {}/VOC2012".format(path, path)) - elif dataset == "2007": - if _platform == "win32": - os.system("mv {}\VOCdevkit\VOC2007 {}\VOC2007".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007".format(path, path)) - elif dataset == "2007test": - if _platform == "win32": - os.system("mv {}\VOCdevkit\VOC2007 
{}\VOC2007test".format(path, path)) - else: - os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007test".format(path, path)) - del_folder(os.path.join(path, 'VOCdevkit')) - # object classes(labels) NOTE: YOU CAN CUSTOMIZE THIS LIST - classes = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", - "pottedplant", "sheep", "sofa", "train", "tvmonitor" - ] - if contain_classes_in_person: - classes_in_person = ["head", "hand", "foot"] - else: - classes_in_person = [] - - classes += classes_in_person # use extra 3 classes for person - - classes_dict = utils.list_string_to_dict(classes) - logging.info("[VOC] object classes {}".format(classes_dict)) - - # 1. image path list - # folder_imgs = path+"/"+extracted_filename+"/JPEGImages/" - folder_imgs = os.path.join(path, extracted_filename, "JPEGImages") - imgs_file_list = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False) - logging.info("[VOC] {} images found".format(len(imgs_file_list))) - imgs_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000027.jpg --> 2007000027 - imgs_file_list = [os.path.join(folder_imgs, s) for s in imgs_file_list] - # logging.info('IM',imgs_file_list[0::3333], imgs_file_list[-1]) - if dataset != "2012test": - ##======== 2. 
semantic segmentation maps path list - # folder_semseg = path+"/"+extracted_filename+"/SegmentationClass/" - folder_semseg = os.path.join(path, extracted_filename, "SegmentationClass") - imgs_semseg_file_list = load_file_list(path=folder_semseg, regx='\\.png', printable=False) - logging.info("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list))) - imgs_semseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032 - imgs_semseg_file_list = [os.path.join(folder_semseg, s) for s in imgs_semseg_file_list] - # logging.info('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1]) - ##======== 3. instance segmentation maps path list - # folder_insseg = path+"/"+extracted_filename+"/SegmentationObject/" - folder_insseg = os.path.join(path, extracted_filename, "SegmentationObject") - imgs_insseg_file_list = load_file_list(path=folder_insseg, regx='\\.png', printable=False) - logging.info("[VOC] {} maps for instance segmentation found".format(len(imgs_semseg_file_list))) - imgs_insseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032 - imgs_insseg_file_list = [os.path.join(folder_insseg, s) for s in imgs_insseg_file_list] - # logging.info('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1]) - else: - imgs_semseg_file_list = [] - imgs_insseg_file_list = [] - # 4. 
annotations for bounding box and object class - # folder_ann = path+"/"+extracted_filename+"/Annotations/" - folder_ann = os.path.join(path, extracted_filename, "Annotations") - imgs_ann_file_list = load_file_list(path=folder_ann, regx='\\.xml', printable=False) - logging.info("[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list))) - imgs_ann_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000027.xml --> 2007000027 - imgs_ann_file_list = [os.path.join(folder_ann, s) for s in imgs_ann_file_list] - # logging.info('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1]) - - if dataset == "2012test": # remove unused images in JPEG folder - imgs_file_list_new = [] - for ann in imgs_ann_file_list: - ann = os.path.split(ann)[-1].split('.')[0] - for im in imgs_file_list: - if ann in im: - imgs_file_list_new.append(im) - break - imgs_file_list = imgs_file_list_new - logging.info("[VOC] keep %d images" % len(imgs_file_list_new)) - - # parse XML annotations - def convert(size, box): - dw = 1. / size[0] - dh = 1. 
/ size[1] - x = (box[0] + box[1]) / 2.0 - y = (box[2] + box[3]) / 2.0 - w = box[1] - box[0] - h = box[3] - box[2] - x = x * dw - w = w * dw - y = y * dh - h = h * dh - return x, y, w, h - - def convert_annotation(file_name): - """Given VOC2012 XML Annotations, returns number of objects and info.""" - in_file = open(file_name) - out_file = "" - tree = ET.parse(in_file) - root = tree.getroot() - size = root.find('size') - w = int(size.find('width').text) - h = int(size.find('height').text) - n_objs = 0 - - for obj in root.iter('object'): - if dataset != "2012test": - difficult = obj.find('difficult').text - cls = obj.find('name').text - if cls not in classes or int(difficult) == 1: - continue - else: - cls = obj.find('name').text - if cls not in classes: - continue - cls_id = classes.index(cls) - xmlbox = obj.find('bndbox') - b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) - bb = convert((w, h), b) - - out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' - n_objs += 1 - if cls in "person": - for part in obj.iter('part'): - cls = part.find('name').text - if cls not in classes_in_person: - continue - cls_id = classes.index(cls) - xmlbox = part.find('bndbox') - b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) - bb = convert((w, h), b) - # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') - out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' - n_objs += 1 - in_file.close() - return n_objs, out_file - - logging.info("[VOC] Parsing xml annotations files") - n_objs_list = [] - objs_info_list = [] # Darknet Format list of string - objs_info_dicts = {} - for idx, ann_file in enumerate(imgs_ann_file_list): - n_objs, objs_info = convert_annotation(ann_file) - n_objs_list.append(n_objs) - objs_info_list.append(objs_info) - with 
tf.gfile.GFile(ann_file, 'r') as fid: - xml_str = fid.read() - xml = etree.fromstring(xml_str) - data = _recursive_parse_xml_to_dict(xml)['annotation'] - objs_info_dicts.update({imgs_file_list[idx]: data}) - - return imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, \ - classes, classes_in_person, classes_dict,\ - n_objs_list, objs_info_list, objs_info_dicts - - -def save_npz(save_list=None, name='model.npz', sess=None): - """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore. - - Parameters - ---------- - save_list : list of tensor - A list of parameters (tensor) to be saved. - name : str - The name of the `.npz` file. - sess : None or Session - Session may be required in some case. - - Examples - -------- - Save model to npz - - >>> tl.files.save_npz(network.all_params, name='model.npz', sess=sess) - - Load model from npz (Method 1) - - >>> load_params = tl.files.load_npz(name='model.npz') - >>> tl.files.assign_params(sess, load_params, network) - - Load model from npz (Method 2) - - >>> tl.files.load_and_assign_npz(sess=sess, name='model.npz', network=network) - - Notes - ----- - If you got session issues, you can change the value.eval() to value.eval(session=sess) - - References - ---------- - `Saving dictionary using numpy `__ - - """ - if save_list is None: - save_list = [] - - save_list_var = [] - if sess: - save_list_var = sess.run(save_list) - else: - try: - save_list_var.extend([v.eval() for v in save_list]) - except Exception: - logging.info(" Fail to save model, Hint: pass the session into this function, tl.files.save_npz(network.all_params, name='model.npz', sess=sess)") - np.savez(name, params=save_list_var) - save_list_var = None - del save_list_var - logging.info("[*] %s saved" % name) - - -def load_npz(path='', name='model.npz'): - """Load the parameters of a Model saved by tl.files.save_npz(). - - Parameters - ---------- - path : str - Folder path to `.npz` file. 
- name : str - The name of the `.npz` file. - - Returns - -------- - list of array - A list of parameters in order. - - Examples - -------- - - See ``tl.files.save_npz`` - - References - ---------- - - `Saving dictionary using numpy `__ - - """ - d = np.load(path + name) - return d['params'] - - -def assign_params(sess, params, network): - """Assign the given parameters to the TensorLayer network. - - Parameters - ---------- - sess : Session - TensorFlow Session. - params : list of array - A list of parameters (array) in order. - network : :class:`Layer` - The network to be assigned. - - Returns - -------- - list of operations - A list of tf ops in order that assign params. Support sess.run(ops) manually. - - Examples - -------- - - See ``tl.files.save_npz`` - - References - ---------- - - `Assign value to a TensorFlow variable `__ - - """ - ops = [] - for idx, param in enumerate(params): - ops.append(network.all_params[idx].assign(param)) - if sess is not None: - sess.run(ops) - return ops - - -def load_and_assign_npz(sess=None, name=None, network=None): - """Load model from npz and assign to a network. - - Parameters - ------------- - sess : Session - TensorFlow Session. - name : str - The name of the `.npz` file. - network : :class:`Layer` - The network to be assigned. - - Returns - -------- - False or network - Returns False, if the model is not exist. - - Examples - -------- - - See ``tl.files.save_npz`` - - """ - if network is None: - raise ValueError("network is None.") - if sess is None: - raise ValueError("session is None.") - if not os.path.exists(name): - logging.info("[!] Load {} failed!".format(name)) - return False - else: - params = load_npz(name=name) - assign_params(sess, params, network) - logging.info("[*] Load {} SUCCESS!".format(name)) - return network - - -def save_npz_dict(save_list=None, name='model.npz', sess=None): - """Input parameters and the file name, save parameters as a dictionary into .npz file. 
- - Use ``tl.files.load_and_assign_npz_dict()`` to restore. - - Parameters - ---------- - save_list : list of parameters - A list of parameters (tensor) to be saved. - name : str - The name of the `.npz` file. - sess : Session - TensorFlow Session. - - """ - if sess is None: - raise ValueError("session is None.") - if save_list is None: - save_list = [] - - save_list_names = [tensor.name for tensor in save_list] - save_list_var = sess.run(save_list) - save_var_dict = {save_list_names[idx]: val for idx, val in enumerate(save_list_var)} - np.savez(name, **save_var_dict) - save_list_var = None - save_var_dict = None - del save_list_var - del save_var_dict - logging.info("[*] Model saved in npz_dict %s" % name) - - -def load_and_assign_npz_dict(name='model.npz', sess=None): - """Restore the parameters saved by ``tl.files.save_npz_dict()``. - - Parameters - ---------- - name : str - The name of the `.npz` file. - sess : Session - TensorFlow Session. - - """ - if sess is None: - raise ValueError("session is None.") - - if not os.path.exists(name): - logging.info("[!] Load {} failed!".format(name)) - return False - - params = np.load(name) - if len(params.keys()) != len(set(params.keys())): - raise Exception("Duplication in model npz_dict %s" % name) - ops = list() - for key in params.keys(): - try: - # tensor = tf.get_default_graph().get_tensor_by_name(key) - # varlist = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=key) - varlist = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=key) - if len(varlist) > 1: - raise Exception("[!] Multiple candidate variables to be assigned for name %s" % key) - elif len(varlist) == 0: - raise KeyError - else: - ops.append(varlist[0].assign(params[key])) - logging.info("[*] params restored: %s" % key) - except KeyError: - logging.info("[!] Warning: Tensor named %s not found in network." 
% key) - - sess.run(ops) - logging.info("[*] Model restored from npz_dict %s" % name) - - -def save_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None, global_step=None, printable=False): - """Save parameters into `ckpt` file. - - Parameters - ------------ - sess : Session - TensorFlow Session. - mode_name : str - The name of the model, default is ``model.ckpt``. - save_dir : str - The path / file directory to the `ckpt`, default is ``checkpoint``. - var_list : list of tensor - The parameters / variables (tensor) to be saved. If empty, save all global variables (default). - global_step : int or None - Step number. - printable : boolean - Whether to print all parameters information. - - See Also - -------- - load_ckpt - - """ - if sess is None: - raise ValueError("session is None.") - if var_list is None: - var_list = [] - - ckpt_file = os.path.join(save_dir, mode_name) - if var_list == []: - var_list = tf.global_variables() - - logging.info("[*] save %s n_params: %d" % (ckpt_file, len(var_list))) - - if printable: - for idx, v in enumerate(var_list): - logging.info(" param {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) - - saver = tf.train.Saver(var_list) - saver.save(sess, ckpt_file, global_step=global_step) - - -def load_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None, is_latest=True, printable=False): - """Load parameters from `ckpt` file. - - Parameters - ------------ - sess : Session - TensorFlow Session. - mode_name : str - The name of the model, default is ``model.ckpt``. - save_dir : str - The path / file directory to the `ckpt`, default is ``checkpoint``. - var_list : list of tensor - The parameters / variables (tensor) to be saved. If empty, save all global variables (default). - is_latest : boolean - Whether to load the latest `ckpt`, if False, load the `ckpt` with the name of ```mode_name``. - printable : boolean - Whether to print all parameters information. 
- - Examples - ---------- - Save all global parameters. - - >>> tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', save_dir='model', printable=True) - - Save specific parameters. - - >>> tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', var_list=net.all_params, save_dir='model', printable=True) - - Load latest ckpt. - - >>> tl.files.load_ckpt(sess=sess, var_list=net.all_params, save_dir='model', printable=True) - - Load specific ckpt. - - >>> tl.files.load_ckpt(sess=sess, mode_name='model.ckpt', var_list=net.all_params, save_dir='model', is_latest=False, printable=True) - - """ - if sess is None: - raise ValueError("session is None.") - if var_list is None: - var_list = [] - - if is_latest: - ckpt_file = tf.train.latest_checkpoint(save_dir) - else: - ckpt_file = os.path.join(save_dir, mode_name) - - if not var_list: - var_list = tf.global_variables() - - logging.info("[*] load %s n_params: %d" % (ckpt_file, len(var_list))) - - if printable: - for idx, v in enumerate(var_list): - logging.info(" param {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) - - try: - saver = tf.train.Saver(var_list) - saver.restore(sess, ckpt_file) - except Exception as e: - logging.info(e) - logging.info("[*] load ckpt fail ...") - - -def save_any_to_npy(save_dict=None, name='file.npy'): - """Save variables to `.npy` file. - - Parameters - ------------ - save_dict : directory - The variables to be saved. - name : str - File name. - - Examples - --------- - >>> tl.files.save_any_to_npy(save_dict={'data': ['a','b']}, name='test.npy') - >>> data = tl.files.load_npy_to_any(name='test.npy') - >>> print(data) - ... {'data': ['a','b']} - - """ - if save_dict is None: - save_dict = {} - np.save(name, save_dict) - - -def load_npy_to_any(path='', name='file.npy'): - """Load `.npy` file. - - Parameters - ------------ - path : str - Path to the file (optional). - name : str - File name. 
- - Examples - --------- - - see tl.files.save_any_to_npy() - - """ - file_path = os.path.join(path, name) - try: - return np.load(file_path).item() - except Exception: - return np.load(file_path) - raise Exception("[!] Fail to load %s" % file_path) - - -def file_exists(filepath): - """Check whether a file exists by given file path.""" - return os.path.isfile(filepath) - - -def folder_exists(folderpath): - """Check whether a folder exists by given folder path.""" - return os.path.isdir(folderpath) - - -def del_file(filepath): - """Delete a file by given file path.""" - os.remove(filepath) - - -def del_folder(folderpath): - """Delete a folder by given folder path.""" - os.rmdir(folderpath) - - -def read_file(filepath): - """Read a file and return a string. - - Examples - --------- - >>> data = tl.files.read_file('data.txt') - - """ - with open(filepath, 'r') as afile: - return afile.read() - - -def load_file_list(path=None, regx='\.npz', printable=True): - r"""Return a file list in a folder by given a path and regular expression. - - Parameters - ---------- - path : str or None - A folder path, if `None`, use the current directory. - regx : str - The regx of file name. - printable : boolean - Whether to print the files infomation. - - Examples - ---------- - >>> file_list = tl.files.load_file_list(path=None, regx='w1pre_[0-9]+\.(npz)') - - """ - if path is None: - path = os.getcwd() - file_list = os.listdir(path) - return_list = [] - for _, f in enumerate(file_list): - if re.search(regx, f): - return_list.append(f) - # return_list.sort() - if printable: - logging.info('Match file list = %s' % return_list) - logging.info('Number of files = %d' % len(return_list)) - return return_list - - -def load_folder_list(path=""): - """Return a folder list in a folder by given a folder path. - - Parameters - ---------- - path : str - A folder path. 
- - """ - return [os.path.join(path, o) for o in os.listdir(path) if os.path.isdir(os.path.join(path, o))] - - -def exists_or_mkdir(path, verbose=True): - """Check a folder by given name, if not exist, create the folder and return False, - if directory exists, return True. - - Parameters - ---------- - path : str - A folder path. - verbose : boolean - If True (default), prints results. - - Returns - -------- - boolean - True if folder already exist, otherwise, returns False and create the folder. - - Examples - -------- - >>> tl.files.exists_or_mkdir("checkpoints/train") - - """ - if not os.path.exists(path): - if verbose: - logging.info("[*] creates %s ..." % path) - os.makedirs(path) - return False - else: - if verbose: - logging.info("[!] %s exists ..." % path) - return True - - -def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None): - """Checks if file exists in working_directory otherwise tries to dowload the file, - and optionally also tries to extract the file if format is ".zip" or ".tar" - - Parameters - ----------- - filename : str - The name of the (to be) dowloaded file. - working_directory : str - A folder path to search for the file in and dowload the file to - url : str - The URL to download the file from - extract : boolean - If True, tries to uncompress the dowloaded file is ".tar.gz/.tar.bz2" or ".zip" file, default is False. - expected_bytes : int or None - If set tries to verify that the downloaded file is of the specified size, otherwise raises an Exception, defaults is None which corresponds to no check being performed. - - Returns - ---------- - str - File path of the dowloaded (uncompressed) file. - - Examples - -------- - >>> down_file = tl.files.maybe_download_and_extract(filename='train-images-idx3-ubyte.gz', - ... working_directory='data/', - ... url_source='http://yann.lecun.com/exdb/mnist/') - >>> tl.files.maybe_download_and_extract(filename='ADEChallengeData2016.zip', - ... 
working_directory='data/', - ... url_source='http://sceneparsing.csail.mit.edu/data/', - ... extract=True) - - """ - - # We first define a download function, supporting both Python 2 and 3. - def _download(filename, working_directory, url_source): - def _dlProgress(count, blockSize, totalSize): - if (totalSize != 0): - percent = float(count * blockSize) / float(totalSize) * 100.0 - sys.stdout.write("\r" "Downloading " + filename + "...%d%%" % percent) - sys.stdout.flush() - - if sys.version_info[0] == 2: - from urllib import urlretrieve - else: - from urllib.request import urlretrieve - filepath = os.path.join(working_directory, filename) - urlretrieve(url_source + filename, filepath, reporthook=_dlProgress) - sys.stdout.write('\n') - - exists_or_mkdir(working_directory, verbose=False) - filepath = os.path.join(working_directory, filename) - - if not os.path.exists(filepath): - _download(filename, working_directory, url_source) - statinfo = os.stat(filepath) - logging.info('Succesfully downloaded %s %s bytes.' % (filename, statinfo.st_size)) #, 'bytes.') - if (not (expected_bytes is None) and (expected_bytes != statinfo.st_size)): - raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?') - if (extract): - if tarfile.is_tarfile(filepath): - logging.info('Trying to extract tar file') - tarfile.open(filepath, 'r').extractall(working_directory) - logging.info('... Success!') - elif zipfile.is_zipfile(filepath): - logging.info('Trying to extract zip file') - with zipfile.ZipFile(filepath) as zf: - zf.extractall(working_directory) - logging.info('... Success!') - else: - logging.info("Unknown compression_format only .tar.gz/.tar.bz2/.tar and .zip supported") - return filepath - - -def natural_keys(text): - """Sort list of string with number in human order. - - Examples - ---------- - >>> l = ['im1.jpg', 'im31.jpg', 'im11.jpg', 'im21.jpg', 'im03.jpg', 'im05.jpg'] - >>> l.sort(key=tl.files.natural_keys) - ... 
['im1.jpg', 'im03.jpg', 'im05', 'im11.jpg', 'im21.jpg', 'im31.jpg'] - >>> l.sort() # that is what we dont want - ... ['im03.jpg', 'im05', 'im1.jpg', 'im11.jpg', 'im21.jpg', 'im31.jpg'] - - References - ---------- - - `link `__ - - """ - - # - alist.sort(key=natural_keys) sorts in human order - # http://nedbatchelder.com/blog/200712/human_sorting.html - # (See Toothy's implementation in the comments) - def atoi(text): - return int(text) if text.isdigit() else text - - return [atoi(c) for c in re.split('(\d+)', text)] - - -# Visualizing npz files -def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'): - r"""Convert the first weight matrix of `.npz` file to `.pdf` by using `tl.visualize.W()`. - - Parameters - ---------- - path : str - A folder path to `npz` files. - regx : str - Regx for the file name. - - Examples - --------- - Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf. - - >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx='w1pre_[0-9]+\.(npz)') - - """ - file_list = load_file_list(path=path, regx=regx) - for f in file_list: - W = load_npz(path, f)[0] - logging.info("%s --> %s" % (f, f.split('.')[0] + '.pdf')) - visualize.draw_weights(W, second=10, saveable=True, name=f.split('.')[0], fig_idx=2012) diff --git a/tensorlayer/iterate.py b/tensorlayer/iterate.py deleted file mode 100644 index 777f905..0000000 --- a/tensorlayer/iterate.py +++ /dev/null @@ -1,278 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -from six.moves import xrange - -__all__ = [ - 'minibatches', - 'seq_minibatches', - 'seq_minibatches2', - 'ptb_iterator', -] - - -def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): - """Generate a generator that input a group of example in numpy.array and - their labels, return the examples and labels by the given batch size. - - Parameters - ---------- - inputs : numpy.array - The input features, every row is a example. 
- targets : numpy.array - The labels of inputs, every row is a example. - batch_size : int - The batch size. - shuffle : boolean - Indicating whether to use a shuffling queue, shuffle the dataset before return. - - Examples - -------- - >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) - >>> y = np.asarray([0,1,2,3,4,5]) - >>> for batch in tl.iterate.minibatches(inputs=X, targets=y, batch_size=2, shuffle=False): - >>> print(batch) - ... (array([['a', 'a'], - ... ['b', 'b']], - ... dtype='>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) - >>> y = np.asarray([0, 1, 2, 3, 4, 5]) - >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1): - >>> print(batch) - ... (array([['a', 'a'], - ... ['b', 'b'], - ... ['b', 'b'], - ... ['c', 'c']], - ... dtype='>> return_last = True - >>> num_steps = 2 - >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) - >>> Y = np.asarray([0,1,2,3,4,5]) - >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=Y, batch_size=2, seq_length=num_steps, stride=1): - >>> x, y = batch - >>> if return_last: - >>> tmp_y = y.reshape((-1, num_steps) + y.shape[1:]) - >>> y = tmp_y[:, -1] - >>> print(x, y) - ... [['a' 'a'] - ... ['b' 'b'] - ... ['b' 'b'] - ... ['c' 'c']] [1 2] - ... [['c' 'c'] - ... ['d' 'd'] - ... ['d' 'd'] - ... 
['e' 'e']] [3 4] - - """ - assert len(inputs) == len(targets) - n_loads = (batch_size * stride) + (seq_length - stride) - for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)): - seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:], dtype=inputs.dtype) - seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:], dtype=targets.dtype) - for b_idx in xrange(batch_size): - start_seq_idx = start_idx + (b_idx * stride) - end_seq_idx = start_seq_idx + seq_length - seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx] - seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx] - flatten_inputs = seq_inputs.reshape((-1, ) + inputs.shape[1:]) - flatten_targets = seq_targets.reshape((-1, ) + targets.shape[1:]) - yield flatten_inputs, flatten_targets - - -def seq_minibatches2(inputs, targets, batch_size, num_steps): - """Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and - the target context by the given batch_size and num_steps (sequence_length). - In TensorFlow's tutorial, this generates the `batch_size` pointers into the raw PTB data, and allows minibatch iteration along these pointers. - - Parameters - ---------- - inputs : list of data - The context in list format; note that context usually be represented by splitting by space, and then convert to unique word IDs. - targets : list of data - The context in list format; note that context usually be represented by splitting by space, and then convert to unique word IDs. - batch_size : int - The batch size. - num_steps : int - The number of unrolls. i.e. sequence length - - Yields - ------ - Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. - - Raises - ------ - ValueError : if batch_size or num_steps are too high. - - Examples - -------- - >>> X = [i for i in range(20)] - >>> Y = [i for i in range(20,40)] - >>> for batch in tl.iterate.seq_minibatches2(X, Y, batch_size=2, num_steps=3): - ... x, y = batch - ... 
print(x, y) - ... - ... [[ 0. 1. 2.] - ... [ 10. 11. 12.]] - ... [[ 20. 21. 22.] - ... [ 30. 31. 32.]] - ... - ... [[ 3. 4. 5.] - ... [ 13. 14. 15.]] - ... [[ 23. 24. 25.] - ... [ 33. 34. 35.]] - ... - ... [[ 6. 7. 8.] - ... [ 16. 17. 18.]] - ... [[ 26. 27. 28.] - ... [ 36. 37. 38.]] - - Notes - ----- - - Hint, if the input data are images, you can modify the source code `data = np.zeros([batch_size, batch_len)` to `data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]])`. - """ - assert len(inputs) == len(targets) - data_len = len(inputs) - batch_len = data_len // batch_size - # data = np.zeros([batch_size, batch_len]) - data = np.zeros((batch_size, batch_len) + inputs.shape[1:], dtype=inputs.dtype) - data2 = np.zeros([batch_size, batch_len]) - - for i in range(batch_size): - data[i] = inputs[batch_len * i:batch_len * (i + 1)] - data2[i] = targets[batch_len * i:batch_len * (i + 1)] - - epoch_size = (batch_len - 1) // num_steps - - if epoch_size == 0: - raise ValueError("epoch_size == 0, decrease batch_size or num_steps") - - for i in range(epoch_size): - x = data[:, i * num_steps:(i + 1) * num_steps] - x2 = data2[:, i * num_steps:(i + 1) * num_steps] - yield (x, x2) - - -def ptb_iterator(raw_data, batch_size, num_steps): - """Generate a generator that iterates on a list of words, see `PTB example `__. - Yields the source contexts and the target context by the given batch_size and num_steps (sequence_length). - - In TensorFlow's tutorial, this generates `batch_size` pointers into the raw - PTB data, and allows minibatch iteration along these pointers. - - Parameters - ---------- - raw_data : a list - the context in list format; note that context usually be - represented by splitting by space, and then convert to unique - word IDs. - batch_size : int - the batch size. - num_steps : int - the number of unrolls. i.e. sequence_length - - Yields - ------ - Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. 
- The second element of the tuple is the same data time-shifted to the - right by one. - - Raises - ------ - ValueError : if batch_size or num_steps are too high. - - Examples - -------- - >>> train_data = [i for i in range(20)] - >>> for batch in tl.iterate.ptb_iterator(train_data, batch_size=2, num_steps=3): - >>> x, y = batch - >>> print(x, y) - ... [[ 0 1 2] <---x 1st subset/ iteration - ... [10 11 12]] - ... [[ 1 2 3] <---y - ... [11 12 13]] - ... - ... [[ 3 4 5] <--- 1st batch input 2nd subset/ iteration - ... [13 14 15]] <--- 2nd batch input - ... [[ 4 5 6] <--- 1st batch target - ... [14 15 16]] <--- 2nd batch target - ... - ... [[ 6 7 8] 3rd subset/ iteration - ... [16 17 18]] - ... [[ 7 8 9] - ... [17 18 19]] - """ - raw_data = np.array(raw_data, dtype=np.int32) - - data_len = len(raw_data) - batch_len = data_len // batch_size - data = np.zeros([batch_size, batch_len], dtype=np.int32) - for i in range(batch_size): - data[i] = raw_data[batch_len * i:batch_len * (i + 1)] - - epoch_size = (batch_len - 1) // num_steps - - if epoch_size == 0: - raise ValueError("epoch_size == 0, decrease batch_size or num_steps") - - for i in range(epoch_size): - x = data[:, i * num_steps:(i + 1) * num_steps] - y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1] - yield (x, y) diff --git a/tensorlayer/layers/__init__.py b/tensorlayer/layers/__init__.py deleted file mode 100644 index cad53aa..0000000 --- a/tensorlayer/layers/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -TensorLayer provides rich layer implementations trailed for -various benchmarks and domain-specific problems. In addition, we also -support transparent access to native TensorFlow parameters. -For example, we provide not only layers for local response normalization, but also -layers that allow user to apply ``tf.nn.lrn`` on ``network.outputs``. -More functions can be found in `TensorFlow API `__. 
-""" - -from .core import * -from .convolution import * -from .super_resolution import * -from .normalization import * -from .spatial_transformer import * -from .object_detection import * -from .time_distribution import * -from .pooling import * -from .padding import * -from .recurrent import * -from .shape import * -from .importer import * -from .merge import * -from .extend import * -from .stack import * -from .special_activation import * -from .flow_control import * diff --git a/tensorlayer/layers/__pycache__/__init__.cpython-34.pyc b/tensorlayer/layers/__pycache__/__init__.cpython-34.pyc deleted file mode 100644 index 1365b66..0000000 Binary files a/tensorlayer/layers/__pycache__/__init__.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/__init__.cpython-35.pyc b/tensorlayer/layers/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index f16e560..0000000 Binary files a/tensorlayer/layers/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/convolution.cpython-34.pyc b/tensorlayer/layers/__pycache__/convolution.cpython-34.pyc deleted file mode 100644 index d44611c..0000000 Binary files a/tensorlayer/layers/__pycache__/convolution.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/convolution.cpython-35.pyc b/tensorlayer/layers/__pycache__/convolution.cpython-35.pyc deleted file mode 100644 index 3e90e1d..0000000 Binary files a/tensorlayer/layers/__pycache__/convolution.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/core.cpython-34.pyc b/tensorlayer/layers/__pycache__/core.cpython-34.pyc deleted file mode 100644 index 7c17dc6..0000000 Binary files a/tensorlayer/layers/__pycache__/core.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/core.cpython-35.pyc b/tensorlayer/layers/__pycache__/core.cpython-35.pyc deleted file mode 100644 index 56e21c2..0000000 Binary files 
a/tensorlayer/layers/__pycache__/core.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/extend.cpython-34.pyc b/tensorlayer/layers/__pycache__/extend.cpython-34.pyc deleted file mode 100644 index 597c284..0000000 Binary files a/tensorlayer/layers/__pycache__/extend.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/extend.cpython-35.pyc b/tensorlayer/layers/__pycache__/extend.cpython-35.pyc deleted file mode 100644 index b3e05e3..0000000 Binary files a/tensorlayer/layers/__pycache__/extend.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/flow_control.cpython-34.pyc b/tensorlayer/layers/__pycache__/flow_control.cpython-34.pyc deleted file mode 100644 index 6402324..0000000 Binary files a/tensorlayer/layers/__pycache__/flow_control.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/flow_control.cpython-35.pyc b/tensorlayer/layers/__pycache__/flow_control.cpython-35.pyc deleted file mode 100644 index f4970de..0000000 Binary files a/tensorlayer/layers/__pycache__/flow_control.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/importer.cpython-34.pyc b/tensorlayer/layers/__pycache__/importer.cpython-34.pyc deleted file mode 100644 index a8afb5d..0000000 Binary files a/tensorlayer/layers/__pycache__/importer.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/importer.cpython-35.pyc b/tensorlayer/layers/__pycache__/importer.cpython-35.pyc deleted file mode 100644 index a66e380..0000000 Binary files a/tensorlayer/layers/__pycache__/importer.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/merge.cpython-34.pyc b/tensorlayer/layers/__pycache__/merge.cpython-34.pyc deleted file mode 100644 index 8e78e37..0000000 Binary files a/tensorlayer/layers/__pycache__/merge.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/merge.cpython-35.pyc 
b/tensorlayer/layers/__pycache__/merge.cpython-35.pyc deleted file mode 100644 index b2ee959..0000000 Binary files a/tensorlayer/layers/__pycache__/merge.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/normalization.cpython-34.pyc b/tensorlayer/layers/__pycache__/normalization.cpython-34.pyc deleted file mode 100644 index 60635fb..0000000 Binary files a/tensorlayer/layers/__pycache__/normalization.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/normalization.cpython-35.pyc b/tensorlayer/layers/__pycache__/normalization.cpython-35.pyc deleted file mode 100644 index 6d1dd6c..0000000 Binary files a/tensorlayer/layers/__pycache__/normalization.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/object_detection.cpython-34.pyc b/tensorlayer/layers/__pycache__/object_detection.cpython-34.pyc deleted file mode 100644 index a532b2c..0000000 Binary files a/tensorlayer/layers/__pycache__/object_detection.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/object_detection.cpython-35.pyc b/tensorlayer/layers/__pycache__/object_detection.cpython-35.pyc deleted file mode 100644 index abe26d3..0000000 Binary files a/tensorlayer/layers/__pycache__/object_detection.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/padding.cpython-34.pyc b/tensorlayer/layers/__pycache__/padding.cpython-34.pyc deleted file mode 100644 index a0431aa..0000000 Binary files a/tensorlayer/layers/__pycache__/padding.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/padding.cpython-35.pyc b/tensorlayer/layers/__pycache__/padding.cpython-35.pyc deleted file mode 100644 index bdeae96..0000000 Binary files a/tensorlayer/layers/__pycache__/padding.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/pooling.cpython-34.pyc b/tensorlayer/layers/__pycache__/pooling.cpython-34.pyc deleted file mode 100644 index 
0554521..0000000 Binary files a/tensorlayer/layers/__pycache__/pooling.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/pooling.cpython-35.pyc b/tensorlayer/layers/__pycache__/pooling.cpython-35.pyc deleted file mode 100644 index 9fb70d2..0000000 Binary files a/tensorlayer/layers/__pycache__/pooling.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/recurrent.cpython-34.pyc b/tensorlayer/layers/__pycache__/recurrent.cpython-34.pyc deleted file mode 100644 index 021cd0d..0000000 Binary files a/tensorlayer/layers/__pycache__/recurrent.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/recurrent.cpython-35.pyc b/tensorlayer/layers/__pycache__/recurrent.cpython-35.pyc deleted file mode 100644 index 10ab766..0000000 Binary files a/tensorlayer/layers/__pycache__/recurrent.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/shape.cpython-34.pyc b/tensorlayer/layers/__pycache__/shape.cpython-34.pyc deleted file mode 100644 index 8f09c1b..0000000 Binary files a/tensorlayer/layers/__pycache__/shape.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/shape.cpython-35.pyc b/tensorlayer/layers/__pycache__/shape.cpython-35.pyc deleted file mode 100644 index 383504e..0000000 Binary files a/tensorlayer/layers/__pycache__/shape.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/spatial_transformer.cpython-34.pyc b/tensorlayer/layers/__pycache__/spatial_transformer.cpython-34.pyc deleted file mode 100644 index 5693288..0000000 Binary files a/tensorlayer/layers/__pycache__/spatial_transformer.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/spatial_transformer.cpython-35.pyc b/tensorlayer/layers/__pycache__/spatial_transformer.cpython-35.pyc deleted file mode 100644 index 3720832..0000000 Binary files a/tensorlayer/layers/__pycache__/spatial_transformer.cpython-35.pyc and /dev/null 
differ diff --git a/tensorlayer/layers/__pycache__/special_activation.cpython-34.pyc b/tensorlayer/layers/__pycache__/special_activation.cpython-34.pyc deleted file mode 100644 index 67a54de..0000000 Binary files a/tensorlayer/layers/__pycache__/special_activation.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/special_activation.cpython-35.pyc b/tensorlayer/layers/__pycache__/special_activation.cpython-35.pyc deleted file mode 100644 index 12a03b3..0000000 Binary files a/tensorlayer/layers/__pycache__/special_activation.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/stack.cpython-34.pyc b/tensorlayer/layers/__pycache__/stack.cpython-34.pyc deleted file mode 100644 index e4cbeec..0000000 Binary files a/tensorlayer/layers/__pycache__/stack.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/stack.cpython-35.pyc b/tensorlayer/layers/__pycache__/stack.cpython-35.pyc deleted file mode 100644 index b0f0c8e..0000000 Binary files a/tensorlayer/layers/__pycache__/stack.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/super_resolution.cpython-34.pyc b/tensorlayer/layers/__pycache__/super_resolution.cpython-34.pyc deleted file mode 100644 index 33b6a9a..0000000 Binary files a/tensorlayer/layers/__pycache__/super_resolution.cpython-34.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/super_resolution.cpython-35.pyc b/tensorlayer/layers/__pycache__/super_resolution.cpython-35.pyc deleted file mode 100644 index b722846..0000000 Binary files a/tensorlayer/layers/__pycache__/super_resolution.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/__pycache__/time_distribution.cpython-34.pyc b/tensorlayer/layers/__pycache__/time_distribution.cpython-34.pyc deleted file mode 100644 index fd2edca..0000000 Binary files a/tensorlayer/layers/__pycache__/time_distribution.cpython-34.pyc and /dev/null differ diff --git 
a/tensorlayer/layers/__pycache__/time_distribution.cpython-35.pyc b/tensorlayer/layers/__pycache__/time_distribution.cpython-35.pyc deleted file mode 100644 index f7aac2a..0000000 Binary files a/tensorlayer/layers/__pycache__/time_distribution.cpython-35.pyc and /dev/null differ diff --git a/tensorlayer/layers/convolution.py b/tensorlayer/layers/convolution.py deleted file mode 100644 index f19cd62..0000000 --- a/tensorlayer/layers/convolution.py +++ /dev/null @@ -1,1878 +0,0 @@ -# -*- coding: utf-8 -*- - -import tensorflow as tf -from .. import _logging as logging -from .core import * - -__all__ = [ - 'Conv1dLayer', - 'Conv2dLayer', - 'DeConv2dLayer', - 'Conv3dLayer', - 'DeConv3dLayer', - 'UpSampling2dLayer', - 'DownSampling2dLayer', - 'DeformableConv2d', - 'AtrousConv1dLayer', - 'AtrousConv2dLayer', - 'deconv2d_bilinear_upsampling_initializer', - 'Conv1d', - 'Conv2d', - 'DeConv2d', - 'DeConv3d', - 'DepthwiseConv2d', - 'SeparableConv2d', - 'GroupConv2d', -] - - -class Conv1dLayer(Layer): - """ - The :class:`Conv1dLayer` class is a 1D CNN layer, see `tf.nn.convolution `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - act : activation function - The activation function of this layer. - shape : tuple of int - The shape of the filters: (filter_length, in_channels, out_channels). - stride : int - The number of entries by which the filter is moved right at a step. - dilation_rate : int - Filter up-sampling/input down-sampling rate. - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - Default is 'NWC' as it is a 1D CNN. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. 
- name : str - A unique layer name - - """ - - def __init__( - self, - prev_layer, - act=tf.identity, - shape=(5, 1, 5), - stride=1, - dilation_rate=1, - padding='SAME', - data_format='NWC', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='cnn1d', - ): - if act is None: - act = tf.identity - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("Conv1dLayer %s: shape:%s stride:%s pad:%s act:%s" % (self.name, str(shape), str(stride), padding, act.__name__)) - - with tf.variable_scope(name): - W = tf.get_variable(name='W_conv1d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - self.outputs = tf.nn.convolution( - self.inputs, W, strides=(stride, ), padding=padding, dilation_rate=(dilation_rate, ), data_format=data_format) # 1.2 - if b_init: - b = tf.get_variable(name='b_conv1d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = self.outputs + b - - self.outputs = act(self.outputs) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - if b_init: - self.all_params.extend([W, b]) - else: - self.all_params.append(W) - - -class Conv2dLayer(Layer): - """ - The :class:`Conv2dLayer` class is a 2D CNN layer, see `tf.nn.conv2d `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - act : activation function - The activation function of this layer. - shape : tuple of int - The shape of the filters: (filter_height, filter_width, in_channels, out_channels). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. 
- padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the the weight matrix. - b_init : initializer or None - The initializer for the the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - use_cudnn_on_gpu : bool - Default is False. - data_format : str - "NHWC" or "NCHW", default is "NHWC". - name : str - A unique layer name. - - Notes - ----- - - shape = [h, w, the number of output channel of previous layer, the number of output channels] - - the number of output channel of a layer is its last dimension. - - Examples - -------- - With TensorLayer - - >>> x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) - >>> net = tl.layers.InputLayer(x, name='input_layer') - >>> net = tl.layers.Conv2dLayer(net, - ... act = tf.nn.relu, - ... shape = (5, 5, 1, 32), # 32 features for each 5x5 patch - ... strides = (1, 1, 1, 1), - ... padding='SAME', - ... W_init=tf.truncated_normal_initializer(stddev=5e-2), - ... b_init = tf.constant_initializer(value=0.0), - ... name ='cnn_layer1') # output: (?, 28, 28, 32) - >>> net = tl.layers.PoolLayer(net, - ... ksize=(1, 2, 2, 1), - ... strides=(1, 2, 2, 1), - ... padding='SAME', - ... pool = tf.nn.max_pool, - ... name ='pool_layer1',) # output: (?, 14, 14, 32) - - Without TensorLayer, you can implement 2D convolution as follow. - - >>> W = tf.Variable(W_init(shape=[5, 5, 1, 32], ), name='W_conv') - >>> b = tf.Variable(b_init(shape=[32], ), name='b_conv') - >>> outputs = tf.nn.relu( tf.nn.conv2d(inputs, W, - ... strides=[1, 1, 1, 1], - ... 
padding='SAME') + b ) - - """ - - def __init__( - self, - prev_layer, - act=tf.identity, - shape=(5, 5, 1, 100), - strides=(1, 1, 1, 1), - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - use_cudnn_on_gpu=None, - data_format=None, - name='cnn_layer', - ): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if act is None: - act = tf.identity - logging.info("Conv2dLayer %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__)) - - with tf.variable_scope(name): - W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - if b_init: - b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act( - tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b) - else: - self.outputs = act(tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format)) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - if b_init: - self.all_params.extend([W, b]) - else: - self.all_params.append(W) - - -class DeConv2dLayer(Layer): - """A de-convolution 2D layer. - - See `tf.nn.conv2d_transpose `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - act : activation function - The activation function of this layer. - shape : tuple of int - Shape of the filters: (height, width, output_channels, in_channels). - The filter's ``in_channels`` dimension must match that of value. 
- output_shape : tuple of int - Output shape of the deconvolution, - strides : tuple of int - The sliding window strides for corresponding input dimensions. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for initializing the weight matrix. - b_init_args : dictionary - The arguments for initializing the bias vector. - name : str - A unique layer name. - - Notes - ----- - - We recommend to use `DeConv2d` with TensorFlow version higher than 1.3. - - shape = [h, w, the number of output channels of this layer, the number of output channel of the previous layer]. - - output_shape = [batch_size, any, any, the number of output channels of this layer]. - - the number of output channel of a layer is its last dimension. - - Examples - -------- - A part of the generator in DCGAN example - - >>> batch_size = 64 - >>> inputs = tf.placeholder(tf.float32, [batch_size, 100], name='z_noise') - >>> net_in = tl.layers.InputLayer(inputs, name='g/in') - >>> net_h0 = tl.layers.DenseLayer(net_in, n_units = 8192, - ... W_init = tf.random_normal_initializer(stddev=0.02), - ... act = tf.identity, name='g/h0/lin') - >>> print(net_h0.outputs._shape) - ... (64, 8192) - >>> net_h0 = tl.layers.ReshapeLayer(net_h0, shape=(-1, 4, 4, 512), name='g/h0/reshape') - >>> net_h0 = tl.layers.BatchNormLayer(net_h0, act=tf.nn.relu, is_train=is_train, name='g/h0/batch_norm') - >>> print(net_h0.outputs._shape) - ... (64, 4, 4, 512) - >>> net_h1 = tl.layers.DeConv2dLayer(net_h0, - ... shape=(5, 5, 256, 512), - ... output_shape=(batch_size, 8, 8, 256), - ... strides=(1, 2, 2, 1), - ... act=tf.identity, name='g/h1/decon2d') - >>> net_h1 = tl.layers.BatchNormLayer(net_h1, act=tf.nn.relu, is_train=is_train, name='g/h1/batch_norm') - >>> print(net_h1.outputs._shape) - ... 
(64, 8, 8, 256) - - U-Net - - >>> .... - >>> conv10 = tl.layers.Conv2dLayer(conv9, act=tf.nn.relu, - ... shape=(3,3,1024,1024), strides=(1,1,1,1), padding='SAME', - ... W_init=w_init, b_init=b_init, name='conv10') - >>> print(conv10.outputs) - ... (batch_size, 32, 32, 1024) - >>> deconv1 = tl.layers.DeConv2dLayer(conv10, act=tf.nn.relu, - ... shape=(3,3,512,1024), strides=(1,2,2,1), output_shape=(batch_size,64,64,512), - ... padding='SAME', W_init=w_init, b_init=b_init, name='devcon1_1') - - """ - - def __init__( - self, - prev_layer, - act=tf.identity, - shape=(3, 3, 128, 256), - output_shape=(1, 256, 256, 128), - strides=(1, 2, 2, 1), - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='decnn2d_layer', - ): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if act is None: - act = tf.identity - logging.info("DeConv2dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(output_shape), str(strides), padding, - act.__name__)) - # logging.info(" DeConv2dLayer: Untested") - with tf.variable_scope(name): - W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - if b_init: - b = tf.get_variable(name='b_deconv2d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b) - else: - self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding)) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - if b_init: - 
self.all_params.extend([W, b]) - else: - self.all_params.append(W) - - -class Conv3dLayer(Layer): - """ - The :class:`Conv3dLayer` class is a 3D CNN layer, see `tf.nn.conv3d `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - act : activation function - The activation function of this layer. - shape : tuple of int - Shape of the filters: (filter_depth, filter_height, filter_width, in_channels, out_channels). - strides : tuple of int - The sliding window strides for corresponding input dimensions. - Must be in the same order as the shape dimension. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer - The initializer for the bias vector. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder(tf.float32, (None, 100, 100, 100, 3)) - >>> n = tl.layers.InputLayer(x, name='in3') - >>> n = tl.layers.Conv3dLayer(n, shape=(2, 2, 2, 3, 32), strides=(1, 2, 2, 2, 1)) - ... 
[None, 50, 50, 50, 32] - """ - - def __init__( - self, - prev_layer, - act=tf.identity, - shape=(2, 2, 2, 3, 32), - strides=(1, 2, 2, 2, 1), - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='cnn3d_layer', - ): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if act is None: - act = tf.identity - logging.info("Conv3dLayer %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__)) - - with tf.variable_scope(name): - # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv') - # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv') - W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b) - - # self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b ) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend([W, b]) - - -class DeConv3dLayer(Layer): - """The :class:`DeConv3dLayer` class is deconvolutional 3D layer, see `tf.nn.conv3d_transpose `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - act : activation function - The activation function of this layer. - shape : tuple of int - The shape of the filters: (depth, height, width, output_channels, in_channels). - The filter's in_channels dimension must match that of value. 
- output_shape : tuple of int - The output shape of the deconvolution. - strides : tuple of int - The sliding window strides for corresponding input dimensions. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer - The initializer for the bias vector. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - """ - - def __init__( - self, - prev_layer, - act=tf.identity, - shape=(2, 2, 2, 128, 256), - output_shape=(1, 12, 32, 32, 128), - strides=(1, 2, 2, 2, 1), - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='decnn3d_layer', - ): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if act is None: - act = tf.identity - logging.info("DeConv3dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(output_shape), str(strides), padding, - act.__name__)) - - with tf.variable_scope(name): - W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - - self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend([W, b]) - - -class UpSampling2dLayer(Layer): - """The :class:`UpSampling2dLayer` class is a up-sampling 2D layer, see 
`tf.image.resize_images `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer with 4-D Tensor of the shape (batch, height, width, channels) or 3-D Tensor of the shape (height, width, channels). - size : tuple of int/float - (height, width) scale factor or new size of height and width. - is_scale : boolean - If True (default), the `size` is a scale factor; otherwise, the `size` is the numbers of pixels of height and width. - method : int - The resize method selected through the index. Defaults index is 0 which is ResizeMethod.BILINEAR. - - Index 0 is ResizeMethod.BILINEAR, Bilinear interpolation. - - Index 1 is ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation. - - Index 2 is ResizeMethod.BICUBIC, Bicubic interpolation. - - Index 3 ResizeMethod.AREA, Area interpolation. - align_corners : boolean - If True, align the corners of the input and output. Default is False. - name : str - A unique layer name. - - """ - - def __init__( - self, - prev_layer, - size, - is_scale=True, - method=0, - align_corners=False, - name='upsample2d_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if len(self.inputs.get_shape()) == 3: - if is_scale: - size_h = size[0] * int(self.inputs.get_shape()[0]) - size_w = size[1] * int(self.inputs.get_shape()[1]) - size = [int(size_h), int(size_w)] - elif len(self.inputs.get_shape()) == 4: - if is_scale: - size_h = size[0] * int(self.inputs.get_shape()[1]) - size_w = size[1] * int(self.inputs.get_shape()[2]) - size = [int(size_h), int(size_w)] - else: - raise Exception("Donot support shape %s" % self.inputs.get_shape()) - logging.info("UpSampling2dLayer %s: is_scale:%s size:%s method:%d align_corners:%s" % (name, is_scale, size, method, align_corners)) - with tf.variable_scope(name): - try: - self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners) - except Exception: # for TF 0.10 - self.outputs = 
tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - - -class DownSampling2dLayer(Layer): - """The :class:`DownSampling2dLayer` class is down-sampling 2D layer, see `tf.image.resize_images `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer with 4-D Tensor in the shape of (batch, height, width, channels) or 3-D Tensor in the shape of (height, width, channels). - size : tuple of int/float - (height, width) scale factor or new size of height and width. - is_scale : boolean - If True (default), the `size` is the scale factor; otherwise, the `size` are numbers of pixels of height and width. - method : int - The resize method selected through the index. Defaults index is 0 which is ResizeMethod.BILINEAR. - - Index 0 is ResizeMethod.BILINEAR, Bilinear interpolation. - - Index 1 is ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation. - - Index 2 is ResizeMethod.BICUBIC, Bicubic interpolation. - - Index 3 ResizeMethod.AREA, Area interpolation. - align_corners : boolean - If True, exactly align all 4 corners of the input and output. Default is False. - name : str - A unique layer name. 
- - """ - - def __init__( - self, - prev_layer, - size, - is_scale=True, - method=0, - align_corners=False, - name='downsample2d_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if len(self.inputs.get_shape()) == 3: - if is_scale: - size_h = size[0] * int(self.inputs.get_shape()[0]) - size_w = size[1] * int(self.inputs.get_shape()[1]) - size = [int(size_h), int(size_w)] - elif len(self.inputs.get_shape()) == 4: - if is_scale: - size_h = size[0] * int(self.inputs.get_shape()[1]) - size_w = size[1] * int(self.inputs.get_shape()[2]) - size = [int(size_h), int(size_w)] - else: - raise Exception("Donot support shape %s" % self.inputs.get_shape()) - logging.info("DownSampling2dLayer %s: is_scale:%s size:%s method:%d, align_corners:%s" % (name, is_scale, size, method, align_corners)) - with tf.variable_scope(name): - try: - self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners) - except Exception: # for TF 0.10 - self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - - -class DeformableConv2d(Layer): - """The :class:`DeformableConv2d` class is a 2D - `Deformable Convolutional Networks `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - offset_layer : :class:`Layer` - To predict the offset of convolution operations. - The output shape is (batchsize, input height, input width, 2*(number of element in the convolution kernel)) - e.g. if apply a 3*3 kernel, the number of the last dimension should be 18 (2*3*3) - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - act : activation function - The activation function of this layer. 
- W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Examples - -------- - >>> net = tl.layers.InputLayer(x, name='input_layer') - >>> offset1 = tl.layers.Conv2d(net, 18, (3, 3), (1, 1), act=act, padding='SAME', name='offset1') - >>> net = tl.layers.DeformableConv2d(net, offset1, 32, (3, 3), act=act, name='deformable1') - >>> offset2 = tl.layers.Conv2d(net, 18, (3, 3), (1, 1), act=act, padding='SAME', name='offset2') - >>> net = tl.layers.DeformableConv2d(net, offset2, 64, (3, 3), act=act, name='deformable2') - - References - ---------- - - The deformation operation was adapted from the implementation in `here `__ - - Notes - ----- - - The padding is fixed to 'SAME'. - - The current implementation is not optimized for memory usgae. Please use it carefully. 
- - """ - - def __init__( - self, - prev_layer, - offset_layer=None, - # shape=(3, 3, 1, 100), - n_filter=32, - filter_size=(3, 3), - act=tf.identity, - name='deformable_conv_2d', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None): - if tf.__version__ < "1.4": - raise Exception("Deformable CNN layer requires tensrflow 1.4 or higher version | current version %s" % tf.__version__) - - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - def _to_bc_h_w(x, x_shape): - """(b, h, w, c) -> (b*c, h, w)""" - x = tf.transpose(x, [0, 3, 1, 2]) - x = tf.reshape(x, (-1, x_shape[1], x_shape[2])) - return x - - def _to_b_h_w_n_c(x, x_shape): - """(b*c, h, w, n) -> (b, h, w, n, c)""" - x = tf.reshape(x, (-1, x_shape[4], x_shape[1], x_shape[2], x_shape[3])) - x = tf.transpose(x, [0, 2, 3, 4, 1]) - return x - - def tf_flatten(a): - """Flatten tensor""" - return tf.reshape(a, [-1]) - - def _get_vals_by_coords(inputs, coords, idx, out_shape): - indices = tf.stack([idx, tf_flatten(coords[:, :, :, :, 0]), tf_flatten(coords[:, :, :, :, 1])], axis=-1) - vals = tf.gather_nd(inputs, indices) - vals = tf.reshape(vals, out_shape) - return vals - - def _tf_repeat(a, repeats): - """Tensorflow version of np.repeat for 1D""" - # https://github.com/tensorflow/tensorflow/issues/8521 - assert len(a.get_shape()) == 1 - - a = tf.expand_dims(a, -1) - a = tf.tile(a, [1, repeats]) - a = tf_flatten(a) - return a - - def _tf_batch_map_coordinates(inputs, coords): - """Batch version of tf_map_coordinates - - Only supports 2D feature maps - - Parameters - ---------- - inputs : ``tf.Tensor`` - shape = (b*c, h, w) - coords : ``tf.Tensor`` - shape = (b*c, h, w, n, 2) - - Returns - ------- - ``tf.Tensor`` - A Tensor with the shape as (b*c, h, w, n) - - """ - input_shape = inputs.get_shape() - coords_shape = coords.get_shape() - batch_channel = tf.shape(inputs)[0] - input_h = 
int(input_shape[1]) - input_w = int(input_shape[2]) - kernel_n = int(coords_shape[3]) - n_coords = input_h * input_w * kernel_n - - coords_lt = tf.cast(tf.floor(coords), 'int32') - coords_rb = tf.cast(tf.ceil(coords), 'int32') - coords_lb = tf.stack([coords_lt[:, :, :, :, 0], coords_rb[:, :, :, :, 1]], axis=-1) - coords_rt = tf.stack([coords_rb[:, :, :, :, 0], coords_lt[:, :, :, :, 1]], axis=-1) - - idx = _tf_repeat(tf.range(batch_channel), n_coords) - - vals_lt = _get_vals_by_coords(inputs, coords_lt, idx, (batch_channel, input_h, input_w, kernel_n)) - vals_rb = _get_vals_by_coords(inputs, coords_rb, idx, (batch_channel, input_h, input_w, kernel_n)) - vals_lb = _get_vals_by_coords(inputs, coords_lb, idx, (batch_channel, input_h, input_w, kernel_n)) - vals_rt = _get_vals_by_coords(inputs, coords_rt, idx, (batch_channel, input_h, input_w, kernel_n)) - - coords_offset_lt = coords - tf.cast(coords_lt, 'float32') - - vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[:, :, :, :, 0] - vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[:, :, :, :, 0] - mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[:, :, :, :, 1] - - return mapped_vals - - def _tf_batch_map_offsets(inputs, offsets, grid_offset): - """Batch map offsets into input - - Parameters - ------------ - inputs : ``tf.Tensor`` - shape = (b, h, w, c) - offsets: ``tf.Tensor`` - shape = (b, h, w, 2*n) - grid_offset: `tf.Tensor`` - Offset grids shape = (h, w, n, 2) - - Returns - ------- - ``tf.Tensor`` - A Tensor with the shape as (b, h, w, c) - - """ - input_shape = inputs.get_shape() - batch_size = tf.shape(inputs)[0] - kernel_n = int(int(offsets.get_shape()[3]) / 2) - input_h = input_shape[1] - input_w = input_shape[2] - channel = input_shape[3] - - # inputs (b, h, w, c) --> (b*c, h, w) - inputs = _to_bc_h_w(inputs, input_shape) - - # offsets (b, h, w, 2*n) --> (b, h, w, n, 2) - offsets = tf.reshape(offsets, (batch_size, input_h, input_w, kernel_n, 2)) - # offsets (b, h, w, n, 2) --> (b*c, 
h, w, n, 2) - # offsets = tf.tile(offsets, [channel, 1, 1, 1, 1]) - - coords = tf.expand_dims(grid_offset, 0) # grid_offset --> (1, h, w, n, 2) - coords = tf.tile(coords, [batch_size, 1, 1, 1, 1]) + offsets # grid_offset --> (b, h, w, n, 2) - - # clip out of bound - coords = tf.stack( - [ - tf.clip_by_value(coords[:, :, :, :, 0], 0.0, tf.cast(input_h - 1, 'float32')), - tf.clip_by_value(coords[:, :, :, :, 1], 0.0, tf.cast(input_w - 1, 'float32')) - ], - axis=-1) - coords = tf.tile(coords, [channel, 1, 1, 1, 1]) - - mapped_vals = _tf_batch_map_coordinates(inputs, coords) - # (b*c, h, w, n) --> (b, h, w, n, c) - mapped_vals = _to_b_h_w_n_c(mapped_vals, [batch_size, input_h, input_w, kernel_n, channel]) - - return mapped_vals - - Layer.__init__(self, prev_layer=[prev_layer, offset_layer], name=name) - self.inputs = prev_layer.outputs - self.offset_layer = offset_layer - if act is None: - act = tf.identity - logging.info("DeformableConv2d %s: n_filter: %d, filter_size: %s act:%s" % (self.name, n_filter, str(filter_size), act.__name__)) - - try: - pre_channel = int(prev_layer.outputs.get_shape()[-1]) - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - pre_channel = 1 - logging.info("[warnings] unknow input channels, set to 1") - shape = (filter_size[0], filter_size[1], pre_channel, n_filter) - - with tf.variable_scope(name): - offset = self.offset_layer.outputs - assert offset.get_shape()[-1] == 2 * shape[0] * shape[1] - - # Grid initialisation - input_h = int(self.inputs.get_shape()[1]) - input_w = int(self.inputs.get_shape()[2]) - kernel_n = shape[0] * shape[1] - initial_offsets = tf.stack(tf.meshgrid(tf.range(shape[0]), tf.range(shape[1]), indexing='ij')) # initial_offsets --> (kh, kw, 2) - initial_offsets = tf.reshape(initial_offsets, (-1, 2)) # initial_offsets --> (n, 2) - initial_offsets = tf.expand_dims(initial_offsets, 0) # initial_offsets --> (1, n, 2) - initial_offsets = tf.expand_dims(initial_offsets, 0) # 
initial_offsets --> (1, 1, n, 2) - initial_offsets = tf.tile(initial_offsets, [input_h, input_w, 1, 1]) # initial_offsets --> (h, w, n, 2) - initial_offsets = tf.cast(initial_offsets, 'float32') - grid = tf.meshgrid( - tf.range(-int((shape[0] - 1) / 2.0), int(input_h - int((shape[0] - 1) / 2.0)), 1), - tf.range(-int((shape[1] - 1) / 2.0), int(input_w - int((shape[1] - 1) / 2.0)), 1), - indexing='ij') - - grid = tf.stack(grid, axis=-1) - grid = tf.cast(grid, 'float32') # grid --> (h, w, 2) - grid = tf.expand_dims(grid, 2) # grid --> (h, w, 1, 2) - grid = tf.tile(grid, [1, 1, kernel_n, 1]) # grid --> (h, w, n, 2) - grid_offset = grid + initial_offsets # grid_offset --> (h, w, n, 2) - - input_deform = _tf_batch_map_offsets(self.inputs, offset, grid_offset) - - W = tf.get_variable( - name='W_deformableconv2d', - shape=[1, 1, shape[0] * shape[1], shape[-2], shape[-1]], - initializer=W_init, - dtype=LayersConfig.tf_dtype, - **W_init_args) - - if b_init: - b = tf.get_variable(name='b_deformableconv2d', shape=(shape[-1]), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = tf.reshape( - act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None) + b), - (tf.shape(self.inputs)[0], input_h, input_w, shape[-1])) - else: - self.outputs = tf.reshape( - act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None)), - (tf.shape(self.inputs)[0], input_h, input_w, shape[-1])) - - # fixed - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - # add offset_layer properties - # offset_params = [osparam for osparam in offset_layer.all_params if osparam not in layer.all_params] - # offset_layers = [oslayer for oslayer in offset_layer.all_layers if oslayer not in layer.all_layers] - # - # self.all_params.extend(list(offset_params)) - # self.all_layers.extend(list(offset_layers)) - # 
self.all_drop.update(dict(offset_layer.all_drop)) - - # this layer - self.all_layers.append(self.outputs) - if b_init: - self.all_params.extend([W, b]) - else: - self.all_params.append(W) - - -def atrous_conv1d( - layer, - n_filter=32, - filter_size=2, - stride=1, - dilation=1, - act=tf.identity, - padding='SAME', - data_format='NWC', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='conv1d', -): - """Simplified version of :class:`AtrousConv1dLayer`. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : int - The filter size. - stride : tuple of int - The strides: (height, width). - dilation : int - The filter dilation size. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - data_format : str - Default is 'NWC' as it is a 1D CNN. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A :class:`AtrousConv1dLayer` object - - """ - - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - return Conv1dLayer( - prev_layer=layer, - act=act, - shape=(filter_size, int(layer.outputs.get_shape()[-1]), n_filter), - stride=stride, - padding=padding, - dilation_rate=dilation, - data_format=data_format, - W_init=W_init, - b_init=b_init, - W_init_args=W_init_args, - b_init_args=b_init_args, - name=name, - ) - - -class AtrousConv2dLayer(Layer): - """The :class:`AtrousConv2dLayer` class is 2D atrous convolution (a.k.a. 
convolution with holes or dilated - convolution) 2D layer, see `tf.nn.atrous_conv2d `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer with a 4D output tensor in the shape of (batch, height, width, channels). - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size: (height, width). - rate : int - The stride that we sample input values in the height and width dimensions. - This equals the rate that we up-sample the filters by inserting zeros across the height and width dimensions. - In the literature, this parameter is sometimes mentioned as input stride or dilation. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. 
- - """ - - def __init__(self, - prev_layer, - n_filter=32, - filter_size=(3, 3), - rate=2, - act=tf.identity, - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='atrou2d'): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if act is None: - act = tf.identity - logging.info("AtrousConv2dLayer %s: n_filter:%d filter_size:%s rate:%d pad:%s act:%s" % (self.name, n_filter, filter_size, rate, padding, act.__name__)) - with tf.variable_scope(name): - shape = [filter_size[0], filter_size[1], int(self.inputs.get_shape()[-1]), n_filter] - filters = tf.get_variable(name='filter', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - if b_init: - b = tf.get_variable(name='b', shape=(n_filter), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding) + b) - else: - self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding)) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - if b_init: - self.all_params.extend([filters, b]) - else: - self.all_params.append(filters) - - -class _SeparableConv2dLayer(Layer): # TODO - """The :class:`SeparableConv2dLayer` class is 2D convolution with separable filters, see `tf.layers.separable_conv2d `__. - - This layer has not been fully tested yet. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer with a 4D output tensor in the shape of [batch, height, width, channels]. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The strides (height, width). 
- This can be a single integer if you want to specify the same value for all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1. - padding : str - The type of padding algorithm: "SAME" or "VALID" - data_format : str - One of channels_last (Default) or channels_first. - The order must match the input dimensions. - channels_last corresponds to inputs with shapedata_format = 'NWHC' (batch, width, height, channels) while - channels_first corresponds to inputs with shape [batch, channels, width, height]. - dilation_rate : int or tuple of ints - The dilation rate of the convolution. - It can be a single integer if you want to specify the same value for all spatial dimensions. - Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1. - depth_multiplier : int - The number of depthwise convolution output channels for each input channel. - The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier. - act : activation function - The activation function of this layer. - use_bias : boolean - Whether the layer uses a bias - depthwise_initializer : initializer - The initializer for the depthwise convolution kernel. - pointwise_initializer : initializer - The initializer for the pointwise convolution kernel. - bias_initializer : initializer - The initializer for the bias vector. If None, skip bias. - depthwise_regularizer : regularizer - Optional regularizer for the depthwise convolution kernel. - pointwise_regularizer : regularizer - Optional regularizer for the pointwise convolution kernel. - bias_regularizer : regularizer - Optional regularizer for the bias vector. - activity_regularizer : regularizer - Regularizer function for the output. - name : str - A unique layer name. 
- - """ - - def __init__(self, - prev_layer, - n_filter, - filter_size=5, - strides=(1, 1), - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - depth_multiplier=1, - act=tf.identity, - use_bias=True, - depthwise_initializer=None, - pointwise_initializer=None, - bias_initializer=tf.zeros_initializer, - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - name='atrou2d'): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if tf.__version__ > "0.12.1": - raise Exception("This layer only supports for TF 1.0+") - - bias_initializer = bias_initializer() - - logging.info("SeparableConv2dLayer %s: n_filter:%d filter_size:%s strides:%s padding:%s dilation_rate:%s depth_multiplier:%s act:%s" % - (self.name, n_filter, filter_size, str(strides), padding, str(dilation_rate), str(depth_multiplier), act.__name__)) - - with tf.variable_scope(name) as vs: - self.outputs = tf.layers.separable_conv2d( - self.inputs, - filters=n_filter, - kernel_size=filter_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=act, - use_bias=use_bias, - depthwise_initializer=depthwise_initializer, - pointwise_initializer=pointwise_initializer, - bias_initializer=bias_initializer, - depthwise_regularizer=depthwise_regularizer, - pointwise_regularizer=pointwise_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - ) - # trainable=True, name=None, reuse=None) - - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) - - -def deconv2d_bilinear_upsampling_initializer(shape): - """Returns the initializer that can be passed 
to DeConv2dLayer for initializ ingthe - weights in correspondence to channel-wise bilinear up-sampling. - Used in segmentation approaches such as [FCN](https://arxiv.org/abs/1605.06211) - - Parameters - ---------- - shape : tuple of int - The shape of the filters, [height, width, output_channels, in_channels]. - It must match the shape passed to DeConv2dLayer. - - Returns - ------- - ``tf.constant_initializer`` - A constant initializer with weights set to correspond to per channel bilinear upsampling - when passed as W_int in DeConv2dLayer - - Examples - -------- - - Upsampling by a factor of 2, ie e.g 100->200 - >>> rescale_factor = 2 - >>> filter_size = (2 * rescale_factor - rescale_factor % 2) #Corresponding bilinear filter size - >>> num_in_channels = 3 - >>> num_out_channels = 3 - >>> deconv_filter_shape = (filter_size, filter_size, num_out_channels, num_in_channels) - >>> x = tf.placeholder(tf.float32, (1, imsize, imsize, num_channels)) - >>> net = tl.layers.InputLayer(x, name='input_layer') - >>> bilinear_init = deconv2d_bilinear_upsampling_initializer(shape=filter_shape) - >>> net = tl.layers.DeConv2dLayer(net, - ... shape=filter_shape, - ... output_shape=(1, imsize*rescale_factor, imsize*rescale_factor, num_out_channels), - ... strides=(1, rescale_factor, rescale_factor, 1), - ... W_init=bilinear_init, - ... padding='SAME', - ... 
act=tf.identity, name='g/h1/decon2d') - - """ - if shape[0] != shape[1]: - raise Exception('deconv2d_bilinear_upsampling_initializer only supports symmetrical filter sizes') - if shape[3] < shape[2]: - raise Exception('deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels ') - - filter_size = shape[0] - num_out_channels = shape[2] - num_in_channels = shape[3] - - # Create bilinear filter kernel as numpy array - bilinear_kernel = np.zeros([filter_size, filter_size], dtype=np.float32) - scale_factor = (filter_size + 1) // 2 - if filter_size % 2 == 1: - center = scale_factor - 1 - else: - center = scale_factor - 0.5 - for x in range(filter_size): - for y in range(filter_size): - bilinear_kernel[x, y] = (1 - abs(x - center) / scale_factor) * \ - (1 - abs(y - center) / scale_factor) - weights = np.zeros((filter_size, filter_size, num_out_channels, num_in_channels)) - for i in range(num_out_channels): - weights[:, :, i, i] = bilinear_kernel - - # assign numpy array to constant_initalizer and pass to get_variable - bilinear_weights_init = tf.constant_initializer(value=weights, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) - return bilinear_weights_init - - -def conv1d( - layer, - n_filter=32, - filter_size=5, - stride=1, - dilation_rate=1, - act=tf.identity, - padding='SAME', - data_format="NWC", - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='conv1d', -): - """Simplified version of :class:`Conv1dLayer`. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - n_filter : int - The number of filters - filter_size : int - The filter size - stride : int - The stride step - dilation_rate : int - Specifying the dilation rate to use for dilated convolution. - act : activation function - The function that is applied to the layer activations - padding : str - The padding algorithm type: "SAME" or "VALID". 
- data_format : str - Default is 'NWC' as it is a 1D CNN. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name - - Returns - ------- - :class:`Layer` - A :class:`Conv1dLayer` object. - - Examples - --------- - >>> x = tf.placeholder(tf.float32, (batch_size, width)) - >>> y_ = tf.placeholder(tf.int64, shape=(batch_size,)) - >>> n = InputLayer(x, name='in') - >>> n = ReshapeLayer(n, (-1, width, 1), name='rs') - >>> n = Conv1d(n, 64, 3, 1, act=tf.nn.relu, name='c1') - >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m1') - >>> n = Conv1d(n, 128, 3, 1, act=tf.nn.relu, name='c2') - >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m2') - >>> n = Conv1d(n, 128, 3, 1, act=tf.nn.relu, name='c3') - >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m3') - >>> n = FlattenLayer(n, name='f') - >>> n = DenseLayer(n, 500, tf.nn.relu, name='d1') - >>> n = DenseLayer(n, 100, tf.nn.relu, name='d2') - >>> n = DenseLayer(n, 2, tf.identity, name='o') - - """ - - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - return Conv1dLayer( - prev_layer=layer, - act=act, - shape=(filter_size, int(layer.outputs.get_shape()[-1]), n_filter), - stride=stride, - dilation_rate=dilation_rate, - padding=padding, - data_format=data_format, - W_init=W_init, - b_init=b_init, - W_init_args=W_init_args, - b_init_args=b_init_args, - name=name, - ) - - -# TODO: DeConv1d - - -def conv2d( - layer, - n_filter=32, - filter_size=(3, 3), - strides=(1, 1), - act=tf.identity, - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - use_cudnn_on_gpu=None, - data_format=None, - 
name='conv2d', -): - """Simplified version of :class:`Conv2dLayer`. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - strides : tuple of int - The sliding window strides of corresponding input dimensions. - It must be in the same order as the ``shape`` parameter. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the the weight matrix. - b_init : initializer or None - The initializer for the the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - use_cudnn_on_gpu : bool - Default is False. - data_format : str - "NHWC" or "NCHW", default is "NHWC". - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A :class:`Conv2dLayer` object. 
- - Examples - -------- - >>> net = InputLayer(x, name='inputs') - >>> net = Conv2d(net, 64, (3, 3), act=tf.nn.relu, name='conv1_1') - >>> net = Conv2d(net, 64, (3, 3), act=tf.nn.relu, name='conv1_2') - >>> net = MaxPool2d(net, (2, 2), name='pool1') - >>> net = Conv2d(net, 128, (3, 3), act=tf.nn.relu, name='conv2_1') - >>> net = Conv2d(net, 128, (3, 3), act=tf.nn.relu, name='conv2_2') - >>> net = MaxPool2d(net, (2, 2), name='pool2') - - """ - - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - if len(strides) != 2: - raise ValueError("len(strides) should be 2, Conv2d and Conv2dLayer are different.") - - try: - pre_channel = int(layer.outputs.get_shape()[-1]) - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - pre_channel = 1 - logging.info("[warnings] unknow input channels, set to 1") - return Conv2dLayer( - layer, - act=act, - shape=(filter_size[0], filter_size[1], pre_channel, n_filter), # 32 features for each 5x5 patch - strides=(1, strides[0], strides[1], 1), - padding=padding, - W_init=W_init, - W_init_args=W_init_args, - b_init=b_init, - b_init_args=b_init_args, - use_cudnn_on_gpu=use_cudnn_on_gpu, - data_format=data_format, - name=name) - - -def deconv2d(layer, - n_filter=32, - filter_size=(3, 3), - out_size=(30, 30), - strides=(2, 2), - padding='SAME', - batch_size=None, - act=tf.identity, - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='decnn2d'): - """Simplified version of :class:`DeConv2dLayer`. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (height, width). - out_size : tuple of int - Require if TF version < 1.3, (height, width) of output. - strides : tuple of int - The stride step (height, width). - padding : str - The padding algorithm type: "SAME" or "VALID". 
- batch_size : int - Require if TF version < 1.3, int or None. - If None, try to find the `batch_size` from the first dim of net.outputs (you should define the `batch_size` in the input placeholder). - act : activation function - The activation function of this layer. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A :class:`DeConv2dLayer` object. - - """ - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - if act is None: - act = tf.identity - if len(strides) != 2: - raise ValueError("len(strides) should be 2, DeConv2d and DeConv2dLayer are different.") - if tf.__version__ > '1.3': - logging.info("DeConv2d %s: n_filters:%s strides:%s pad:%s act:%s" % (name, str(n_filter), str(strides), padding, act.__name__)) - inputs = layer.outputs - scope_name = tf.get_variable_scope().name - # if scope_name: - # whole_name = scope_name + '/' + name - # else: - # whole_name = name - net_new = Layer(name=name) #whole_name) - # with tf.name_scope(name): - with tf.variable_scope(name) as vs: - net_new.outputs = tf.contrib.layers.conv2d_transpose( - inputs=inputs, - num_outputs=n_filter, - kernel_size=filter_size, - stride=strides, - padding=padding, - activation_fn=act, - weights_initializer=W_init, - biases_initializer=b_init, - scope=name) - new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - net_new.all_layers = list(layer.all_layers) - net_new.all_params = list(layer.all_params) - net_new.all_drop = dict(layer.all_drop) - net_new.all_layers.extend([net_new.outputs]) - net_new.all_params.extend(new_variables) - return net_new - else: - if batch_size is None: - # batch_size = 
tf.shape(net.outputs)[0] - fixed_batch_size = layer.outputs.get_shape().with_rank_at_least(1)[0] - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - else: - from tensorflow.python.ops import array_ops - batch_size = array_ops.shape(layer.outputs)[0] - return DeConv2dLayer( - prev_layer=layer, - act=act, - shape=(filter_size[0], filter_size[1], n_filter, int(layer.outputs.get_shape()[-1])), - output_shape=(batch_size, int(out_size[0]), int(out_size[1]), n_filter), - strides=(1, strides[0], strides[1], 1), - padding=padding, - W_init=W_init, - b_init=b_init, - W_init_args=W_init_args, - b_init_args=b_init_args, - name=name) - - -class DeConv3d(Layer): - """Simplified version of The :class:`DeConv3dLayer`, see `tf.contrib.layers.conv3d_transpose `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : tuple of int - The filter size (depth, height, width). - stride : tuple of int - The stride step (depth, height, width). - padding : str - The padding algorithm type: "SAME" or "VALID". - act : activation function - The activation function of this layer. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip bias. - name : str - A unique layer name. 
- - """ - - def __init__(self, - prev_layer, - n_filter=32, - filter_size=(3, 3, 3), - strides=(2, 2, 2), - padding='SAME', - act=tf.identity, - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - name='decnn3d'): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("DeConv3d %s: n_filters:%s strides:%s pad:%s act:%s" % (name, str(n_filter), str(strides), padding, act.__name__)) - - with tf.variable_scope(name) as vs: - self.outputs = tf.contrib.layers.conv3d_transpose( - num_outputs=n_filter, - kernel_size=filter_size, - stride=strides, - padding=padding, - activation_fn=act, - weights_initializer=W_init, - biases_initializer=b_init, - scope=name, - ) - new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(new_variables) - - -class DepthwiseConv2d(Layer): - """Separable/Depthwise Convolutional 2D layer, see `tf.nn.depthwise_conv2d `__. - - Input: - 4-D Tensor (batch, height, width, in_channels). - Output: - 4-D Tensor (batch, new height, new width, in_channels * depth_multiplier). - - Parameters - ------------ - layer : :class:`Layer` - Previous layer. - filter_size : tuple of int - The filter size (height, width). - stride : tuple of int - The stride step (height, width). - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - dilation_rate: tuple of 2 int - The dilation rate in which we sample input values across the height and width dimensions in atrous convolution. If it is greater than 1, then all values of strides must be 1. - depth_multiplier : int - The number of channels to expand to. - W_init : initializer - The initializer for the weight matrix. 
- b_init : initializer or None - The initializer for the bias vector. If None, skip bias. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='x') - >>> net = InputLayer(x, name='in') - >>> net = Conv2d(net, 32, (3, 3), (1, 1), name='conv1') - >>> net = MaxPool2d(net, (2, 2), name='pool1') - >>> net = DepthwiseConv2d(net, (3, 3), (1, 1), act=tf.nn.relu, name='dethwise1') - >>> net = Conv2d(net, 64, (1, 1), (1, 1), act=tf.nn.relu, name='conv2') - - References - ----------- - - tflearn's `grouped_conv_2d `__ - - keras's `separableconv2d `__ - - """ # # https://zhuanlan.zhihu.com/p/31551004 https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/MobileNet.py - - def __init__( - self, - prev_layer, - shape=(3, 3), - strides=(1, 1), - act=tf.identity, - padding='SAME', - dilation_rate=(1, 1), - depth_multiplier=1, - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='depthwise_conv2d', - ): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - - if act is None: - act = tf.identity - - logging.info("DepthwiseConv2d %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__)) - try: - pre_channel = int(prev_layer.outputs.get_shape()[-1]) - except Exception: # if pre_channel is ?, it happens when using Spatial Transformer Net - pre_channel = 1 - logging.info("[warnings] unknow input channels, set to 1") - - shape = [shape[0], shape[1], pre_channel, depth_multiplier] - - if len(strides) == 2: - strides = [1, strides[0], strides[1], 1] - - assert len(strides) == 4, 
"len(strides) should be 4." - - with tf.variable_scope(name): - W = tf.get_variable( - name='W_depthwise2d', shape=shape, initializer=W_init, dtype=LayersConfig.tf_dtype, - **W_init_args) # [filter_height, filter_width, in_channels, depth_multiplier] - if b_init: - b = tf.get_variable( - name='b_depthwise2d', shape=(pre_channel * depth_multiplier), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding, rate=dilation_rate) + b) - else: - self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding, rate=dilation_rate)) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - if b_init: - self.all_params.extend([W, b]) - else: - self.all_params.append(W) - - -class SeparableConv2d(Layer): - """The :class:`SeparableConv2d` class is a 2D depthwise separable convolutional layer, see `tf.layers.separable_conv2d `__. - - This layer performs a depthwise convolution that acts separately on channels, followed by a pointwise convolution that mixes channels. - While :class:`DepthwiseConv2d` performs depthwise convolution only, which allow us to add batch normalization between depthwise and pointwise convolution. - - Parameters - ------------ - layer : :class:`Layer` - Previous layer. - n_filter : int - The dimensionality of the output space (i.e. the number of filters in the convolution). - filter_size : tuple/list of 2 int - Specifying the spatial dimensions of the filters. Can be a single integer to specify the same value for all spatial dimensions. - strides : tuple/list of 2 int - Specifying the strides of the convolution. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1. 
- padding : str - One of "valid" or "same" (case-insensitive). - data_format : str - One of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). - dilation_rate : integer or tuple/list of 2 int - Specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1. - depth_multiplier : int - The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier. - depthwise_init : initializer - for the depthwise convolution kernel. - pointwise_init : initializer - For the pointwise convolution kernel. - b_init : initializer - For the bias vector. If None, ignore bias in the pointwise part only. - name : a str - A unique layer name. 
- - """ - - def __init__( - self, - prev_layer, - n_filter=100, - filter_size=(3, 3), - strides=(1, 1), - act=tf.identity, - padding='valid', - data_format='channels_last', - dilation_rate=(1, 1), - depth_multiplier=1, - # activation=None, - # use_bias=True, - depthwise_init=None, - pointwise_init=None, - b_init=tf.zeros_initializer(), - # depthwise_regularizer=None, - # pointwise_regularizer=None, - # bias_regularizer=None, - # activity_regularizer=None, - # depthwise_constraint=None, - # pointwise_constraint=None, - # W_init=tf.truncated_normal_initializer(stddev=0.1), - # b_init=tf.constant_initializer(value=0.0), - # W_init_args=None, - # b_init_args=None, - name='seperable', - ): - # if W_init_args is None: - # W_init_args = {} - # if b_init_args is None: - # b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - # print(self.name, n_filter, str(filter_size), str(strides), depth_multiplier, act.__name__) - logging.info("SeparableConv2d %s: n_filter:%d filter_size:%s filter_size:%s depth_multiplier:%d act:%s" \ - % (self.name, n_filter, str(filter_size), str(strides), depth_multiplier, act.__name__)) - - with tf.variable_scope(name) as vs: - self.outputs = tf.layers.separable_conv2d( - inputs=self.inputs, - filters=n_filter, - kernel_size=filter_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=act, - use_bias=(True if b_init is not None else False), - depthwise_initializer=depthwise_init, - pointwise_initializer=pointwise_init, - bias_initializer=b_init, - # depthwise_regularizer=None, - # pointwise_regularizer=None, - # bias_regularizer=None, - # activity_regularizer=None, - # depthwise_constraint=None, - # pointwise_constraint=None, - # bias_constraint=None, - trainable=True, - name=None) - new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - 
self.all_layers.append(self.outputs) - self.all_params.extend(new_variables) - - -class GroupConv2d(Layer): - """The :class:`GroupConv2d` class is 2D grouped convolution, see `here `__. - - Parameters - -------------- - layer : :class:`Layer` - Previous layer. - n_filter : int - The number of filters. - filter_size : int - The filter size. - stride : int - The stride step. - n_group : int - The number of groups. - act : activation function - The activation function of this layer. - padding : str - The padding algorithm type: "SAME" or "VALID". - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - """ - - def __init__( - self, - prev_layer=None, - n_filter=32, - filter_size=(3, 3), - strides=(2, 2), - n_group=2, - act=tf.identity, - padding='SAME', - W_init=tf.truncated_normal_initializer(stddev=0.02), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='groupconv', - ): # Windaway - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - groupConv = lambda i, k: tf.nn.conv2d(i, k, strides=[1, strides[0], strides[1], 1], padding=padding) - channels = int(self.inputs.get_shape()[-1]) - - logging.info("GroupConv2d %s: n_filter:%d size:%s strides:%s n_group:%d pad:%s act:%s" % (self.name, n_filter, str(filter_size), str(strides), n_group, - padding, act.__name__)) - with tf.variable_scope(name): - We = tf.get_variable( - name='W', - shape=[filter_size[0], filter_size[1], channels / n_group, n_filter], - initializer=W_init, - dtype=LayersConfig.tf_dtype, - trainable=True, - **W_init_args) - if b_init: - bi = 
tf.get_variable(name='b', shape=n_filter, initializer=b_init, dtype=LayersConfig.tf_dtype, trainable=True, **b_init_args) - if n_group == 1: - conv = groupConv(self.inputs, We) - else: - inputGroups = tf.split(axis=3, num_or_size_splits=n_group, value=self.inputs) - weightsGroups = tf.split(axis=3, num_or_size_splits=n_group, value=We) - convGroups = [groupConv(i, k) for i, k in zip(inputGroups, weightsGroups)] - conv = tf.concat(axis=3, values=convGroups) - if b_init: - conv = tf.add(conv, bi, name='add') - - self.outputs = act(conv) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - if b_init: - self.all_params.extend([We, bi]) - else: - self.all_params.append(We) - - -# Alias -AtrousConv1dLayer = atrous_conv1d -Conv1d = conv1d -Conv2d = conv2d -DeConv2d = deconv2d diff --git a/tensorlayer/layers/core.py b/tensorlayer/layers/core.py deleted file mode 100644 index 2fea3a6..0000000 --- a/tensorlayer/layers/core.py +++ /dev/null @@ -1,1395 +0,0 @@ -# -*- coding: utf-8 -*- - -import time -import numpy as np -import tensorflow as tf -from tensorflow.python.util.deprecation import deprecated -from .. import _logging as logging -from .. 
import files, iterate, utils, visualize - -__all__ = [ - 'LayersConfig', - 'TF_GRAPHKEYS_VARIABLES', - 'flatten_reshape', - 'clear_layers_name', - 'set_name_reuse', - 'initialize_rnn_state', - 'print_all_variables', - 'get_variables_with_name', - 'get_layers_with_name', - 'list_remove_repeat', - 'merge_networks', - 'initialize_global_variables', - 'Layer', - 'InputLayer', - 'OneHotInputLayer', - 'Word2vecEmbeddingInputlayer', - 'EmbeddingInputlayer', - 'AverageEmbeddingInputlayer', - 'DenseLayer', - 'ReconLayer', - 'DropoutLayer', - 'GaussianNoiseLayer', - 'DropconnectDenseLayer', -] - - -class LayersConfig: - tf_dtype = tf.float32 # TensorFlow DType - set_keep = {} # A dictionary for holding tf.placeholders - - -try: # For TF12 and later - TF_GRAPHKEYS_VARIABLES = tf.GraphKeys.GLOBAL_VARIABLES -except Exception: # For TF11 and before - TF_GRAPHKEYS_VARIABLES = tf.GraphKeys.VARIABLES - - -def flatten_reshape(variable, name='flatten'): - """Reshapes a high-dimension vector input. - [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row x mask_col x n_mask] - - Parameters - ---------- - variable : TensorFlow variable or tensor - The variable or tensor to be flatten. - name : str - A unique layer name. - - Returns - ------- - Tensor - Flatten Tensor - - Examples - -------- - >>> W_conv2 = weight_variable([5, 5, 100, 32]) # 64 features for each 5x5 patch - >>> b_conv2 = bias_variable([32]) - >>> W_fc1 = weight_variable([7 * 7 * 32, 256]) - - >>> h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) - >>> h_pool2 = max_pool_2x2(h_conv2) - >>> h_pool2.get_shape()[:].as_list() = [batch_size, 7, 7, 32] - ... [batch_size, mask_row, mask_col, n_mask] - >>> h_pool2_flat = tl.layers.flatten_reshape(h_pool2) - ... [batch_size, mask_row * mask_col * n_mask] - >>> h_pool2_flat_drop = tf.nn.dropout(h_pool2_flat, keep_prob) - ... 
- - """ - dim = 1 - for d in variable.get_shape()[1:].as_list(): - dim *= d - return tf.reshape(variable, shape=[-1, dim], name=name) - - -@deprecated("2018-06-30", "TensorLayer relies on TensorFlow to check naming.") -def clear_layers_name(): - logging.warning('this method is DEPRECATED and has no effect, please remove it from your code.') - - -@deprecated("2018-06-30", "TensorLayer relies on TensorFlow to check name reusing.") -def set_name_reuse(enable=True): - logging.warning('this method is DEPRECATED and has no effect, please remove it from your code.') - - -def initialize_rnn_state(state, feed_dict=None): - """Returns the initialized RNN state. - The inputs are `LSTMStateTuple` or `State` of `RNNCells`, and an optional `feed_dict`. - - Parameters - ---------- - state : RNN state. - The TensorFlow's RNN state. - feed_dict : dictionary - Initial RNN state; if None, returns zero state. - - Returns - ------- - RNN state - The TensorFlow's RNN state. - - """ - try: # TF1.0 - LSTMStateTuple = tf.contrib.rnn.LSTMStateTuple - except Exception: - LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple - - if isinstance(state, LSTMStateTuple): - c = state.c.eval(feed_dict=feed_dict) - h = state.h.eval(feed_dict=feed_dict) - return (c, h) - else: - new_state = state.eval(feed_dict=feed_dict) - return new_state - - -def print_all_variables(train_only=False): - """Print information of trainable or all variables, - without ``tl.layers.initialize_global_variables(sess)``. - - Parameters - ---------- - train_only : boolean - Whether print trainable variables only. - - If True, print the trainable variables. - - If False, print all variables. 
- - """ - # tvar = tf.trainable_variables() if train_only else tf.all_variables() - if train_only: - t_vars = tf.trainable_variables() - logging.info(" [*] printing trainable variables") - else: - try: # TF1.0+ - t_vars = tf.global_variables() - except Exception: # TF0.12 - t_vars = tf.all_variables() - logging.info(" [*] printing global variables") - for idx, v in enumerate(t_vars): - logging.info(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) - - -def get_variables_with_name(name=None, train_only=True, printable=False): - """Get a list of TensorFlow variables by a given name scope. - - Parameters - ---------- - name : str - Get the variables that contain this name. - train_only : boolean - If Ture, only get the trainable variables. - printable : boolean - If True, print the information of all variables. - - Returns - ------- - list of Tensor - A list of TensorFlow variables - - Examples - -------- - >>> dense_vars = tl.layers.get_variable_with_name('dense', True, True) - - """ - if name is None: - raise Exception("please input a name") - logging.info(" [*] geting variables with %s" % name) - # tvar = tf.trainable_variables() if train_only else tf.all_variables() - if train_only: - t_vars = tf.trainable_variables() - else: - try: # TF1.0+ - t_vars = tf.global_variables() - except Exception: # TF0.12 - t_vars = tf.all_variables() - - d_vars = [var for var in t_vars if name in var.name] - if printable: - for idx, v in enumerate(d_vars): - logging.info(" got {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) - return d_vars - - -def get_layers_with_name(net, name="", printable=False): - """Get a list of layers' output in a network by a given name scope. - - Parameters - ----------- - net : :class:`Layer` - The last layer of the network. - name : str - Get the layers' output that contain this name. 
- printable : boolean - If True, print information of all the layers' output - - Returns - -------- - list of Tensor - A list of layers' output (TensorFlow tensor) - - Examples - --------- - >>> layers = tl.layers.get_layers_with_name(net, "CNN", True) - - """ - logging.info(" [*] geting layers with %s" % name) - - layers = [] - i = 0 - for layer in net.all_layers: - # logging.info(type(layer.name)) - if name in layer.name: - layers.append(layer) - if printable: - logging.info(" got {:3}: {:15} {}".format(i, layer.name, str(layer.get_shape()))) - i = i + 1 - return layers - - -def list_remove_repeat(x): - """Remove the repeated items in a list, and return the processed list. - You may need it to create merged layer like Concat, Elementwise and etc. - - Parameters - ---------- - x : list - Input - - Returns - ------- - list - A list that after removing it's repeated items - - Examples - ------- - >>> l = [2, 3, 4, 2, 3] - >>> l = list_remove_repeat(l) - ... [2, 3, 4] - - """ - y = [] - for i in x: - if not i in y: - y.append(i) - return y - - -def merge_networks(layers=None): - """Merge all parameters, layers and dropout probabilities to a :class:`Layer`. - The output of return network is the first network in the list. - - Parameters - ---------- - layers : list of :class:`Layer` - Merge all parameters, layers and dropout probabilities to the first layer in the list. - - Returns - -------- - :class:`Layer` - The network after merging all parameters, layers and dropout probabilities to the first network in the list. - - Examples - --------- - >>> n1 = ... - >>> n2 = ... 
- >>> n1 = tl.layers.merge_networks([n1, n2]) - - """ - if layers is None: - raise Exception("layers should be a list of TensorLayer's Layers.") - layer = layers[0] - - all_params = [] - all_layers = [] - all_drop = {} - for l in layers: - all_params.extend(l.all_params) - all_layers.extend(l.all_layers) - all_drop.update(l.all_drop) - - layer.all_params = list(all_params) - layer.all_layers = list(all_layers) - layer.all_drop = dict(all_drop) - - layer.all_layers = list_remove_repeat(layer.all_layers) - layer.all_params = list_remove_repeat(layer.all_params) - - return layer - - -def initialize_global_variables(sess): - """Initialize the global variables of TensorFlow. - - Run ``sess.run(tf.global_variables_initializer())`` for TF 0.12+ or - ``sess.run(tf.initialize_all_variables())`` for TF 0.11. - - Parameters - ---------- - sess : Session - TensorFlow session. - - """ - assert sess is not None - # try: # TF12+ - sess.run(tf.global_variables_initializer()) - # except: # TF11 - # sess.run(tf.initialize_all_variables()) - - -class Layer(object): - """ - The basic :class:`Layer` class represents a single layer of a neural network. It - should be subclassed when implementing new types of layers. - Because each layer can keep track of the layer(s) feeding into it, a - network's output :class:`Layer` instance can double as a handle to the full - network. - - Parameters - ---------- - inputs : :class:`Layer` instance - The `Layer` class feeding into this layer. - layer : :class:`Layer` or None - Previous layer (optional), for adding all properties of previous layer(s) to this layer. - name : str or None - A unique layer name. - - Methods - --------- - print_params(details=True, session=None) - Print all parameters of this network. - print_layers() - Print all outputs of all layers of this network. - count_params() - Return the number of parameters of this network. 
- - Examples - --------- - - Define model - >>> x = tf.placeholder("float32", [None, 100]) - >>> n = tl.layers.InputLayer(x, name='in') - >>> n = tl.layers.DenseLayer(n, 80, name='d1') - >>> n = tl.layers.DenseLayer(n, 80, name='d2') - - - Get information - >>> print(n) - ... Last layer is: DenseLayer (d2) [None, 80] - >>> n.print_layers() - ... [TL] layer 0: d1/Identity:0 (?, 80) float32 - ... [TL] layer 1: d2/Identity:0 (?, 80) float32 - >>> n.print_params(False) - ... [TL] param 0: d1/W:0 (100, 80) float32_ref - ... [TL] param 1: d1/b:0 (80,) float32_ref - ... [TL] param 2: d2/W:0 (80, 80) float32_ref - ... [TL] param 3: d2/b:0 (80,) float32_ref - ... [TL] num of params: 14560 - >>> n.count_params() - ... 14560 - - - Slicing the outputs - >>> n2 = n[:, :30] - >>> print(n2) - ... Last layer is: Layer (d2) [None, 30] - - - Iterating the outputs - >>> for l in n: - >>> print(l) - ... Tensor("d1/Identity:0", shape=(?, 80), dtype=float32) - ... Tensor("d2/Identity:0", shape=(?, 80), dtype=float32) - - """ - - def __init__(self, prev_layer=None, name=None): - if name is None: - raise ValueError('Layer must have a name.') - - scope_name = tf.get_variable_scope().name - if scope_name: - name = scope_name + '/' + name - self.name = name - - # get all properties of previous layer(s) - if isinstance(prev_layer, Layer): # 1. for normal layer have only 1 input i.e. DenseLayer - # Hint : list(), dict() is pass by value (shallow), without them, - # it is pass by reference. - self.all_layers = list(prev_layer.all_layers) - self.all_params = list(prev_layer.all_params) - self.all_drop = dict(prev_layer.all_drop) - elif isinstance(prev_layer, list): # 2. for layer have multiply inputs i.e. 
ConcatLayer - self.all_layers = list_remove_repeat(sum([l.all_layers for l in prev_layer], [])) - self.all_params = list_remove_repeat(sum([l.all_params for l in prev_layer], [])) - self.all_drop = dict(sum([list(l.all_drop.items()) for l in prev_layer], [])) - elif isinstance(prev_layer, tf.Tensor): - raise Exception("Please use InputLayer to convert Tensor/Placeholder to TL layer") - elif prev_layer is not None: - raise Exception("Unknown layer type %s" % type(prev_layer)) - - def print_params(self, details=True, session=None): - """Print all info of parameters in the network""" - for i, p in enumerate(self.all_params): - if details: - try: - # logging.info(" param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18}) {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name)) - val = p.eval(session=session) - logging.info(" param {:3}: {:20} {:15} {} (mean: {:<18}, median: {:<18}, std: {:<18}) ".format( - i, p.name, str(val.shape), p.dtype.name, val.mean(), np.median(val), val.std())) - except Exception as e: - logging.info(str(e)) - raise Exception("Hint: print params details after tl.layers.initialize_global_variables(sess) or use network.print_params(False).") - else: - logging.info(" param {:3}: {:20} {:15} {}".format(i, p.name, str(p.get_shape()), p.dtype.name)) - logging.info(" num of params: %d" % self.count_params()) - - def print_layers(self): - """Print all info of layers in the network""" - for i, layer in enumerate(self.all_layers): - # logging.info(" layer %d: %s" % (i, str(layer))) - logging.info(" layer {:3}: {:20} {:15} {}".format(i, layer.name, str(layer.get_shape()), layer.dtype.name)) - - def count_params(self): - """Return the number of parameters in the network""" - n_params = 0 - for _i, p in enumerate(self.all_params): - n = 1 - # for s in p.eval().shape: - for s in p.get_shape(): - try: - s = int(s) - except Exception: - s = 1 - if s: - n = n * s - n_params = n_params + n - return n_params - - def 
__str__(self): - return " Last layer is: %s (%s) %s" % (self.__class__.__name__, self.name, self.outputs.get_shape().as_list()) - - def __getitem__(self, key): - net_new = Layer(name=self.name) - net_new.inputs = self.inputs - net_new.outputs = self.outputs[key] - - net_new.all_layers = list(self.all_layers[:-1]) - net_new.all_layers.append(net_new.outputs) - net_new.all_params = list(self.all_params) - net_new.all_drop = dict(self.all_drop) - return net_new - - def __setitem__(self, key, item): - # self.outputs[key] = item - raise NotImplementedError("%s: __setitem__" % self.name) - - def __delitem__(self, key): - raise NotImplementedError("%s: __delitem__" % self.name) - - def __iter__(self): - for x in self.all_layers: - yield x - - def __len__(self): - return len(self.all_layers) - - -class InputLayer(Layer): - """ - The :class:`InputLayer` class is the starting layer of a neural network. - - Parameters - ---------- - inputs : placeholder or tensor - The input of a network. - name : str - A unique layer name. - - """ - - def __init__(self, inputs=None, name='input'): - Layer.__init__(self, name=name) - logging.info("InputLayer %s: %s" % (self.name, inputs.get_shape())) - self.outputs = inputs - self.all_layers = [] - self.all_params = [] - self.all_drop = {} - - -class OneHotInputLayer(Layer): - """ - The :class:`OneHotInputLayer` class is the starting layer of a neural network, see ``tf.one_hot``. - - Parameters - ---------- - inputs : placeholder or tensor - The input of a network. - depth : None or int - If the input indices is rank N, the output will have rank N+1. The new axis is created at dimension `axis` (default: the new axis is appended at the end). - on_value : None or number - The value to represnt `ON`. If None, it will default to the value 1. - off_value : None or number - The value to represnt `OFF`. If None, it will default to the value 0. - axis : None or int - The axis. - dtype : None or TensorFlow dtype - The data type, None means tf.float32. 
- name : str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder(tf.int32, shape=[None]) - >>> net = tl.layers.OneHotInputLayer(x, depth=8, name='onehot') - ... (?, 8) - - """ - - def __init__(self, inputs=None, depth=None, on_value=None, off_value=None, axis=None, dtype=None, name='input'): - Layer.__init__(self, name=name) - logging.info("OneHotInputLayer %s: %s" % (self.name, inputs.get_shape())) - # assert depth != None, "depth is not given" - if depth is None: - logging.info(" [*] depth == None the number of output units is undefined") - self.outputs = tf.one_hot(inputs, depth, on_value=on_value, off_value=off_value, axis=axis, dtype=dtype) - self.all_layers = [] - self.all_params = [] - self.all_drop = {} - - -class Word2vecEmbeddingInputlayer(Layer): - """ - The :class:`Word2vecEmbeddingInputlayer` class is a fully connected layer. - For Word Embedding, words are input as integer index. - The output is the embedded word vector. - - Parameters - ---------- - inputs : placeholder or tensor - The input of a network. For word inputs, please use integer index format, 2D tensor : [batch_size, num_steps(num_words)] - train_labels : placeholder - For word labels. 
integer index format - vocabulary_size : int - The size of vocabulary, number of words - embedding_size : int - The number of embedding dimensions - num_sampled : int - The mumber of negative examples for NCE loss - nce_loss_args : dictionary - The arguments for tf.nn.nce_loss() - E_init : initializer - The initializer for initializing the embedding matrix - E_init_args : dictionary - The arguments for embedding initializer - nce_W_init : initializer - The initializer for initializing the nce decoder weight matrix - nce_W_init_args : dictionary - The arguments for initializing the nce decoder weight matrix - nce_b_init : initializer - The initializer for initializing of the nce decoder bias vector - nce_b_init_args : dictionary - The arguments for initializing the nce decoder bias vector - name : str - A unique layer name - - Attributes - ---------- - nce_cost : Tensor - The NCE loss. - outputs : Tensor - The embedding layer outputs. - normalized_embeddings : Tensor - Normalized embedding matrix. - - Examples - -------- - With TensorLayer : see ``tensorlayer/example/tutorial_word2vec_basic.py`` - - >>> batch_size = 8 - >>> train_inputs = tf.placeholder(tf.int32, shape=(batch_size)) - >>> train_labels = tf.placeholder(tf.int32, shape=(batch_size, 1)) - >>> net = tl.layers.Word2vecEmbeddingInputlayer(inputs=train_inputs, - ... train_labels=train_labels, vocabulary_size=1000, embedding_size=200, - ... num_sampled=64, name='word2vec') - ... (8, 200) - >>> cost = net.nce_cost - >>> train_params = net.all_params - >>> cost = net.nce_cost - >>> train_params = net.all_params - >>> train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize( - ... 
cost, var_list=train_params) - >>> normalized_embeddings = net.normalized_embeddings - - Without TensorLayer : see ``tensorflow/examples/tutorials/word2vec/word2vec_basic.py`` - - >>> train_inputs = tf.placeholder(tf.int32, shape=(batch_size)) - >>> train_labels = tf.placeholder(tf.int32, shape=(batch_size, 1)) - >>> embeddings = tf.Variable( - ... tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) - >>> embed = tf.nn.embedding_lookup(embeddings, train_inputs) - >>> nce_weights = tf.Variable( - ... tf.truncated_normal([vocabulary_size, embedding_size], - ... stddev=1.0 / math.sqrt(embedding_size))) - >>> nce_biases = tf.Variable(tf.zeros([vocabulary_size])) - >>> cost = tf.reduce_mean( - ... tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, - ... inputs=embed, labels=train_labels, - ... num_sampled=num_sampled, num_classes=vocabulary_size, - ... num_true=1)) - - References - ---------- - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `__ - - """ - - def __init__( - self, - inputs=None, - train_labels=None, - vocabulary_size=80000, - embedding_size=200, - num_sampled=64, - nce_loss_args=None, - E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0), - E_init_args=None, - nce_W_init=tf.truncated_normal_initializer(stddev=0.03), - nce_W_init_args=None, - nce_b_init=tf.constant_initializer(value=0.0), - nce_b_init_args=None, - name='word2vec', - ): - if nce_loss_args is None: - nce_loss_args = {} - if E_init_args is None: - E_init_args = {} - if nce_W_init_args is None: - nce_W_init_args = {} - if nce_b_init_args is None: - nce_b_init_args = {} - - Layer.__init__(self, name=name) - self.inputs = inputs - logging.info("Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size)) - - # Look up embeddings for inputs. - # Note: a row of 'embeddings' is the vector representation of a word. 
- # for the sake of speed, it is better to slice the embedding matrix - # instead of transfering a word id to one-hot-format vector and then - # multiply by the embedding matrix. - # embed is the outputs of the hidden layer (embedding layer), it is a - # row vector with 'embedding_size' values. - with tf.variable_scope(name): - embeddings = tf.get_variable( - name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=LayersConfig.tf_dtype, **E_init_args) - embed = tf.nn.embedding_lookup(embeddings, self.inputs) - # Construct the variables for the NCE loss (i.e. negative sampling) - nce_weights = tf.get_variable( - name='nce_weights', shape=(vocabulary_size, embedding_size), initializer=nce_W_init, dtype=LayersConfig.tf_dtype, **nce_W_init_args) - nce_biases = tf.get_variable(name='nce_biases', shape=(vocabulary_size), initializer=nce_b_init, dtype=LayersConfig.tf_dtype, **nce_b_init_args) - - # Compute the average NCE loss for the batch. - # tf.nce_loss automatically draws a new sample of the negative labels - # each time we evaluate the loss. - self.nce_cost = tf.reduce_mean( - tf.nn.nce_loss( - weights=nce_weights, - biases=nce_biases, - inputs=embed, - labels=train_labels, - num_sampled=num_sampled, - num_classes=vocabulary_size, - **nce_loss_args)) - - self.outputs = embed - self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1) - - self.all_layers = [self.outputs] - self.all_params = [embeddings, nce_weights, nce_biases] - self.all_drop = {} - - -class EmbeddingInputlayer(Layer): - """ - The :class:`EmbeddingInputlayer` class is a look-up table for word embedding. - - Word content are accessed using integer indexes, then the output is the embedded word vector. - To train a word embedding matrix, you can used :class:`Word2vecEmbeddingInputlayer`. - If you have a pre-trained matrix, you can assign the parameters into it. - - Parameters - ---------- - inputs : placeholder - The input of a network. For word inputs. 
- Please use integer index format, 2D tensor : (batch_size, num_steps(num_words)). - vocabulary_size : int - The size of vocabulary, number of words. - embedding_size : int - The number of embedding dimensions. - E_init : initializer - The initializer for the embedding matrix. - E_init_args : dictionary - The arguments for embedding matrix initializer. - name : str - A unique layer name. - - Attributes - ---------- - outputs : tensor - The embedding layer output is a 3D tensor in the shape: (batch_size, num_steps(num_words), embedding_size). - - Examples - -------- - >>> batch_size = 8 - >>> x = tf.placeholder(tf.int32, shape=(batch_size, )) - >>> net = tl.layers.EmbeddingInputlayer(inputs=x, vocabulary_size=1000, embedding_size=50, name='embed') - ... (8, 50) - - """ - - def __init__( - self, - inputs=None, - vocabulary_size=80000, - embedding_size=200, - E_init=tf.random_uniform_initializer(-0.1, 0.1), - E_init_args=None, - name='embedding', - ): - if E_init_args is None: - E_init_args = {} - - Layer.__init__(self, name=name) - self.inputs = inputs - logging.info("EmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size)) - - with tf.variable_scope(name): - embeddings = tf.get_variable( - name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=LayersConfig.tf_dtype, **E_init_args) - embed = tf.nn.embedding_lookup(embeddings, self.inputs) - - self.outputs = embed - - self.all_layers = [self.outputs] - self.all_params = [embeddings] - self.all_drop = {} - - -class AverageEmbeddingInputlayer(Layer): - """The :class:`AverageEmbeddingInputlayer` averages over embeddings of inputs. - This is often used as the input layer for models like DAN[1] and FastText[2]. - - Parameters - ---------- - inputs : placeholder or tensor - The network input. - For word inputs, please use integer index format, 2D tensor: (batch_size, num_steps(num_words)). - vocabulary_size : int - The size of vocabulary. 
- embedding_size : int - The dimension of the embedding vectors. - pad_value : int - The scalar padding value used in inputs, 0 as default. - embeddings_initializer : initializer - The initializer of the embedding matrix. - embeddings_kwargs : None or dictionary - The arguments to get embedding matrix variable. - name : str - A unique layer name. - - References - ---------- - - [1] Iyyer, M., Manjunatha, V., Boyd-Graber, J., & Daum’e III, H. (2015). Deep Unordered Composition Rivals Syntactic Methods for Text Classification. In Association for Computational Linguistics. - - [2] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification. `__ - - Examples - --------- - >>> batch_size = 8 - >>> length = 5 - >>> x = tf.placeholder(tf.int32, shape=(batch_size, length)) - >>> net = tl.layers.AverageEmbeddingInputlayer(x, vocabulary_size=1000, embedding_size=50, name='avg') - ... (8, 50) - - """ - - def __init__( - self, - inputs, - vocabulary_size, - embedding_size, - pad_value=0, - embeddings_initializer=tf.random_uniform_initializer(-0.1, 0.1), - embeddings_kwargs=None, - name='average_embedding', - ): - # super().__init__(name=name) # dont work for py2 - Layer.__init__(self, name=name) - - # if embeddings_kwargs is None: - # embeddings_kwargs = {} - - if inputs.get_shape().ndims != 2: - raise ValueError('inputs must be of size batch_size * batch_sentence_length') - - self.inputs = inputs - - logging.info("AverageEmbeddingInputlayer %s: (%d, %d)" % (name, vocabulary_size, embedding_size)) - with tf.variable_scope(name): - self.embeddings = tf.get_variable( - name='embeddings', - shape=(vocabulary_size, embedding_size), - initializer=embeddings_initializer, - dtype=LayersConfig.tf_dtype, - **(embeddings_kwargs or {}) - # **embeddings_kwargs - ) # **(embeddings_kwargs or {}), - - word_embeddings = tf.nn.embedding_lookup( - self.embeddings, - self.inputs, - name='word_embeddings', - ) - # Zero out embeddings of pad value 
- masks = tf.not_equal(self.inputs, pad_value, name='masks') - word_embeddings *= tf.cast( - tf.expand_dims(masks, axis=-1), - # tf.float32, - dtype=LayersConfig.tf_dtype, - ) - sum_word_embeddings = tf.reduce_sum(word_embeddings, axis=1) - - # Count number of non-padding words in each sentence - sentence_lengths = tf.count_nonzero( - masks, - axis=1, - keep_dims=True, - # dtype=tf.float32, - dtype=LayersConfig.tf_dtype, - name='sentence_lengths', - ) - - sentence_embeddings = tf.divide( - sum_word_embeddings, - sentence_lengths + 1e-8, # Add epsilon to avoid dividing by 0 - name='sentence_embeddings') - - self.outputs = sentence_embeddings - self.all_layers = [self.outputs] - self.all_params = [self.embeddings] - self.all_drop = {} - - -class DenseLayer(Layer): - """The :class:`DenseLayer` class is a fully connected layer. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer. - W_init : initializer - The initializer for the weight matrix. - b_init : initializer or None - The initializer for the bias vector. If None, skip biases. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : a str - A unique layer name. - - Examples - -------- - With TensorLayer - - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.DenseLayer(net, 800, act=tf.nn.relu, name='relu') - - Without native TensorLayer APIs, you can do as follow. - - >>> W = tf.Variable( - ... tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W') - >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b') - >>> y = tf.nn.relu(tf.matmul(inputs, W) + b) - - Notes - ----- - If the layer input has more than two axes, it needs to be flatten by using :class:`FlattenLayer`. 
- - """ - - def __init__( - self, - prev_layer, - n_units=100, - act=tf.identity, - W_init=tf.truncated_normal_initializer(stddev=0.1), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='dense', - ): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if self.inputs.get_shape().ndims != 2: - raise Exception("The input dimension must be rank 2, please reshape or flatten it") - - n_in = int(self.inputs.get_shape()[-1]) - self.n_units = n_units - logging.info("DenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__)) - with tf.variable_scope(name): - W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - if b_init is not None: - try: - b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - except Exception: # If initializer is a constant, do not specify shape. - b = tf.get_variable(name='b', initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - self.outputs = act(tf.matmul(self.inputs, W) + b) - else: - self.outputs = act(tf.matmul(self.inputs, W)) - - # Hint : list(), dict() is pass by value (shallow), without them, it is - # pass by reference. - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - if b_init is not None: - self.all_params.extend([W, b]) - else: - self.all_params.append(W) - - -class ReconLayer(DenseLayer): - """A reconstruction layer for :class:`DenseLayer` to implement AutoEncoder. - - It is often used to pre-train the previous :class:`DenseLayer` - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - x_recon : placeholder or tensor - The target for reconstruction. 
- n_units : int - The number of units of the layer. It should equal ``x_recon``. - act : activation function - The activation function of this layer. - Normally, for sigmoid layer, the reconstruction activation is ``sigmoid``; - for rectifying layer, the reconstruction activation is ``softplus``. - name : str - A unique layer name. - - Examples - -------- - >>> x = tf.placeholder(tf.float32, shape=(None, 784)) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.DenseLayer(net, n_units=196, act=tf.nn.sigmoid, name='dense') - >>> recon = tl.layers.ReconLayer(net, x_recon=x, n_units=784, act=tf.nn.sigmoid, name='recon') - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784)) - >>> recon.pretrain(sess, x=x, X_train=X_train, X_val=X_val, denoise_name=None, n_epoch=500, batch_size=128, print_freq=1, save=True, save_name='w1pre_') - - Methods - ------- - pretrain(sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre') - Start to pre-train the parameters of the previous DenseLayer. - - Notes - ----- - The input layer should be `DenseLayer` or a layer that has only one axes. - You may need to modify this part to define your own cost function. - By default, the cost is implemented as follow: - - For sigmoid layer, the implementation can be `UFLDL `__ - - For rectifying layer, the implementation can be `Glorot (2011). 
Deep Sparse Rectifier Neural Networks `__ - - """ - - def __init__( - self, - prev_layer, - x_recon=None, - n_units=784, - act=tf.nn.softplus, - name='recon', - ): - DenseLayer.__init__(self, prev_layer=prev_layer, n_units=n_units, act=act, name=name) - logging.info("%s is a ReconLayer" % self.name) - - # y : reconstruction outputs; train_params : parameters to train - # Note that: train_params = [W_encoder, b_encoder, W_decoder, b_encoder] - y = self.outputs - self.train_params = self.all_params[-4:] - - # ===================================================================== - # - # You need to modify the below cost function and optimizer so as to - # implement your own pre-train method. - # - # ===================================================================== - lambda_l2_w = 0.004 - learning_rate = 0.0001 - logging.info(" lambda_l2_w: %f" % lambda_l2_w) - logging.info(" learning_rate: %f" % learning_rate) - - # Mean-square-error i.e. quadratic-cost - mse = tf.reduce_sum(tf.squared_difference(y, x_recon), 1) - mse = tf.reduce_mean(mse) # in theano: mse = ((y - x) ** 2 ).sum(axis=1).mean() - # mse = tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y, x_recon)), 1)) - # mse = tf.reduce_mean(tf.squared_difference(y, x_recon)) # : Error - # mse = tf.sqrt(tf.reduce_mean(tf.square(y - x_recon))) # : Error - # Cross-entropy - # ce = cost.cross_entropy(y, x_recon) # : list , list , Error (only be used for softmax output) - # ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, x_recon)) # : list , list , Error (only be used for softmax output) - # ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, x_recon)) # : list , index , Error (only be used for softmax output) - L2_w = tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[0]) \ - + tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[2]) # faster than the code below - # L2_w = lambda_l2_w * tf.reduce_mean(tf.square(self.train_params[0])) + lambda_l2_w * 
tf.reduce_mean( tf.square(self.train_params[2])) - - # DropNeuro - # P_o = cost.lo_regularizer(0.03)( - # self.train_params[0]) # + cost.lo_regularizer(0.5)(self.train_params[2]) # : if add lo on decoder, no neuron will be broken - # P_i = cost.li_regularizer(0.03)(self.train_params[0]) # + cost.li_regularizer(0.001)(self.train_params[2]) - - # L1 of activation outputs - activation_out = self.all_layers[-2] - L1_a = 0.001 * tf.reduce_mean(activation_out) # : theano: T.mean( self.a[i] ) # some neuron are broken, white and black - # L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, 0) ) # : some neuron are broken, white and black - # L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, 1) ) # : some neuron are broken, white and black - # KL Divergence - beta = 4 - rho = 0.15 - p_hat = tf.reduce_mean(activation_out, 0) # theano: p_hat = T.mean( self.a[i], axis=0 ) - try: # TF1.0 - KLD = beta * tf.reduce_sum(rho * tf.log(tf.divide(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.subtract(float(1), p_hat)))) - except Exception: # TF0.12 - KLD = beta * tf.reduce_sum(rho * tf.log(tf.div(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.sub(float(1), p_hat)))) - # KLD = beta * tf.reduce_sum( rho * tf.log(rho/ p_hat) + (1- rho) * tf.log((1- rho)/(1- p_hat)) ) - # theano: L1_a = l1_a[i] * T.sum( rho[i] * T.log(rho[i]/ p_hat) + (1- rho[i]) * T.log((1- rho[i])/(1- p_hat)) ) - # Total cost - if act == tf.nn.softplus: - logging.info(' use: mse, L2_w, L1_a') - self.cost = mse + L1_a + L2_w - elif act == tf.nn.sigmoid: - # ---------------------------------------------------- - # Cross-entropy was used in Denoising AE - # logging.info(' use: ce, L2_w, KLD') - # self.cost = ce + L2_w + KLD - # ---------------------------------------------------- - # Mean-squared-error was used in Vanilla AE - logging.info(' use: mse, L2_w, KLD') - self.cost = mse + L2_w + KLD - # ---------------------------------------------------- - # Add DropNeuro penalty (P_o) can remove 
neurons of AE - # logging.info(' use: mse, L2_w, KLD, P_o') - # self.cost = mse + L2_w + KLD + P_o - # ---------------------------------------------------- - # Add DropNeuro penalty (P_i) can remove neurons of previous layer - # If previous layer is InputLayer, it means remove useless features - # logging.info(' use: mse, L2_w, KLD, P_i') - # self.cost = mse + L2_w + KLD + P_i - else: - raise Exception("Don't support the given reconstruct activation function") - - self.train_op = tf.train.AdamOptimizer( - learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize( - self.cost, var_list=self.train_params) - # self.train_op = tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params) - - def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_'): - # ==================================================== - # - # You need to modify the cost function in __init__() so as to - # get your own pre-train method. 
- # - # ==================================================== - logging.info(" [*] %s start pretrain" % self.name) - logging.info(" batch_size: %d" % batch_size) - if denoise_name: - logging.info(" denoising layer keep: %f" % self.all_drop[LayersConfig.set_keep[denoise_name]]) - dp_denoise = self.all_drop[LayersConfig.set_keep[denoise_name]] - else: - logging.info(" no denoising layer") - - for epoch in range(n_epoch): - start_time = time.time() - for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True): - dp_dict = utils.dict_to_one(self.all_drop) - if denoise_name: - dp_dict[LayersConfig.set_keep[denoise_name]] = dp_denoise - feed_dict = {x: X_train_a} - feed_dict.update(dp_dict) - sess.run(self.train_op, feed_dict=feed_dict) - - if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - logging.info("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) - train_loss, n_batch = 0, 0 - for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True): - dp_dict = utils.dict_to_one(self.all_drop) - feed_dict = {x: X_train_a} - feed_dict.update(dp_dict) - err = sess.run(self.cost, feed_dict=feed_dict) - train_loss += err - n_batch += 1 - logging.info(" train loss: %f" % (train_loss / n_batch)) - val_loss, n_batch = 0, 0 - for X_val_a, _ in iterate.minibatches(X_val, X_val, batch_size, shuffle=True): - dp_dict = utils.dict_to_one(self.all_drop) - feed_dict = {x: X_val_a} - feed_dict.update(dp_dict) - err = sess.run(self.cost, feed_dict=feed_dict) - val_loss += err - n_batch += 1 - logging.info(" val loss: %f" % (val_loss / n_batch)) - if save: - try: - visualize.draw_weights( - self.train_params[0].eval(), second=10, saveable=True, shape=[28, 28], name=save_name + str(epoch + 1), fig_idx=2012) - files.save_npz([self.all_params[0]], name=save_name + str(epoch + 1) + '.npz') - except Exception: - raise Exception( - "You should change the visualize.W() in ReconLayer.pretrain(), if you want to save the feature 
images for different dataset") - - -class DropoutLayer(Layer): - """ - The :class:`DropoutLayer` class is a noise layer which randomly set some - activations to zero according to a keeping probability. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - keep : float - The keeping probability. - The lower the probability it is, the more activations are set to zero. - is_fix : boolean - Fixing probability or nor. Default is False. - If True, the keeping probability is fixed and cannot be changed via `feed_dict`. - is_train : boolean - Trainable or not. If False, skip this layer. Default is True. - seed : int or None - The seed for random dropout. - name : str - A unique layer name. - - Examples - -------- - Method 1: Using ``all_drop`` see `tutorial_mlp_dropout1.py `__ - - >>> net = tl.layers.InputLayer(x, name='input_layer') - >>> net = tl.layers.DropoutLayer(net, keep=0.8, name='drop1') - >>> net = tl.layers.DenseLayer(net, n_units=800, act=tf.nn.relu, name='relu1') - >>> ... - >>> # For training, enable dropout as follow. - >>> feed_dict = {x: X_train_a, y_: y_train_a} - >>> feed_dict.update( net.all_drop ) # enable noise layers - >>> sess.run(train_op, feed_dict=feed_dict) - >>> ... - >>> # For testing, disable dropout as follow. - >>> dp_dict = tl.utils.dict_to_one( net.all_drop ) # disable noise layers - >>> feed_dict = {x: X_val_a, y_: y_val_a} - >>> feed_dict.update(dp_dict) - >>> err, ac = sess.run([cost, acc], feed_dict=feed_dict) - >>> ... - - Method 2: Without using ``all_drop`` see `tutorial_mlp_dropout2.py `__ - - >>> def mlp(x, is_train=True, reuse=False): - >>> with tf.variable_scope("MLP", reuse=reuse): - >>> tl.layers.set_name_reuse(reuse) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.DropoutLayer(net, keep=0.8, is_fix=True, - >>> is_train=is_train, name='drop1') - >>> ... 
- >>> return net - >>> # define inferences - >>> net_train = mlp(x, is_train=True, reuse=False) - >>> net_test = mlp(x, is_train=False, reuse=True) - - """ - - def __init__( - self, - prev_layer, - keep=0.5, - is_fix=False, - is_train=True, - seed=None, - name='dropout_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - if is_train is False: - logging.info(" skip DropoutLayer") - self.outputs = prev_layer.outputs - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - else: - self.inputs = prev_layer.outputs - logging.info("DropoutLayer %s: keep:%f is_fix:%s" % (self.name, keep, is_fix)) - - # The name of placeholder for keep_prob is the same with the name - # of the Layer. - if is_fix: - self.outputs = tf.nn.dropout(self.inputs, keep, seed=seed, name=name) - else: - LayersConfig.set_keep[name] = tf.placeholder(tf.float32) - self.outputs = tf.nn.dropout(self.inputs, LayersConfig.set_keep[name], seed=seed, name=name) # 1.2 - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - if is_fix is False: - self.all_drop.update({LayersConfig.set_keep[name]: keep}) - self.all_layers.append(self.outputs) - - # logging.info(set_keep[name]) - # Tensor("Placeholder_2:0", dtype=float32) - # logging.info(denoising1) - # Tensor("Placeholder_2:0", dtype=float32) - # logging.info(self.all_drop[denoising1]) - # 0.8 - # - # https://www.tensorflow.org/versions/r0.8/tutorials/mnist/tf/index.html - # The optional feed_dict argument allows the caller to override the - # value of tensors in the graph. Each key in feed_dict can be one of - # the following types: - # If the key is a Tensor, the value may be a Python scalar, string, - # list, or numpy ndarray that can be converted to the same dtype as that - # tensor. 
Additionally, if the key is a placeholder, the shape of the - # value will be checked for compatibility with the placeholder. - # If the key is a SparseTensor, the value should be a SparseTensorValue. - - -class GaussianNoiseLayer(Layer): - """ - The :class:`GaussianNoiseLayer` class is noise layer that adding noise with - gaussian distribution to the activation. - - Parameters - ------------ - layer : :class:`Layer` - Previous layer. - mean : float - The mean. Default is 0. - stddev : float - The standard deviation. Default is 1. - is_train : boolean - Is trainable layer. If False, skip this layer. default is True. - seed : int or None - The seed for random noise. - name : str - A unique layer name. - - Examples - ---------- - >>> x = tf.placeholder(tf.float32, shape=(100, 784)) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.DenseLayer(net, n_units=100, act=tf.nn.relu, name='dense3') - >>> net = tl.layers.GaussianNoiseLayer(net, name='gaussian') - ... (64, 100) - - """ - - def __init__( - self, - prev_layer, - mean=0.0, - stddev=1.0, - is_train=True, - seed=None, - name='gaussian_noise_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - if is_train is False: - logging.info(" skip GaussianNoiseLayer") - self.outputs = prev_layer.outputs - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - else: - self.inputs = prev_layer.outputs - logging.info("GaussianNoiseLayer %s: mean:%f stddev:%f" % (self.name, mean, stddev)) - with tf.variable_scope(name): - # noise = np.random.normal(0.0 , sigma , tf.to_int64(self.inputs).get_shape()) - noise = tf.random_normal(shape=self.inputs.get_shape(), mean=mean, stddev=stddev, seed=seed) - self.outputs = self.inputs + noise - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - - -class 
DropconnectDenseLayer(Layer): - """ - The :class:`DropconnectDenseLayer` class is :class:`DenseLayer` with DropConnect - behaviour which randomly removes connections between this layer and the previous - layer according to a keeping probability. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - keep : float - The keeping probability. - The lower the probability it is, the more activations are set to zero. - n_units : int - The number of units of this layer. - act : activation function - The activation function of this layer. - W_init : weights initializer - The initializer for the weight matrix. - b_init : biases initializer - The initializer for the bias vector. - W_init_args : dictionary - The arguments for the weight matrix initializer. - b_init_args : dictionary - The arguments for the bias vector initializer. - name : str - A unique layer name. - - Examples - -------- - >>> net = tl.layers.InputLayer(x, name='input_layer') - >>> net = tl.layers.DropconnectDenseLayer(net, keep=0.8, - ... n_units=800, act=tf.nn.relu, name='relu1') - >>> net = tl.layers.DropconnectDenseLayer(net, keep=0.5, - ... n_units=800, act=tf.nn.relu, name='relu2') - >>> net = tl.layers.DropconnectDenseLayer(net, keep=0.5, - ... n_units=10, name='output') - - References - ---------- - - `Wan, L. (2013). 
Regularization of neural networks using dropconnect `__ - - """ - - def __init__( - self, - prev_layer, - keep=0.5, - n_units=100, - act=tf.identity, - W_init=tf.truncated_normal_initializer(stddev=0.1), - b_init=tf.constant_initializer(value=0.0), - W_init_args=None, - b_init_args=None, - name='dropconnect_layer', - ): - if W_init_args is None: - W_init_args = {} - if b_init_args is None: - b_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - if self.inputs.get_shape().ndims != 2: - raise Exception("The input dimension must be rank 2") - n_in = int(self.inputs.get_shape()[-1]) - self.n_units = n_units - logging.info("DropconnectDenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__)) - - with tf.variable_scope(name): - W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **W_init_args) - b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=LayersConfig.tf_dtype, **b_init_args) - # self.outputs = act(tf.matmul(self.inputs, W) + b) - - LayersConfig.set_keep[name] = tf.placeholder(tf.float32) - W_dropcon = tf.nn.dropout(W, LayersConfig.set_keep[name]) - self.outputs = act(tf.matmul(self.inputs, W_dropcon) + b) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - # self.all_drop.update({LayersConfig.set_keep[name]: keep}) - # self.all_layers.append(self.outputs) - # self.all_params.extend([W, b]) - - self.all_layers.append(self.outputs) diff --git a/tensorlayer/layers/extend.py b/tensorlayer/layers/extend.py deleted file mode 100644 index b97261a..0000000 --- a/tensorlayer/layers/extend.py +++ /dev/null @@ -1,98 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. 
import _logging as logging -import tensorflow as tf - -__all__ = [ - 'ExpandDimsLayer', - 'TileLayer', -] - - -class ExpandDimsLayer(Layer): - """ - The :class:`ExpandDimsLayer` class inserts a dimension of 1 into a tensor's shape, - see `tf.expand_dims() `__ . - - Parameters - ---------- - layer : :class:`Layer` - The previous layer. - axis : int - The dimension index at which to expand the shape of input. - name : str - A unique layer name. - - Examples - -------- - >>> x = tf.placeholder(tf.float32, (None, 100)) - >>> n = tl.layers.InputLayer(x, name='in') - >>> n = tl.layers.ExpandDimsLayer(n, 2) - ... [None, 100, 1] - """ - - def __init__( - self, - prev_layer, - axis, - name='expand_dims', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - - logging.info("ExpandDimsLayer %s: axis:%d" % (self.name, axis)) - with tf.variable_scope(name): - try: # TF12 TF1.0 - self.outputs = tf.expand_dims(self.inputs, axis=axis) - except Exception: # TF11 - self.outputs = tf.expand_dims(self.inputs, dim=axis) - # self.all_layers = list(layer.all_layers) - self.all_params = list(prev_layer.all_params) - self.all_drop = dict(prev_layer.all_drop) - self.all_layers.append(self.outputs) - # self.all_params.extend( variables ) - - -class TileLayer(Layer): - """ - The :class:`TileLayer` class constructs a tensor by tiling a given tensor, - see `tf.tile() `__ . - - Parameters - ---------- - layer : :class:`Layer` - The previous layer. - multiples: tensor - Must be one of the following types: int32, int64. - 1-D Length must be the same as the number of dimensions in input. - name : str - A unique layer name. - - - Examples - -------- - >>> x = tf.placeholder(tf.float32, (None, 100)) - >>> n = tl.layers.InputLayer(x, name='in') - >>> n = tl.layers.ExpandDimsLayer(n, 2) - >>> n = tl.layers.TileLayer(n, [-1, 1, 3]) - ... 
[None, 100, 3] - """ - - def __init__( - self, - prev_layer=None, - multiples=None, - name='tile', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - - logging.info("TileLayer %s: multiples:%s" % (self.name, multiples)) - with tf.variable_scope(name): - self.outputs = tf.tile(self.inputs, multiples=multiples) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - # self.all_params.extend( variables ) diff --git a/tensorlayer/layers/flow_control.py b/tensorlayer/layers/flow_control.py deleted file mode 100644 index 2ab0d12..0000000 --- a/tensorlayer/layers/flow_control.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'MultiplexerLayer', -] - - -class MultiplexerLayer(Layer): - """ - The :class:`MultiplexerLayer` selects inputs to be forwarded to output. - see `tutorial_mnist_multiplexer.py`. - - Parameters - ---------- - layers : a list of :class:`Layer` - The input layers. - name : str - A unique layer name. - - Attributes - ---------- - sel : placeholder - The placeholder takes an integer for selecting which layer to output. 
- - Examples - -------- - >>> x = tf.placeholder(tf.float32, shape=(None, 784), name='x') - >>> # define the network - >>> net_in = tl.layers.InputLayer(x, name='input') - >>> net_in = tl.layers.DropoutLayer(net_in, keep=0.8, name='drop1') - >>> # net 0 - >>> net_0 = tl.layers.DenseLayer(net_in, n_units=800, act=tf.nn.relu, name='net0/relu1') - >>> net_0 = tl.layers.DropoutLayer(net_0, keep=0.5, name='net0/drop2') - >>> net_0 = tl.layers.DenseLayer(net_0, n_units=800, act=tf.nn.relu, name='net0/relu2') - >>> # net 1 - >>> net_1 = tl.layers.DenseLayer(net_in, n_units=800, act=tf.nn.relu, name='net1/relu1') - >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop2') - >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800, act=tf.nn.relu, name='net1/relu2') - >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop3') - >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800, act=tf.nn.relu, name='net1/relu3') - >>> # multiplexer - >>> net_mux = tl.layers.MultiplexerLayer(layers=[net_0, net_1], name='mux') - >>> network = tl.layers.ReshapeLayer(net_mux, shape=(-1, 800), name='reshape') - >>> network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') - >>> # output layer - >>> network = tl.layers.DenseLayer(network, n_units=10, act=tf.identity, name='output') - - """ - - def __init__(self, layers, name='mux_layer'): - Layer.__init__(self, prev_layer=layers, name=name) - self.n_inputs = len(layers) - - self.inputs = [] - for l in layers: - self.inputs.append(l.outputs) - try: # TF1.0 - all_inputs = tf.stack(self.inputs, name=name) # pack means concat a list of tensor in a new dim # 1.2 - except Exception: - all_inputs = tf.pack(self.inputs, name=name) # pack means concat a list of tensor in a new dim # 1.2 - - logging.info("MultiplexerLayer %s: n_inputs:%d" % (self.name, self.n_inputs)) - - self.sel = tf.placeholder(tf.int32) - self.outputs = tf.gather(all_inputs, self.sel, name=name) # [sel, :, : ...] 
# 1.2 - - # logging.info(self.outputs, vars(self.outputs)) - # # tf.reshape(self.outputs, shape=) - # exit() - # # the same with ConcatLayer - # self.all_layers = list(layers[0].all_layers) - # self.all_params = list(layers[0].all_params) - # self.all_drop = dict(layers[0].all_drop) - # - # for i in range(1, len(layers)): - # self.all_layers.extend(list(layers[i].all_layers)) - # self.all_params.extend(list(layers[i].all_params)) - # self.all_drop.update(dict(layers[i].all_drop)) - # - # self.all_layers = list_remove_repeat(self.all_layers) - # self.all_params = list_remove_repeat(self.all_params) - # # self.all_drop = list_remove_repeat(self.all_drop) - - self.all_layers.append(self.outputs) diff --git a/tensorlayer/layers/importer.py b/tensorlayer/layers/importer.py deleted file mode 100644 index 58ac031..0000000 --- a/tensorlayer/layers/importer.py +++ /dev/null @@ -1,232 +0,0 @@ -# -*- coding: utf-8 -*- - -from tensorflow.python.util.deprecation import deprecated -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'LambdaLayer', - 'SlimNetsLayer', - 'KerasLayer', - 'EstimatorLayer', -] - - -class LambdaLayer(Layer): - """A layer that takes a user-defined function using TensorFlow Lambda. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - fn : function - The function that applies to the outputs of previous layer. - fn_args : dictionary or None - The arguments for the function (option). - name : str - A unique layer name. 
- - Examples - --------- - Non-parametric case - - >>> x = tf.placeholder(tf.float32, shape=[None, 1], name='x') - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = LambdaLayer(net, lambda x: 2*x, name='lambda') - - Parametric case, merge other wrappers into TensorLayer - - >>> from keras.layers import * - >>> from tensorlayer.layers import * - >>> def keras_block(x): - >>> x = Dropout(0.8)(x) - >>> x = Dense(800, activation='relu')(x) - >>> x = Dropout(0.5)(x) - >>> x = Dense(800, activation='relu')(x) - >>> x = Dropout(0.5)(x) - >>> logits = Dense(10, activation='linear')(x) - >>> return logits - >>> net = InputLayer(x, name='input') - >>> net = LambdaLayer(net, fn=keras_block, name='keras') - - """ - - def __init__( - self, - prev_layer, - fn, - fn_args=None, - name='lambda_layer', - ): - if fn_args is None: - fn_args = {} - Layer.__init__(self, prev_layer=prev_layer, name=name) - assert prev_layer is not None - assert fn is not None - self.inputs = prev_layer.outputs - logging.info("LambdaLayer %s" % self.name) - with tf.variable_scope(name) as vs: - self.outputs = fn(self.inputs, **fn_args) - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) - - -class SlimNetsLayer(Layer): - """A layer that merges TF-Slim models into TensorLayer. - - Models can be found in `slim-model `__, - see Inception V3 example on `Github `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - slim_layer : a slim network function - The network you want to stack onto, end with ``return net, end_points``. - slim_args : dictionary - The arguments for the slim model. - name : str - A unique layer name. - - Notes - ----- - - As TF-Slim stores the layers as dictionary, the ``all_layers`` in this network is not in order ! 
Fortunately, the ``all_params`` are in order. - - """ - - def __init__( - self, - prev_layer, - slim_layer, - slim_args=None, - name='tfslim_layer', - ): - if slim_layer is None: - raise ValueError("slim layer is None") - if slim_args is None: - slim_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("SlimNetsLayer %s: %s" % (self.name, slim_layer.__name__)) - - # with tf.variable_scope(name) as vs: - # net, end_points = slim_layer(self.inputs, **slim_args) - # slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - net, end_points = slim_layer(self.inputs, **slim_args) - - slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=name) - if slim_variables == []: - logging.info( - "No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file, see tutorial_inceptionV3_tfslim.py for more details" - % name) - - self.outputs = net - - slim_layers = [] - for v in end_points.values(): - # tf.contrib.layers.summaries.summarize_activation(v) - slim_layers.append(v) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - self.all_layers.extend(slim_layers) - self.all_params.extend(slim_variables) - - -@deprecated("2018-06-30", "This layer will be deprecated soon as :class:`LambdaLayer` can do the same thing.") -class KerasLayer(Layer): - """A layer to import Keras layers into TensorLayer. - - Example can be found here `tutorial_keras.py `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - keras_layer : function - A tensor in tensor out function for building model. - keras_args : dictionary - The arguments for the `keras_layer`. - name : str - A unique layer name. 
- - """ - - def __init__( - self, - prev_layer, - keras_layer, - keras_args=None, - name='keras_layer', - ): - if prev_layer is None: - raise ValueError("layer is None") - if keras_args is None: - keras_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("KerasLayer %s: %s" % (self.name, keras_layer)) - logging.info("This API will be removed, please use LambdaLayer instead.") - with tf.variable_scope(name) as vs: - self.outputs = keras_layer(self.inputs, **keras_args) - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) - - -@deprecated("2018-06-30", "This layer will be deprecated soon as :class:`LambdaLayer` can do the same thing.") -class EstimatorLayer(Layer): - """A layer that accepts a user-defined model. - - It is similar with :class:`KerasLayer`, see `tutorial_keras.py `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - model_fn : function - A tensor in tensor out function for building model. - args : dictionary - The arguments for the `model_fn`. - name : str - A unique layer name. 
- - """ - - def __init__( - self, - prev_layer, - model_fn, - args=None, - name='estimator_layer', - ): - if model_fn is None: - raise ValueError('model fn is None') - if args is None: - args = {} - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("EstimatorLayer %s: %s" % (self.name, model_fn)) - logging.info("This API will be removed, please use LambdaLayer instead.") - with tf.variable_scope(name) as vs: - self.outputs = model_fn(self.inputs, **args) - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) diff --git a/tensorlayer/layers/merge.py b/tensorlayer/layers/merge.py deleted file mode 100644 index b2509bf..0000000 --- a/tensorlayer/layers/merge.py +++ /dev/null @@ -1,147 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'ConcatLayer', - 'ElementwiseLayer', -] - - -class ConcatLayer(Layer): - """A layer that concats multiple tensors according to given axis.. - - Parameters - ---------- - layers : list of :class:`Layer` - List of layers to concatenate. - concat_dim : int - The dimension to concatenate. - name : str - A unique layer name. - - Examples - ---------- - >>> sess = tf.InteractiveSession() - >>> x = tf.placeholder(tf.float32, shape=[None, 784]) - >>> inputs = tl.layers.InputLayer(x, name='input_layer') - >>> net1 = tl.layers.DenseLayer(inputs, 800, act=tf.nn.relu, name='relu1_1') - >>> net2 = tl.layers.DenseLayer(inputs, 300, act=tf.nn.relu, name='relu2_1') - >>> net = tl.layers.ConcatLayer([net1, net2], 1, name ='concat_layer') - ... InputLayer input_layer (?, 784) - ... DenseLayer relu1_1: 800, relu - ... DenseLayer relu2_1: 300, relu - ... 
ConcatLayer concat_layer, 1100 - >>> tl.layers.initialize_global_variables(sess) - >>> net.print_params() - ... [TL] param 0: relu1_1/W:0 (784, 800) float32_ref - ... [TL] param 1: relu1_1/b:0 (800,) float32_ref - ... [TL] param 2: relu2_1/W:0 (784, 300) float32_ref - ... [TL] param 3: relu2_1/b:0 (300,) float32_ref - ... num of params: 863500 - >>> net.print_layers() - ... [TL] layer 0: relu1_1/Relu:0 (?, 800) float32 - ... [TL] layer 1: relu2_1/Relu:0 (?, 300) float32 - ... [TL] layer 2: concat_layer:0 (?, 1100) float32 - - """ - - def __init__( - self, - layers, - concat_dim=-1, - name='concat_layer', - ): - Layer.__init__(self, prev_layer=layers, name=name) - self.inputs = [] - for l in layers: - self.inputs.append(l.outputs) - try: # TF1.0 - self.outputs = tf.concat(self.inputs, concat_dim, name=name) - except Exception: # TF0.12 - self.outputs = tf.concat(concat_dim, self.inputs, name=name) - - logging.info("ConcatLayer %s: axis: %d" % (self.name, concat_dim)) - - # self.all_layers = list(layers[0].all_layers) - # self.all_params = list(layers[0].all_params) - # self.all_drop = dict(layers[0].all_drop) - # - # for i in range(1, len(layers)): - # self.all_layers.extend(list(layers[i].all_layers)) - # self.all_params.extend(list(layers[i].all_params)) - # self.all_drop.update(dict(layers[i].all_drop)) - # - # self.all_layers = list_remove_repeat(self.all_layers) - # self.all_params = list_remove_repeat(self.all_params) - - self.all_layers.append(self.outputs) - - -class ElementwiseLayer(Layer): - """A layer that combines multiple :class:`Layer` that have the same output shapes - according to an element-wise operation. - - Parameters - ---------- - layers : list of :class:`Layer` - The list of layers to combine. - combine_fn : a TensorFlow element-wise combine function - e.g. AND is ``tf.minimum`` ; OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on. - See `TensorFlow Math API `__ . 
- act : activation function - The activation function of this layer. - name : str - A unique layer name. - - Examples - -------- - >>> net_0 = tl.layers.DenseLayer(inputs, n_units=500, act=tf.nn.relu, name='net_0') - >>> net_1 = tl.layers.DenseLayer(inputs, n_units=500, act=tf.nn.relu, name='net_1') - >>> net = tl.layers.ElementwiseLayer([net_0, net_1], combine_fn=tf.minimum, name='minimum') - >>> net.print_params(False) - ... [TL] param 0: net_0/W:0 (784, 500) float32_ref - ... [TL] param 1: net_0/b:0 (500,) float32_ref - ... [TL] param 2: net_1/W:0 (784, 500) float32_ref - ... [TL] param 3: net_1/b:0 (500,) float32_ref - >>> net.print_layers() - ... [TL] layer 0: net_0/Relu:0 (?, 500) float32 - ... [TL] layer 1: net_1/Relu:0 (?, 500) float32 - ... [TL] layer 2: minimum:0 (?, 500) float32 - """ - - def __init__( - self, - layers, - combine_fn=tf.minimum, - act=None, - name='elementwise_layer', - ): - Layer.__init__(self, prev_layer=layers, name=name) - - logging.info("ElementwiseLayer %s: size:%s fn:%s" % (self.name, layers[0].outputs.get_shape(), combine_fn.__name__)) - - self.outputs = layers[0].outputs - - for l in layers[1:]: - self.outputs = combine_fn(self.outputs, l.outputs, name=name) - - if act: - self.outputs = act(self.outputs) - - # self.all_layers = list(layers[0].all_layers) - # self.all_params = list(layers[0].all_params) - # self.all_drop = dict(layers[0].all_drop) - # - # for i in range(1, len(layers)): - # self.all_layers.extend(list(layers[i].all_layers)) - # self.all_params.extend(list(layers[i].all_params)) - # self.all_drop.update(dict(layers[i].all_drop)) - # - # self.all_layers = list_remove_repeat(self.all_layers) - # self.all_params = list_remove_repeat(self.all_params) - # # self.all_drop = list_remove_repeat(self.all_drop) - - self.all_layers.append(self.outputs) diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py deleted file mode 100644 index 6d5f028..0000000 --- a/tensorlayer/layers/normalization.py 
+++ /dev/null @@ -1,293 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'LocalResponseNormLayer', - 'BatchNormLayer', - 'InstanceNormLayer', - 'LayerNormLayer', -] - - -class LocalResponseNormLayer(Layer): - """The :class:`LocalResponseNormLayer` layer is for Local Response Normalization. - See ``tf.nn.local_response_normalization`` or ``tf.nn.lrn`` for new TF version. - The 4-D input tensor is a 3-D array of 1-D vectors (along the last dimension), and each vector is normalized independently. - Within a given vector, each component is divided by the weighted square-sum of inputs within depth_radius. - - Parameters - ----------- - layer : :class:`Layer` - The previous layer with a 4D output shape. - depth_radius : int - Depth radius. 0-D. Half-width of the 1-D normalization window. - bias : float - An offset which is usually positive and shall avoid dividing by 0. - alpha : float - A scale factor which is usually positive. - beta : float - An exponent. - name : str - A unique layer name. - - """ - - def __init__( - self, - prev_layer, - depth_radius=None, - bias=None, - alpha=None, - beta=None, - name='lrn_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("LocalResponseNormLayer %s: depth_radius: %s, bias: %s, alpha: %s, beta: %s" % (self.name, str(depth_radius), str(bias), str(alpha), - str(beta))) - with tf.variable_scope(name): - self.outputs = tf.nn.lrn(self.inputs, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - - -class BatchNormLayer(Layer): - """ - The :class:`BatchNormLayer` is a batch normalization layer for both fully-connected and convolution outputs. - See ``tf.nn.batch_normalization`` and ``tf.nn.moments``. 
- - Parameters - ---------- - layer : :class:`Layer` - The previous layer. - decay : float - A decay factor for `ExponentialMovingAverage`. - Suggest to use a large value for large dataset. - epsilon : float - Eplison. - act : activation function - The activation function of this layer. - is_train : boolean - Is being used for training or inference. - beta_init : initializer - The initializer for initializing beta. - gamma_init : initializer - The initializer for initializing gamma. - dtype : TensorFlow dtype - tf.float32 (default) or tf.float16. - name : str - A unique layer name. - - References - ---------- - - `Source `__ - - `stackoverflow `__ - - """ - - def __init__( - self, - prev_layer, - decay=0.9, - epsilon=0.00001, - act=tf.identity, - is_train=False, - beta_init=tf.zeros_initializer, - gamma_init=tf.random_normal_initializer(mean=1.0, stddev=0.002), - name='batchnorm_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("BatchNormLayer %s: decay:%f epsilon:%f act:%s is_train:%s" % (self.name, decay, epsilon, act.__name__, is_train)) - x_shape = self.inputs.get_shape() - params_shape = x_shape[-1:] - - from tensorflow.python.training import moving_averages - - with tf.variable_scope(name): - axis = list(range(len(x_shape) - 1)) - - # 1. beta, gamma - if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer: - beta_init = beta_init() - beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train) - - gamma = tf.get_variable( - 'gamma', - shape=params_shape, - initializer=gamma_init, - dtype=LayersConfig.tf_dtype, - trainable=is_train, - ) - - # 2. 
- if tf.__version__ > '0.12.1': - moving_mean_init = tf.zeros_initializer() - else: - moving_mean_init = tf.zeros_initializer - moving_mean = tf.get_variable('moving_mean', params_shape, initializer=moving_mean_init, dtype=LayersConfig.tf_dtype, trainable=False) - moving_variance = tf.get_variable( - 'moving_variance', - params_shape, - initializer=tf.constant_initializer(1.), - dtype=LayersConfig.tf_dtype, - trainable=False, - ) - - # 3. - # These ops will only be preformed when training. - mean, variance = tf.nn.moments(self.inputs, axis) - try: # TF12 - update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay, zero_debias=False) # if zero_debias=True, has bias - update_moving_variance = moving_averages.assign_moving_average( - moving_variance, variance, decay, zero_debias=False) # if zero_debias=True, has bias - # logging.info("TF12 moving") - except Exception: # TF11 - update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay) - update_moving_variance = moving_averages.assign_moving_average(moving_variance, variance, decay) - # logging.info("TF11 moving") - - def mean_var_with_update(): - with tf.control_dependencies([update_moving_mean, update_moving_variance]): - return tf.identity(mean), tf.identity(variance) - - if is_train: - mean, var = mean_var_with_update() - self.outputs = act(tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon)) - else: - self.outputs = act(tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon)) - - variables = [beta, gamma, moving_mean, moving_variance] - - # logging.info(len(variables)) - # for idx, v in enumerate(variables): - # logging.info(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v)) - # exit() - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) - - -class 
InstanceNormLayer(Layer): - """The :class:`InstanceNormLayer` class is a for instance normalization. - - Parameters - ----------- - layer : :class:`Layer` - The previous layer. - act : activation function. - The activation function of this layer. - epsilon : float - Eplison. - name : str - A unique layer name - - """ - - def __init__( - self, - prev_layer, - act=tf.identity, - epsilon=1e-5, - name='instan_norm', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("InstanceNormLayer %s: epsilon:%f act:%s" % (self.name, epsilon, act.__name__)) - - with tf.variable_scope(name) as vs: - mean, var = tf.nn.moments(self.inputs, [1, 2], keep_dims=True) - scale = tf.get_variable( - 'scale', [self.inputs.get_shape()[-1]], initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02), dtype=LayersConfig.tf_dtype) - offset = tf.get_variable('offset', [self.inputs.get_shape()[-1]], initializer=tf.constant_initializer(0.0), dtype=LayersConfig.tf_dtype) - self.outputs = scale * tf.div(self.inputs - mean, tf.sqrt(var + epsilon)) + offset - self.outputs = act(self.outputs) - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) - - -class LayerNormLayer(Layer): - """ - The :class:`LayerNormLayer` class is for layer normalization, see `tf.contrib.layers.layer_norm `__. - - Parameters - ---------- - layer : :class:`Layer` - The previous layer. - act : activation function - The activation function of this layer. - others : _ - `tf.contrib.layers.layer_norm `__. 
- - """ - - def __init__(self, - prev_layer, - center=True, - scale=True, - act=tf.identity, - reuse=None, - variables_collections=None, - outputs_collections=None, - trainable=True, - begin_norm_axis=1, - begin_params_axis=-1, - name='layernorm'): - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("LayerNormLayer %s: act:%s" % (self.name, act.__name__)) - - if tf.__version__ < "1.3": - # raise Exception("Please use TF 1.3+") - with tf.variable_scope(name) as vs: - self.outputs = tf.contrib.layers.layer_norm( - self.inputs, - center=center, - scale=scale, - activation_fn=act, - reuse=reuse, - variables_collections=variables_collections, - outputs_collections=outputs_collections, - trainable=trainable, - # begin_norm_axis=begin_norm_axis, - # begin_params_axis=begin_params_axis, - scope='var', - ) - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - else: - with tf.variable_scope(name) as vs: - self.outputs = tf.contrib.layers.layer_norm( - self.inputs, - center=center, - scale=scale, - activation_fn=act, - reuse=reuse, - variables_collections=variables_collections, - outputs_collections=outputs_collections, - trainable=trainable, - begin_norm_axis=begin_norm_axis, - begin_params_axis=begin_params_axis, - scope='var', - ) - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) diff --git a/tensorlayer/layers/object_detection.py b/tensorlayer/layers/object_detection.py deleted file mode 100644 index fde01d0..0000000 --- a/tensorlayer/layers/object_detection.py +++ /dev/null @@ -1,56 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. 
import _logging as logging - -__all__ = [ - 'ROIPoolingLayer', -] - - -class ROIPoolingLayer(Layer): - """ - The region of interest pooling layer. - - Parameters - ----------- - layer : :class:`Layer` - The previous layer. - rois : tuple of int - Regions of interest in the format of (feature map index, upper left, bottom right). - pool_width : int - The size of the pooling sections. - pool_width : int - The size of the pooling sections. - name : str - A unique layer name. - - Notes - ----------- - - This implementation is imported from `Deepsense-AI `__ . - - Please install it by the instruction `HERE `__. - - """ - - def __init__( - self, - prev_layer, - rois, - pool_height=2, - pool_width=2, - name='roipooling_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("ROIPoolingLayer %s: (%d, %d)" % (self.name, pool_height, pool_width)) - try: - from tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops import roi_pooling - except Exception as e: - logging.info(e) - logging.info("HINT: 1. https://github.com/deepsense-ai/roi-pooling 2. tensorlayer/third_party/roi_pooling") - self.outputs = roi_pooling(self.inputs, rois, pool_height, pool_width) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) diff --git a/tensorlayer/layers/padding.py b/tensorlayer/layers/padding.py deleted file mode 100644 index 1d52c08..0000000 --- a/tensorlayer/layers/padding.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'PadLayer', -] - - -class PadLayer(Layer): - """ - The :class:`PadLayer` class is a padding layer for any mode and dimension. - Please see `tf.pad `__ for usage. - - Parameters - ---------- - layer : :class:`Layer` - The previous layer. 
- paddings : Tensor - The int32 values to pad. - mode : str - "CONSTANT", "REFLECT", or "SYMMETRIC" (case-insensitive). - name : str - A unique layer name. - - """ - - def __init__( - self, - prev_layer, - paddings, - mode='CONSTANT', - name='pad_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - assert paddings is not None, "paddings should be a Tensor of type int32. see https://www.tensorflow.org/api_docs/python/tf/pad" - self.inputs = prev_layer.outputs - logging.info("PadLayer %s: paddings:%s mode:%s" % (self.name, list(paddings), mode)) - - self.outputs = tf.pad(self.inputs, paddings=paddings, mode=mode, name=name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py deleted file mode 100644 index c17331f..0000000 --- a/tensorlayer/layers/pooling.py +++ /dev/null @@ -1,490 +0,0 @@ -# -*- coding: utf-8 -*- - -import copy -import tensorflow as tf -from .. import _logging as logging -from .core import * - -__all__ = [ - 'PoolLayer', - 'MaxPool1d', - 'MeanPool1d', - 'MaxPool2d', - 'MeanPool2d', - 'MaxPool3d', - 'MeanPool3d', - 'GlobalMaxPool1d', - 'GlobalMeanPool1d', - 'GlobalMaxPool2d', - 'GlobalMeanPool2d', -] - - -class PoolLayer(Layer): - """ - The :class:`PoolLayer` class is a Pooling layer. - You can choose ``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D input or - ``tf.nn.max_pool3d`` and ``tf.nn.avg_pool3d`` for 3D input. - - Parameters - ---------- - layer : :class:`Layer` - The previous layer. - ksize : tuple of int - The size of the window for each dimension of the input tensor. - Note that: len(ksize) >= 4. - strides : tuple of int - The stride of the sliding window for each dimension of the input tensor. - Note that: len(strides) >= 4. - padding : str - The padding algorithm type: "SAME" or "VALID". 
- pool : pooling function - One of ``tf.nn.max_pool``, ``tf.nn.avg_pool``, ``tf.nn.max_pool3d`` and ``f.nn.avg_pool3d``. - See `TensorFlow pooling APIs `__ - name : str - A unique layer name. - - Examples - -------- - - see :class:`Conv2dLayer`. - - """ - - def __init__( - self, - prev_layer=None, - ksize=(1, 2, 2, 1), - strides=(1, 2, 2, 1), - padding='SAME', - pool=tf.nn.max_pool, - name='pool_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("PoolLayer %s: ksize:%s strides:%s padding:%s pool:%s" % (self.name, str(ksize), str(strides), padding, pool.__name__)) - - self.outputs = pool(self.inputs, ksize=ksize, strides=strides, padding=padding, name=name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - - -def maxpool1d(net, filter_size=3, strides=2, padding='valid', data_format='channels_last', name=None): - """Wrapper for `tf.layers.max_pooling1d `__ . - - Parameters - ---------- - net : :class:`Layer` - The previous layer with a output rank as 3. - filter_size : tuple of int - Pooling window size. - strides : tuple of int - Strides of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of `channels_last` (default) or `channels_first`. - The ordering of the dimensions must match the inputs. - channels_last corresponds to inputs with the shape (batch, length, channels); - while channels_first corresponds to inputs with shape (batch, channels, length). - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A max pooling 1-D layer with a output rank as 3. 
- - """ - logging.info("MaxPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) - outputs = tf.layers.max_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) - - net_new = copy.copy(net) - net_new.outputs = outputs - net_new.all_layers.extend([outputs]) - return net_new - - -def meanpool1d(net, filter_size=3, strides=2, padding='valid', data_format='channels_last', name=None): - """Wrapper for `tf.layers.average_pooling1d `__ . - - Parameters - ------------ - net : :class:`Layer` - The previous layer with a output rank as 3. - filter_size : tuple of int - Pooling window size. - strides : tuple of int - Strides of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of `channels_last` (default) or `channels_first`. - The ordering of the dimensions must match the inputs. - channels_last corresponds to inputs with the shape (batch, length, channels); - while channels_first corresponds to inputs with shape (batch, channels, length). - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A mean pooling 1-D layer with a output rank as 3. - - """ - logging.info("MeanPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) - outputs = tf.layers.average_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) - - net_new = copy.copy(net) - net_new.outputs = outputs - net_new.all_layers.extend([outputs]) - return net_new - - -def maxpool2d(net, filter_size=(3, 3), strides=(2, 2), padding='SAME', name='maxpool'): - """Wrapper for :class:`PoolLayer`. - - Parameters - ----------- - net : :class:`Layer` - The previous layer with a output rank as 4. - filter_size : tuple of int - (height, width) for filter size. - strides : tuple of int - (height, width) for strides. - padding : str - The padding method: 'valid' or 'same'. 
- name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A max pooling 2-D layer with a output rank as 4. - - """ - if strides is None: - strides = filter_size - assert len(strides) == 2, "len(strides) should be 2, MaxPool2d and PoolLayer are different." - net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1], strides=[1, strides[0], strides[1], 1], padding=padding, pool=tf.nn.max_pool, name=name) - return net - - -def meanpool2d(net, filter_size=(3, 3), strides=(2, 2), padding='SAME', name='meanpool'): - """Wrapper for :class:`PoolLayer`. - - Parameters - ----------- - layer : :class:`Layer` - The previous layer with a output rank as 4. - filter_size : tuple of int - (height, width) for filter size. - strides : tuple of int - (height, width) for strides. - padding : str - The padding method: 'valid' or 'same'. - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A mean pooling 2-D layer with a output rank as 4. - - """ - if strides is None: - strides = filter_size - assert len(strides) == 2, "len(strides) should be 2, MeanPool2d and PoolLayer are different." - net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1], strides=[1, strides[0], strides[1], 1], padding=padding, pool=tf.nn.avg_pool, name=name) - return net - - -# def maxpool3d(net, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='maxpool3d'): -class MaxPool3d(Layer): - """Wrapper for `tf.layers.max_pooling3d `__ . - - Parameters - ------------ - layer : :class:`Layer` - The previous layer with a output rank as 5. - filter_size : tuple of int - Pooling window size. - strides : tuple of int - Strides of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of `channels_last` (default) or `channels_first`. - The ordering of the dimensions must match the inputs. 
- channels_last corresponds to inputs with the shape (batch, length, channels); - while channels_first corresponds to inputs with shape (batch, channels, length). - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A max pooling 3-D layer with a output rank as 5. - - """ - - def __init__(self, prev_layer, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='maxpool3d'): - - # check layer name (fixed) - Layer.__init__(self, prev_layer=prev_layer, name=name) - - # the input of this layer is the output of previous layer (fixed) - self.inputs = prev_layer.outputs - - logging.info("MaxPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) - - self.outputs = tf.layers.max_pooling3d(prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) - - # get stuff from previous layer (fixed) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - # update layer (customized) - self.all_layers.append(self.outputs) - - -# def meanpool3d(net, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='meanpool3d'): -class MeanPool3d(Layer): - """Wrapper for `tf.layers.average_pooling3d `__ - - Parameters - ------------ - layer : :class:`Layer` - The previous layer with a output rank as 5. - filter_size : tuple of int - Pooling window size. - strides : tuple of int - Strides of the pooling operation. - padding : str - The padding method: 'valid' or 'same'. - data_format : str - One of `channels_last` (default) or `channels_first`. - The ordering of the dimensions must match the inputs. - channels_last corresponds to inputs with the shape (batch, length, channels); - while channels_first corresponds to inputs with shape (batch, channels, length). - name : str - A unique layer name. 
- - Returns - ------- - :class:`Layer` - A mean pooling 3-D layer with a output rank as 5. - - """ - - def __init__(self, prev_layer, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='meanpool3d'): - # check layer name (fixed) - Layer.__init__(self, prev_layer=prev_layer, name=name) - - # the input of this layer is the output of previous layer (fixed) - self.inputs = prev_layer.outputs - - # print out info (customized) - logging.info("MeanPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) - - # operation (customized) - self.outputs = tf.layers.average_pooling3d(prev_layer.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) - - # get stuff from previous layer (fixed) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - # update layer (customized) - self.all_layers.append(self.outputs) - # self.all_params.extend( [W, b] ) - - -class GlobalMaxPool1d(Layer): - """The :class:`GlobalMaxPool1d` class is a 1D Global Max Pooling layer. - - Parameters - ------------ - layer : :class:`Layer` - The previous layer with a output rank as 3. - name : str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder("float32", [None, 100, 30]) - >>> n = InputLayer(x, name='in') - >>> n = GlobalMaxPool1d(n) - ... 
[None, 30] - """ - - def __init__( - self, - prev_layer=None, - name='globalmaxpool1d', - ): - # check layer name (fixed) - Layer.__init__(self, prev_layer=prev_layer, name=name) - - # the input of this layer is the output of previous layer (fixed) - self.inputs = prev_layer.outputs - - # print out info (customized) - logging.info("GlobalMaxPool1d %s" % name) - - # operation (customized) - self.outputs = tf.reduce_max(prev_layer.outputs, axis=1, name=name) - - # get stuff from previous layer (fixed) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - # update layer (customized) - self.all_layers.append(self.outputs) - # self.all_params.extend( [W, b] ) - - -class GlobalMeanPool1d(Layer): - """The :class:`GlobalMeanPool1d` class is a 1D Global Mean Pooling layer. - - Parameters - ------------ - layer : :class:`Layer` - The previous layer with a output rank as 3. - name : str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder("float32", [None, 100, 30]) - >>> n = InputLayer(x, name='in') - >>> n = GlobalMeanPool1d(n) - ... [None, 30] - """ - - def __init__( - self, - prev_layer=None, - name='globalmeanpool1d', - ): - # check layer name (fixed) - Layer.__init__(self, prev_layer=prev_layer, name=name) - - # the input of this layer is the output of previous layer (fixed) - self.inputs = prev_layer.outputs - - # print out info (customized) - logging.info("GlobalMeanPool1d %s" % name) - - # operation (customized) - self.outputs = tf.reduce_mean(prev_layer.outputs, axis=1, name=name) - - # get stuff from previous layer (fixed) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - # update layer (customized) - self.all_layers.append(self.outputs) - # self.all_params.extend( [W, b] ) - - -class GlobalMaxPool2d(Layer): - """The :class:`GlobalMaxPool2d` class is a 2D Global Max Pooling layer. 
- - Parameters - ------------ - layer : :class:`Layer` - The previous layer with a output rank as 4. - name : str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder("float32", [None, 100, 100, 30]) - >>> n = InputLayer(x, name='in2') - >>> n = GlobalMaxPool2d(n) - ... [None, 30] - """ - - def __init__( - self, - prev_layer=None, - name='globalmaxpool2d', - ): - # check layer name (fixed) - Layer.__init__(self, prev_layer=prev_layer, name=name) - - # the input of this layer is the output of previous layer (fixed) - self.inputs = prev_layer.outputs - - # print out info (customized) - logging.info("GlobalMaxPool2d %s" % name) - - # operation (customized) - self.outputs = tf.reduce_max(prev_layer.outputs, axis=[1, 2], name=name) - - # get stuff from previous layer (fixed) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - # update layer (customized) - self.all_layers.append(self.outputs) - # self.all_params.extend( [W, b] ) - - -class GlobalMeanPool2d(Layer): - """The :class:`GlobalMeanPool2d` class is a 2D Global Mean Pooling layer. - - Parameters - ------------ - layer : :class:`Layer` - The previous layer with a output rank as 4. - name : str - A unique layer name. - - Examples - --------- - >>> x = tf.placeholder("float32", [None, 100, 100, 30]) - >>> n = InputLayer(x, name='in2') - >>> n = GlobalMeanPool2d(n) - ... 
[None, 30] - """ - - def __init__( - self, - prev_layer=None, - name='globalmeanpool2d', - ): - # check layer name (fixed) - Layer.__init__(self, prev_layer=prev_layer, name=name) - - # the input of this layer is the output of previous layer (fixed) - self.inputs = prev_layer.outputs - - # print out info (customized) - logging.info("GlobalMeanPool2d %s" % name) - - # operation (customized) - self.outputs = tf.reduce_mean(prev_layer.outputs, axis=[1, 2], name=name) - - # get stuff from previous layer (fixed) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - # update layer (customized) - self.all_layers.append(self.outputs) - # self.all_params.extend( [W, b] ) - - -# Alias -MaxPool1d = maxpool1d -MaxPool2d = maxpool2d -MeanPool1d = meanpool1d -MeanPool2d = meanpool2d diff --git a/tensorlayer/layers/recurrent.py b/tensorlayer/layers/recurrent.py deleted file mode 100644 index 59ec309..0000000 --- a/tensorlayer/layers/recurrent.py +++ /dev/null @@ -1,1644 +0,0 @@ -# -*- coding: utf-8 -*- - -import inspect -import tensorflow as tf -from .. import _logging as logging -from .core import * - -__all__ = [ - 'RNNLayer', - 'BiRNNLayer', - 'ConvRNNCell', - 'BasicConvLSTMCell', - 'ConvLSTMLayer', - 'advanced_indexing_op', - 'retrieve_seq_length_op', - 'retrieve_seq_length_op2', - 'retrieve_seq_length_op3', - 'target_mask_op', - 'DynamicRNNLayer', - 'BiDynamicRNNLayer', - 'Seq2Seq', -] - - -class RNNLayer(Layer): - """ - The :class:`RNNLayer` class is a fixed length recurrent layer for implementing vanilla RNN, - LSTM, GRU and etc. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell - - See `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different - cell_init_args : dictionary - The arguments for the cell function. - n_hidden : int - The number of hidden units in the layer. 
- initializer : initializer - The initializer for initializing the model parameters. - n_steps : int - The fixed sequence length. - initial_state : None or RNN State - If None, `initial_state` is zero state. - return_last : boolean - Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. - return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. - name : str - A unique layer name. - - Attributes - ---------- - outputs : Tensor - The output of this layer. - - final_state : Tensor or StateTuple - The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. - - initial_state : Tensor or StateTuple - The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. - - batch_size : int or Tensor - It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. - - Examples - -------- - - For synced sequence input and output, see `PTB example `__ - - - For encoding see below. 
- - >>> batch_size = 32 - >>> num_steps = 5 - >>> vocab_size = 3000 - >>> hidden_size = 256 - >>> keep_prob = 0.8 - >>> is_train = True - >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps]) - >>> net = tl.layers.EmbeddingInputlayer(inputs=input_data, vocabulary_size=vocab_size, - ... embedding_size=hidden_size, name='embed') - >>> net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop1') - >>> net = tl.layers.RNNLayer(net, cell_fn=tf.contrib.rnn.BasicLSTMCell, - ... n_hidden=hidden_size, n_steps=num_steps, return_last=False, name='lstm1') - >>> net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop2') - >>> net = tl.layers.RNNLayer(net, cell_fn=tf.contrib.rnn.BasicLSTMCell, - ... n_hidden=hidden_size, n_steps=num_steps, return_last=True, name='lstm2') - >>> net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop3') - >>> net = tl.layers.DenseLayer(net, n_units=vocab_size, name='output') - - - For CNN+LSTM - - >>> image_size = 100 - >>> batch_size = 10 - >>> num_steps = 5 - >>> x = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 1]) - >>> net = tl.layers.InputLayer(x, name='in') - >>> net = tl.layers.Conv2d(net, 32, (5, 5), (2, 2), tf.nn.relu, name='cnn1') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), name='pool1') - >>> net = tl.layers.Conv2d(net, 10, (5, 5), (2, 2), tf.nn.relu, name='cnn2') - >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), name='pool2') - >>> net = tl.layers.FlattenLayer(net, name='flatten') - >>> net = tl.layers.ReshapeLayer(net, shape=[-1, num_steps, int(net.outputs._shape[-1])]) - >>> rnn = tl.layers.RNNLayer(net, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=200, n_steps=num_steps, return_last=False, return_seq_2d=True, name='rnn') - >>> net = tl.layers.DenseLayer(rnn, 3, name='out') - - Notes - ----- - Input dimension should be rank 3 : [batch_size, n_steps, n_features], if no, please 
see :class:`ReshapeLayer`. - - References - ---------- - - `Neural Network RNN Cells in TensorFlow `__ - - `tensorflow/python/ops/rnn.py `__ - - `tensorflow/python/ops/rnn_cell.py `__ - - see TensorFlow tutorial ``ptb_word_lm.py``, TensorLayer tutorials ``tutorial_ptb_lstm*.py`` and ``tutorial_generate_text.py`` - - """ - - def __init__( - self, - prev_layer, - cell_fn, - cell_init_args=None, - n_hidden=100, - initializer=tf.random_uniform_initializer(-0.1, 0.1), - n_steps=5, - initial_state=None, - return_last=False, - return_seq_2d=False, - name='rnn', - ): - if cell_init_args is None: - cell_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - if cell_fn is None: - raise Exception("Please put in cell_fn") - if 'GRU' in cell_fn.__name__: - try: - cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning('pop state_is_tuple fails.') - - self.inputs = prev_layer.outputs - - logging.info("RNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s " % (self.name, n_hidden, n_steps, self.inputs.get_shape().ndims, - self.inputs.get_shape(), cell_fn.__name__)) - # You can get the dimension by .get_shape() or ._shape, and check the - # dimension by .with_rank() as follow. - # self.inputs.get_shape().with_rank(2) - # self.inputs.get_shape().with_rank(3) - - # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] - try: - self.inputs.get_shape().with_rank(3) - except Exception: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]") - - # is_reshape : boolean (deprecate) - # Reshape the inputs to 3 dimension tensor.\n - # If input is[batch_size, n_steps, n_features], we do not need to reshape it.\n - # If input is [batch_size * n_steps, n_features], we need to reshape it. 
- # if is_reshape: - # self.inputs = tf.reshape(self.inputs, shape=[-1, n_steps, int(self.inputs._shape[-1])]) - - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - logging.info(" RNN batch_size (concurrent processes): %d" % batch_size) - else: - from tensorflow.python.ops import array_ops - batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - self.batch_size = batch_size - - # Simplified version of tensorflow.models.rnn.rnn.py's rnn(). - # This builds an unrolled LSTM for tutorial purposes only. - # In general, use the rnn() or state_saving_rnn() from rnn.py. - # - # The alternative version of the code below is: - # - # from tensorflow.models.rnn import rnn - # inputs = [tf.squeeze(input_, [1]) - # for input_ in tf.split(1, num_steps, inputs)] - # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state) - outputs = [] - if 'reuse' in inspect.getargspec(cell_fn.__init__).args: - self.cell = cell = cell_fn(num_units=n_hidden, reuse=tf.get_variable_scope().reuse, **cell_init_args) - else: - self.cell = cell = cell_fn(num_units=n_hidden, **cell_init_args) - if initial_state is None: - self.initial_state = cell.zero_state(batch_size, dtype=LayersConfig.tf_dtype) #dtype=tf.float32) # 1.2.3 - state = self.initial_state - # with tf.variable_scope("model", reuse=None, initializer=initializer): - with tf.variable_scope(name, initializer=initializer) as vs: - for time_step in range(n_steps): - if time_step > 0: tf.get_variable_scope().reuse_variables() - (cell_output, state) = cell(self.inputs[:, time_step, :], state) - outputs.append(cell_output) - - # Retrieve just the RNN variables. 
- # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)] - rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - logging.info(" n_params : %d" % (len(rnn_variables))) - - if return_last: - # 2D Tensor [batch_size, n_hidden] - self.outputs = outputs[-1] - else: - if return_seq_2d: - # PTB tutorial: stack dense layer after that, or compute the cost from the output - # 2D Tensor [n_example, n_hidden] - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden]) - - else: - # : stack more RNN layer after that - # 3D Tensor [n_example/n_steps, n_steps, n_hidden] - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden]) - - self.final_state = state - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - self.all_layers.append(self.outputs) - self.all_params.extend(rnn_variables) - - -class BiRNNLayer(Layer): - """ - The :class:`BiRNNLayer` class is a fixed length Bidirectional recurrent layer. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell. - - See `RNN Cells in TensorFlow `__. - - Note TF1.0+ and TF1.0- are different. - cell_init_args : dictionary or None - The arguments for the cell function. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for initializing the model parameters. - n_steps : int - The fixed sequence length. - fw_initial_state : None or forward RNN State - If None, `initial_state` is zero state. - bw_initial_state : None or backward RNN State - If None, `initial_state` is zero state. 
- dropout : tuple of float or int - The input and output keep probability (input_keep_prob, output_keep_prob). - If one int, input and output keep probability are the same. - n_layer : int - The number of RNN layers, default is 1. - return_last : boolean - Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. - return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. - name : str - A unique layer name. - - Attributes - ---------- - outputs : tensor - The output of this layer. - fw(bw)_final_state : tensor or StateTuple - The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. - fw(bw)_initial_state : tensor or StateTuple - The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. - batch_size : int or tensor - It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. - - Notes - ----- - Input dimension should be rank 3 : [batch_size, n_steps, n_features]. If not, please see :class:`ReshapeLayer`. - For predicting, the sequence length has to be the same with the sequence length of training, while, for normal - RNN, we can use sequence length of 1 for predicting. 
- - References - ---------- - `Source `__ - - """ - - def __init__( - self, - prev_layer, - cell_fn, - cell_init_args=None, - n_hidden=100, - initializer=tf.random_uniform_initializer(-0.1, 0.1), - n_steps=5, - fw_initial_state=None, - bw_initial_state=None, - dropout=None, - n_layer=1, - return_last=False, - return_seq_2d=False, - name='birnn', - ): - if cell_init_args is None: - cell_init_args = {'state_is_tuple': True} # 'use_peepholes': True, - - Layer.__init__(self, prev_layer=prev_layer, name=name) - if cell_fn is None: - raise Exception("Please put in cell_fn") - if 'GRU' in cell_fn.__name__: - try: - cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - - self.inputs = prev_layer.outputs - - logging.info("BiRNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d " % (self.name, n_hidden, n_steps, - self.inputs.get_shape().ndims, - self.inputs.get_shape(), - cell_fn.__name__, dropout, n_layer)) - - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - - if fixed_batch_size.value: - self.batch_size = fixed_batch_size.value - logging.info(" RNN batch_size (concurrent processes): %d" % self.batch_size) - else: - from tensorflow.python.ops import array_ops - self.batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - - # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] - try: - self.inputs.get_shape().with_rank(3) - except Exception: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]") - - with tf.variable_scope(name, initializer=initializer) as vs: - rnn_creator = lambda: cell_fn(num_units=n_hidden, **cell_init_args) - # Apply dropout - if dropout: - if isinstance(dropout, (tuple, list)): # type(dropout) in [tuple, list]: - in_keep_prob = dropout[0] - out_keep_prob = dropout[1] - elif isinstance(dropout, float): - in_keep_prob, 
out_keep_prob = dropout, dropout - else: - raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") - try: # TF 1.0 - DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper - except Exception: - DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper - cell_creator = lambda is_last=True: \ - DropoutWrapper_fn(rnn_creator(), - input_keep_prob=in_keep_prob, - output_keep_prob=out_keep_prob if is_last else 1.0) - else: - cell_creator = rnn_creator - self.fw_cell = cell_creator() - self.bw_cell = cell_creator() - - # Apply multiple layers - if n_layer > 1: - try: # TF1.0 - MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell - except Exception: - MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell - if dropout: - try: - self.fw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True) - self.bw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True) - except Exception: - self.fw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) - self.bw_cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) - else: - try: - self.fw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) - self.bw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) - except Exception: - self.fw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) - self.bw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) - - # Initial state of RNN - if fw_initial_state is None: - self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) - else: - self.fw_initial_state = fw_initial_state - if bw_initial_state is None: - self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) - else: - self.bw_initial_state = bw_initial_state - # exit() - # 
Feedforward to MultiRNNCell - try: # TF1.0 - list_rnn_inputs = tf.unstack(self.inputs, axis=1) - except Exception: # TF0.12 - list_rnn_inputs = tf.unpack(self.inputs, axis=1) - - try: # TF1.0 - bidirectional_rnn_fn = tf.contrib.rnn.static_bidirectional_rnn - except Exception: - bidirectional_rnn_fn = tf.nn.bidirectional_rnn - outputs, fw_state, bw_state = bidirectional_rnn_fn( # outputs, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn( - cell_fw=self.fw_cell, - cell_bw=self.bw_cell, - inputs=list_rnn_inputs, - initial_state_fw=self.fw_initial_state, - initial_state_bw=self.bw_initial_state) - - if return_last: - raise Exception("Do not support return_last at the moment.") - # self.outputs = outputs[-1] - else: - self.outputs = outputs - if return_seq_2d: - # 2D Tensor [n_example, n_hidden] - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden * 2]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden * 2]) - else: - # : stack more RNN layer after that - # 3D Tensor [n_example/n_steps, n_steps, n_hidden] - - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden * 2]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden * 2]) - self.fw_final_state = fw_state - self.bw_final_state = bw_state - - # Retrieve just the RNN variables. 
- rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - logging.info(" n_params : %d" % (len(rnn_variables))) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(rnn_variables) - - -class ConvRNNCell(object): - """Abstract object representing an Convolutional RNN Cell.""" - - def __call__(self, inputs, state, scope=None): - """Run this RNN cell on inputs, starting from the given state.""" - raise NotImplementedError("Abstract method") - - @property - def state_size(self): - """size(s) of state(s) used by this cell.""" - raise NotImplementedError("Abstract method") - - @property - def output_size(self): - """Integer or TensorShape: size of outputs produced by this cell.""" - raise NotImplementedError("Abstract method") - - def zero_state(self, batch_size, dtype=LayersConfig.tf_dtype): - """Return zero-filled state tensor(s). - Args: - batch_size: int, float, or unit Tensor representing the batch size. - Returns: - tensor of shape '[batch_size x shape[0] x shape[1] x num_features] - filled with zeros - - """ - shape = self.shape - num_features = self.num_features - # TODO : TypeError: 'NoneType' object is not subscriptable - zeros = tf.zeros([batch_size, shape[0], shape[1], num_features * 2], dtype=dtype) - return zeros - - -class BasicConvLSTMCell(ConvRNNCell): - """Basic Conv LSTM recurrent network cell. - - Parameters - ----------- - shape : tuple of int - The height and width of the cell. - filter_size : tuple of int - The height and width of the filter - num_features : int - The hidden size of the cell - forget_bias : float - The bias added to forget gates (see above). - input_size : int - Deprecated and unused. - state_is_tuple : boolen - If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. - If False, they are concatenated along the column axis. 
The latter behavior will soon be deprecated. - act : activation function - The activation function of this layer, tanh as default. - - """ - - def __init__(self, shape, filter_size, num_features, forget_bias=1.0, input_size=None, state_is_tuple=False, act=tf.nn.tanh): - """Initialize the basic Conv LSTM cell.""" - # if not state_is_tuple: - # logging.warn("%s: Using a concatenated state is slower and will soon be " - # "deprecated. Use state_is_tuple=True.", self) - if input_size is not None: - logging.warn("%s: The input_size parameter is deprecated.", self) - self.shape = shape - self.filter_size = filter_size - self.num_features = num_features - self._forget_bias = forget_bias - self._state_is_tuple = state_is_tuple - self._activation = act - - @property - def state_size(self): - """State size of the LSTMStateTuple.""" - return (LSTMStateTuple(self._num_units, self._num_units) if self._state_is_tuple else 2 * self._num_units) - - @property - def output_size(self): - """Number of units in outputs.""" - return self._num_units - - def __call__(self, inputs, state, scope=None): - """Long short-term memory cell (LSTM).""" - with tf.variable_scope(scope or type(self).__name__): # "BasicLSTMCell" - # Parameters of gates are concatenated into one multiply for efficiency. 
- if self._state_is_tuple: - c, h = state - else: - # print state - # c, h = tf.split(3, 2, state) - c, h = tf.split(state, 2, 3) - concat = _conv_linear([inputs, h], self.filter_size, self.num_features * 4, True) - - # i = input_gate, j = new_input, f = forget_gate, o = output_gate - # i, j, f, o = tf.split(3, 4, concat) - i, j, f, o = tf.split(concat, 4, 3) - - new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * self._activation(j)) - new_h = self._activation(new_c) * tf.nn.sigmoid(o) - - if self._state_is_tuple: - new_state = LSTMStateTuple(new_c, new_h) - else: - new_state = tf.concat([new_c, new_h], 3) - return new_h, new_state - - -def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=None): - """convolution: - - Parameters - ---------- - args : tensor - 4D Tensor or a list of 4D, batch x n, Tensors. - filter_size : tuple of int - Filter height and width. - num_features : int - Nnumber of features. - bias_start : float - Starting value to initialize the bias; 0 by default. - scope : VariableScope - For the created subgraph; defaults to "Linear". - - Returns - -------- - - A 4D Tensor with shape [batch h w num_features] - - Raises - ------- - - ValueError : if some of the arguments has unspecified or wrong shape. - - """ - # Calculate the total size of arguments on dimension 1. - total_arg_size_depth = 0 - shapes = [a.get_shape().as_list() for a in args] - for shape in shapes: - if len(shape) != 4: - raise ValueError("Linear is expecting 4D arguments: %s" % str(shapes)) - if not shape[3]: - raise ValueError("Linear expects shape[4] of arguments: %s" % str(shapes)) - else: - total_arg_size_depth += shape[3] - - dtype = [a.dtype for a in args][0] - - # Now the computation. 
- with tf.variable_scope(scope or "Conv"): - matrix = tf.get_variable("Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype=dtype) - if len(args) == 1: - res = tf.nn.conv2d(args[0], matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - res = tf.nn.conv2d(tf.concat(args, 3), matrix, strides=[1, 1, 1, 1], padding='SAME') - if not bias: - return res - bias_term = tf.get_variable("Bias", [num_features], dtype=dtype, initializer=tf.constant_initializer(bias_start, dtype=dtype)) - return res + bias_term - - -class ConvLSTMLayer(Layer): - """A fixed length Convolutional LSTM layer. - - See this `paper `__ . - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - cell_shape : tuple of int - The shape of each cell width * height - filter_size : tuple of int - The size of filter width * height - cell_fn : a convolutional RNN cell - Cell function like :class:`BasicConvLSTMCell` - feature_map : int - The number of feature map in the layer. - initializer : initializer - The initializer for initializing the parameters. - n_steps : int - The sequence length. - initial_state : None or ConvLSTM State - If None, `initial_state` is zero state. - return_last : boolean - Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output". - - If False, return all outputs, "Synced sequence input and output". - - In other word, if you want to stack more RNNs on this layer, set to False. - return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. - name : str - A unique layer name. - - Attributes - ---------- - outputs : tensor - The output of this RNN. return_last = False, outputs = all cell_output, which is the hidden state. 
- cell_output.get_shape() = (?, h, w, c]) - - final_state : tensor or StateTuple - The finial state of this layer. - - When state_is_tuple = False, it is the final hidden and cell states, - - When state_is_tuple = True, You can get the final state after each iteration during training, then feed it to the initial state of next iteration. - - initial_state : tensor or StateTuple - It is the initial state of this ConvLSTM layer, you can use it to initialize - your state at the beginning of each epoch or iteration according to your - training procedure. - - batch_size : int or tensor - Is int, if able to compute the batch_size, otherwise, tensor for ``?``. - - """ - - def __init__( - self, - prev_layer, - cell_shape=None, - feature_map=1, - filter_size=(3, 3), - cell_fn=BasicConvLSTMCell, - initializer=tf.random_uniform_initializer(-0.1, 0.1), - n_steps=5, - initial_state=None, - return_last=False, - return_seq_2d=False, - name='convlstm', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("ConvLSTMLayer %s: feature_map:%d, n_steps:%d, " - "in_dim:%d %s, cell_fn:%s " % (self.name, feature_map, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__)) - # You can get the dimension by .get_shape() or ._shape, and check the - # dimension by .with_rank() as follow. 
- # self.inputs.get_shape().with_rank(2) - # self.inputs.get_shape().with_rank(3) - - # Input dimension should be rank 5 [batch_size, n_steps(max), h, w, c] - try: - self.inputs.get_shape().with_rank(5) - except Exception: - raise Exception("RNN : Input dimension should be rank 5 : [batch_size, n_steps, input_x, " "input_y, feature_map]") - - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - logging.info(" RNN batch_size (concurrent processes): %d" % batch_size) - else: - from tensorflow.python.ops import array_ops - batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - self.batch_size = batch_size - outputs = [] - self.cell = cell = cell_fn(shape=cell_shape, filter_size=filter_size, num_features=feature_map) - if initial_state is None: - self.initial_state = cell.zero_state(batch_size, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) # 1.2.3 - state = self.initial_state - # with tf.variable_scope("model", reuse=None, initializer=initializer): - with tf.variable_scope(name, initializer=initializer) as vs: - for time_step in range(n_steps): - if time_step > 0: tf.get_variable_scope().reuse_variables() - (cell_output, state) = cell(self.inputs[:, time_step, :, :, :], state) - outputs.append(cell_output) - - # Retrieve just the RNN variables. 
- # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)] - rnn_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) - - logging.info(" n_params : %d" % (len(rnn_variables))) - - if return_last: - # 2D Tensor [batch_size, n_hidden] - self.outputs = outputs[-1] - else: - if return_seq_2d: - # PTB tutorial: stack dense layer after that, or compute the cost from the output - # 4D Tensor [n_example, h, w, c] - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, cell_shape[0] * cell_shape[1] * feature_map]) - else: - # : stack more RNN layer after that - # 5D Tensor [n_example/n_steps, n_steps, h, w, c] - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, cell_shape[0], cell_shape[1], feature_map]) - - self.final_state = state - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(rnn_variables) - - -# Advanced Ops for Dynamic RNN -def advanced_indexing_op(inputs, index): - """Advanced Indexing for Sequences, returns the outputs by given sequence lengths. - When return the last output :class:`DynamicRNNLayer` uses it to get the last outputs with the sequence lengths. - - Parameters - ----------- - inputs : tensor for data - With shape of [batch_size, n_step(max), n_features] - index : tensor for indexing - Sequence length in Dynamic RNN. 
[batch_size] - - Examples - --------- - >>> batch_size, max_length, n_features = 3, 5, 2 - >>> z = np.random.uniform(low=-1, high=1, size=[batch_size, max_length, n_features]).astype(np.float32) - >>> b_z = tf.constant(z) - >>> sl = tf.placeholder(dtype=tf.int32, shape=[batch_size]) - >>> o = advanced_indexing_op(b_z, sl) - >>> - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> - >>> order = np.asarray([1,1,2]) - >>> print("real",z[0][order[0]-1], z[1][order[1]-1], z[2][order[2]-1]) - >>> y = sess.run([o], feed_dict={sl:order}) - >>> print("given",order) - >>> print("out", y) - ... real [-0.93021595 0.53820813] [-0.92548317 -0.77135968] [ 0.89952248 0.19149846] - ... given [1 1 2] - ... out [array([[-0.93021595, 0.53820813], - ... [-0.92548317, -0.77135968], - ... [ 0.89952248, 0.19149846]], dtype=float32)] - - References - ----------- - - Modified from TFlearn (the original code is used for fixed length rnn), `references `__. - - """ - batch_size = tf.shape(inputs)[0] - # max_length = int(inputs.get_shape()[1]) # for fixed length rnn, length is given - max_length = tf.shape(inputs)[1] # for dynamic_rnn, length is unknown - dim_size = int(inputs.get_shape()[2]) - index = tf.range(0, batch_size) * max_length + (index - 1) - flat = tf.reshape(inputs, [-1, dim_size]) - relevant = tf.gather(flat, index) - return relevant - - -def retrieve_seq_length_op(data): - """An op to compute the length of a sequence from input shape of [batch_size, n_step(max), n_features], - it can be used when the features of padding (on right hand side) are all zeros. - - Parameters - ----------- - data : tensor - [batch_size, n_step(max), n_features] with zero padding on right hand side. - - Examples - --------- - >>> data = [[[1],[2],[0],[0],[0]], - ... [[1],[2],[3],[0],[0]], - ... [[1],[2],[6],[1],[0]]] - >>> data = np.asarray(data) - >>> print(data.shape) - ... 
(3, 5, 1) - >>> data = tf.constant(data) - >>> sl = retrieve_seq_length_op(data) - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> y = sl.eval() - ... [2 3 4] - - Multiple features - >>> data = [[[1,2],[2,2],[1,2],[1,2],[0,0]], - ... [[2,3],[2,4],[3,2],[0,0],[0,0]], - ... [[3,3],[2,2],[5,3],[1,2],[0,0]]] - >>> print(sl) - ... [4 3 4] - - References - ------------ - Borrow from `TFlearn `__. - - """ - with tf.name_scope('GetLength'): - # TF 1.0 change reduction_indices to axis - used = tf.sign(tf.reduce_max(tf.abs(data), 2)) - length = tf.reduce_sum(used, 1) - # TF < 1.0 - # used = tf.sign(tf.reduce_max(tf.abs(data), reduction_indices=2)) - # length = tf.reduce_sum(used, reduction_indices=1) - length = tf.cast(length, tf.int32) - return length - - -def retrieve_seq_length_op2(data): - """An op to compute the length of a sequence, from input shape of [batch_size, n_step(max)], - it can be used when the features of padding (on right hand side) are all zeros. - - Parameters - ----------- - data : tensor - [batch_size, n_step(max)] with zero padding on right hand side. - - Examples - -------- - >>> data = [[1,2,0,0,0], - ... [1,2,3,0,0], - ... [1,2,6,1,0]] - >>> o = retrieve_seq_length_op2(data) - >>> sess = tf.InteractiveSession() - >>> tl.layers.initialize_global_variables(sess) - >>> print(o.eval()) - ... [2 3 4] - - """ - return tf.reduce_sum(tf.cast(tf.greater(data, tf.zeros_like(data)), tf.int32), 1) - - -def retrieve_seq_length_op3(data, pad_val=0): # HangSheng: return tensor for sequence length, if input is tf.string - """Return tensor for sequence length, if input is ``tf.string``. 
- - """ - data_shape_size = data.get_shape().ndims - if data_shape_size == 3: - return tf.reduce_sum(tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32), 1) - elif data_shape_size == 2: - return tf.reduce_sum(tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32), 1) - elif data_shape_size == 1: - raise ValueError("retrieve_seq_length_op3: data has wrong shape!") - else: - raise ValueError("retrieve_seq_length_op3: handling data_shape_size %s hasn't been implemented!" % (data_shape_size)) - - -def target_mask_op(data, pad_val=0): # HangSheng: return tensor for mask,if input is tf.string - """Return tensor for mask, if input is ``tf.string``. - - """ - data_shape_size = data.get_shape().ndims - if data_shape_size == 3: - return tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32) - elif data_shape_size == 2: - return tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32) - elif data_shape_size == 1: - raise ValueError("target_mask_op: data has wrong shape!") - else: - raise ValueError("target_mask_op: handling data_shape_size %s hasn't been implemented!" % (data_shape_size)) - - -class DynamicRNNLayer(Layer): - """ - The :class:`DynamicRNNLayer` class is a dynamic recurrent layer, see ``tf.nn.dynamic_rnn``. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell - - See `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different - cell_init_args : dictionary or None - The arguments for the cell function. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for initializing the parameters. - sequence_length : tensor, array or None - The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``. - - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the features of padding (on right hand side) are all zeros. 
- - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``. - - You can also input an numpy array. - - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__. - initial_state : None or RNN State - If None, `initial_state` is zero state. - dropout : tuple of float or int - The input and output keep probability (input_keep_prob, output_keep_prob). - - If one int, input and output keep probability are the same. - n_layer : int - The number of RNN layers, default is 1. - return_last : boolean or None - Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. - return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. - dynamic_rnn_init_args : dictionary - The arguments for ``tf.nn.dynamic_rnn``. - name : str - A unique layer name. - - Attributes - ------------ - outputs : tensor - The output of this layer. - - final_state : tensor or StateTuple - The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. - - initial_state : tensor or StateTuple - The initial state of this layer. 
- - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. - - batch_size : int or tensor - It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. - - sequence_length : a tensor or array - The sequence lengths computed by Advanced Opt or the given sequence lengths, [batch_size] - - Notes - ----- - Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`. - - Examples - -------- - Synced sequence input and output, for loss function see ``tl.cost.cross_entropy_seq_with_mask``. - - >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input") - >>> net = tl.layers.EmbeddingInputlayer( - ... inputs=input_seqs, - ... vocabulary_size=vocab_size, - ... embedding_size=embedding_size, - ... name='embedding') - >>> net = tl.layers.DynamicRNNLayer(net, - ... cell_fn=tf.contrib.rnn.BasicLSTMCell, # for TF0.2 use tf.nn.rnn_cell.BasicLSTMCell, - ... n_hidden=embedding_size, - ... dropout=(0.7 if is_train else None), - ... sequence_length=tl.layers.retrieve_seq_length_op2(input_seqs), - ... return_last=False, # for encoder, set to True - ... return_seq_2d=True, # stack denselayer or compute cost after it - ... name='dynamicrnn') - ... 
net = tl.layers.DenseLayer(net, n_units=vocab_size, name="output") - - References - ---------- - - `Wild-ML Blog `__ - - `dynamic_rnn.ipynb `__ - - `tf.nn.dynamic_rnn `__ - - `tflearn rnn `__ - - ``tutorial_dynamic_rnn.py`` - - """ - - def __init__( - self, - prev_layer, - cell_fn, #tf.nn.rnn_cell.LSTMCell, - cell_init_args=None, - n_hidden=256, - initializer=tf.random_uniform_initializer(-0.1, 0.1), - sequence_length=None, - initial_state=None, - dropout=None, - n_layer=1, - return_last=None, - return_seq_2d=False, - dynamic_rnn_init_args=None, - name='dyrnn', - ): - if dynamic_rnn_init_args is None: - dynamic_rnn_init_args = {} - if cell_init_args is None: - cell_init_args = {'state_is_tuple': True} - if return_last is None: - return_last = True - - Layer.__init__(self, prev_layer=prev_layer, name=name) - if cell_fn is None: - raise Exception("Please put in cell_fn") - if 'GRU' in cell_fn.__name__: - try: - cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - self.inputs = prev_layer.outputs - - logging.info("DynamicRNNLayer %s: n_hidden:%d, in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % - (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) - - # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] - try: - self.inputs.get_shape().with_rank(3) - except Exception: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") - - # Get the batch_size - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - logging.info(" batch_size (concurrent processes): %d" % batch_size) - else: - from tensorflow.python.ops import array_ops - batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - self.batch_size = batch_size - - # Creats the cell function - # 
cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **cell_init_args) # HanSheng - rnn_creator = lambda: cell_fn(num_units=n_hidden, **cell_init_args) - - # Apply dropout - if dropout: - if isinstance(dropout, (tuple, list)): - in_keep_prob = dropout[0] - out_keep_prob = dropout[1] - elif isinstance(dropout, float): - in_keep_prob, out_keep_prob = dropout, dropout - else: - raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") - try: # TF1.0 - DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper - except Exception: - DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper - - # cell_instance_fn1=cell_instance_fn # HanSheng - # cell_instance_fn=DropoutWrapper_fn( - # cell_instance_fn1(), - # input_keep_prob=in_keep_prob, - # output_keep_prob=out_keep_prob) - cell_creator = lambda is_last=True: \ - DropoutWrapper_fn(rnn_creator(), - input_keep_prob=in_keep_prob, - output_keep_prob=out_keep_prob if is_last else 1.0) - else: - cell_creator = rnn_creator - self.cell = cell_creator() - # Apply multiple layers - if n_layer > 1: - try: - MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell - except Exception: - MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell - - # cell_instance_fn2=cell_instance_fn # HanSheng - if dropout: - try: - # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)], state_is_tuple=True) # HanSheng - self.cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)], state_is_tuple=True) - except Exception: # when GRU - # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)]) # HanSheng - self.cell = MultiRNNCell_fn([cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)]) - else: - try: - self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) - except Exception: # when GRU - self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) - - # self.cell=cell_instance_fn() # HanSheng - - # Initialize 
initial_state - if initial_state is None: - self.initial_state = self.cell.zero_state(batch_size, dtype=LayersConfig.tf_dtype) # dtype=tf.float32) - else: - self.initial_state = initial_state - - # Computes sequence_length - if sequence_length is None: - try: # TF1.0 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs)) - except Exception: # TF0.12 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs)) - - # Main - Computes outputs and last_states - with tf.variable_scope(name, initializer=initializer) as vs: - outputs, last_states = tf.nn.dynamic_rnn( - cell=self.cell, - # inputs=X - inputs=self.inputs, - # dtype=tf.float64, - sequence_length=sequence_length, - initial_state=self.initial_state, - **dynamic_rnn_init_args) - rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # logging.info(" n_params : %d" % (len(rnn_variables))) - # Manage the outputs - if return_last: - # [batch_size, n_hidden] - # outputs = tf.transpose(tf.pack(outputs), [1, 0, 2]) # TF1.0 tf.pack --> tf.stack - self.outputs = advanced_indexing_op(outputs, sequence_length) - else: - # [batch_size, n_step(max), n_hidden] - # self.outputs = result[0]["outputs"] - # self.outputs = outputs # it is 3d, but it is a list - if return_seq_2d: - # PTB tutorial: - # 2D Tensor [n_example, n_hidden] - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden]) - else: - # : - # 3D Tensor [batch_size, n_steps(max), n_hidden] - max_length = tf.shape(outputs)[1] - batch_size = tf.shape(outputs)[0] - - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, n_hidden]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, n_hidden]) - # self.outputs = 
tf.reshape(tf.concat(1, outputs), [-1, max_length, n_hidden]) - - # Final state - self.final_state = last_states - - self.sequence_length = sequence_length - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - self.all_layers.append(self.outputs) - self.all_params.extend(rnn_variables) - - -class BiDynamicRNNLayer(Layer): - """ - The :class:`BiDynamicRNNLayer` class is a RNN layer, you can implement vanilla RNN, - LSTM and GRU with it. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell - - See `RNN Cells in TensorFlow `__. - - Note TF1.0+ and TF1.0- are different. - cell_init_args : dictionary - The arguments for the cell initializer. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for initializing the parameters. - sequence_length : tensor, array or None - The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``. - - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the features of padding (on right hand side) are all zeros. - - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``. - - You can also input an numpy array. - - More details about TensorFlow dynamic RNN in `Wild-ML Blog `__. - fw_initial_state : None or forward RNN State - If None, `initial_state` is zero state. - bw_initial_state : None or backward RNN State - If None, `initial_state` is zero state. - dropout : tuple of float or int - The input and output keep probability (input_keep_prob, output_keep_prob). - - If one int, input and output keep probability are the same. - n_layer : int - The number of RNN layers, default is 1. 
- return_last : boolean - Whether return last output or all outputs in each step. - - If True, return the last output, "Sequence input and single output" - - If False, return all outputs, "Synced sequence input and output" - - In other word, if you want to stack more RNNs on this layer, set to False. - return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. - dynamic_rnn_init_args : dictionary - The arguments for ``tf.nn.bidirectional_dynamic_rnn``. - name : str - A unique layer name. - - Attributes - ----------------------- - outputs : tensor - The output of this layer. (?, 2 * n_hidden) - - fw(bw)_final_state : tensor or StateTuple - The finial state of this layer. - - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. - - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. - - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. - - fw(bw)_initial_state : tensor or StateTuple - The initial state of this layer. - - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. - - batch_size : int or tensor - It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. - - sequence_length : a tensor or array - The sequence lengths computed by Advanced Opt or the given sequence lengths, [batch_size]. - - Notes - ----- - Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`. 
- - References - ---------- - - `Wild-ML Blog `__ - - `bidirectional_rnn.ipynb `__ - - """ - - def __init__( - self, - prev_layer, - cell_fn, #tf.nn.rnn_cell.LSTMCell, - cell_init_args=None, - n_hidden=256, - initializer=tf.random_uniform_initializer(-0.1, 0.1), - sequence_length=None, - fw_initial_state=None, - bw_initial_state=None, - dropout=None, - n_layer=1, - return_last=False, - return_seq_2d=False, - dynamic_rnn_init_args=None, - name='bi_dyrnn_layer', - ): - if cell_init_args is None: - cell_init_args = {'state_is_tuple': True} - if dynamic_rnn_init_args is None: - dynamic_rnn_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - if cell_fn is None: - raise Exception("Please put in cell_fn") - if 'GRU' in cell_fn.__name__: - try: - cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - self.inputs = prev_layer.outputs - - logging.info("BiDynamicRNNLayer %s: n_hidden:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % - (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) - - # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] - try: - self.inputs.get_shape().with_rank(3) - except Exception: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") - - # Get the batch_size - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - logging.info(" batch_size (concurrent processes): %d" % batch_size) - else: - from tensorflow.python.ops import array_ops - batch_size = array_ops.shape(self.inputs)[0] - logging.info(" non specified batch_size, uses a tensor instead.") - self.batch_size = batch_size - - with tf.variable_scope(name, initializer=initializer) as vs: - # Creats the cell function - # cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **cell_init_args) # HanSheng - 
rnn_creator = lambda: cell_fn(num_units=n_hidden, **cell_init_args) - - # Apply dropout - if dropout: - if isinstance(dropout, (tuple, list)): - in_keep_prob = dropout[0] - out_keep_prob = dropout[1] - elif isinstance(dropout, float): - in_keep_prob, out_keep_prob = dropout, dropout - else: - raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") - try: - DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper - except Exception: - DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper - - # cell_instance_fn1=cell_instance_fn # HanSheng - # cell_instance_fn=lambda: DropoutWrapper_fn( - # cell_instance_fn1(), - # input_keep_prob=in_keep_prob, - # output_keep_prob=out_keep_prob) - cell_creator = lambda is_last=True: \ - DropoutWrapper_fn(rnn_creator(), - input_keep_prob=in_keep_prob, - output_keep_prob=out_keep_prob if is_last else 1.0) - else: - cell_creator = rnn_creator - - # if dropout: - # self.fw_cell = DropoutWrapper_fn(self.fw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob) - # self.bw_cell = DropoutWrapper_fn(self.bw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob) - - # self.fw_cell=cell_instance_fn() - # self.bw_cell=cell_instance_fn() - # Initial state of RNN - - self.fw_initial_state = fw_initial_state - self.bw_initial_state = bw_initial_state - # Computes sequence_length - if sequence_length is None: - try: # TF1.0 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs)) - except Exception: # TF0.12 - sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs)) - - if n_layer > 1: - if dropout: - self.fw_cell = [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)] - self.bw_cell = [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)] - else: - self.fw_cell = [cell_creator() for _ in range(n_layer)] - self.bw_cell = [cell_creator() for _ in range(n_layer)] - from 
tensorflow.contrib.rnn import stack_bidirectional_dynamic_rnn - outputs, states_fw, states_bw = stack_bidirectional_dynamic_rnn( - cells_fw=self.fw_cell, - cells_bw=self.bw_cell, - inputs=self.inputs, - sequence_length=sequence_length, - initial_states_fw=self.fw_initial_state, - initial_states_bw=self.bw_initial_state, - dtype=LayersConfig.tf_dtype, - **dynamic_rnn_init_args) - - else: - self.fw_cell = cell_creator() - self.bw_cell = cell_creator() - outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn( - cell_fw=self.fw_cell, - cell_bw=self.bw_cell, - inputs=self.inputs, - sequence_length=sequence_length, - initial_state_fw=self.fw_initial_state, - initial_state_bw=self.bw_initial_state, - dtype=LayersConfig.tf_dtype, - **dynamic_rnn_init_args) - - rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - logging.info(" n_params : %d" % (len(rnn_variables))) - - # Manage the outputs - try: # TF1.0 - outputs = tf.concat(outputs, 2) - except Exception: # TF0.12 - outputs = tf.concat(2, outputs) - - if return_last: - # [batch_size, 2 * n_hidden] - raise NotImplementedError("Return last is not implemented yet.") - # self.outputs = advanced_indexing_op(outputs, sequence_length) - else: - # [batch_size, n_step(max), 2 * n_hidden] - if return_seq_2d: - # PTB tutorial: - # 2D Tensor [n_example, 2 * n_hidden] - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, 2 * n_hidden]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [-1, 2 * n_hidden]) - else: - # : - # 3D Tensor [batch_size, n_steps(max), 2 * n_hidden] - max_length = tf.shape(outputs)[1] - batch_size = tf.shape(outputs)[0] - try: # TF1.0 - self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, 2 * n_hidden]) - except Exception: # TF0.12 - self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, 2 * n_hidden]) - - # Final state - self.fw_final_states = states_fw - self.bw_final_states = states_bw - 
- self.sequence_length = sequence_length - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - self.all_layers.append(self.outputs) - self.all_params.extend(rnn_variables) - - -class Seq2Seq(Layer): - """ - The :class:`Seq2Seq` class is a simple :class:`DynamicRNNLayer` based Seq2seq layer without using `tl.contrib.seq2seq `__. - See `Model `__ - and `Sequence to Sequence Learning with Neural Networks `__. - - - Please check this example `Chatbot in 200 lines of code `__. - - The Author recommends users to read the source code of :class:`DynamicRNNLayer` and :class:`Seq2Seq`. - - Parameters - ---------- - net_encode_in : :class:`Layer` - Encode sequences, [batch_size, None, n_features]. - net_decode_in : :class:`Layer` - Decode sequences, [batch_size, None, n_features]. - cell_fn : TensorFlow cell function - A TensorFlow core RNN cell - - see `RNN Cells in TensorFlow `__ - - Note TF1.0+ and TF1.0- are different - cell_init_args : dictionary or None - The arguments for the cell initializer. - n_hidden : int - The number of hidden units in the layer. - initializer : initializer - The initializer for the parameters. - encode_sequence_length : tensor - For encoder sequence length, see :class:`DynamicRNNLayer` . - decode_sequence_length : tensor - For decoder sequence length, see :class:`DynamicRNNLayer` . - initial_state_encode : None or RNN state - If None, `initial_state_encode` is zero state, it can be set by placeholder or other RNN. - initial_state_decode : None or RNN state - If None, `initial_state_decode` is the final state of the RNN encoder, it can be set by placeholder or other RNN. - dropout : tuple of float or int - The input and output keep probability (input_keep_prob, output_keep_prob). - - If one int, input and output keep probability are the same. - n_layer : int - The number of RNN layers, default is 1. 
- return_seq_2d : boolean - Only consider this argument when `return_last` is `False` - - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. - - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. - name : str - A unique layer name. - - Attributes - ------------ - outputs : tensor - The output of RNN decoder. - initial_state_encode : tensor or StateTuple - Initial state of RNN encoder. - initial_state_decode : tensor or StateTuple - Initial state of RNN decoder. - final_state_encode : tensor or StateTuple - Final state of RNN encoder. - final_state_decode : tensor or StateTuple - Final state of RNN decoder. - - Notes - -------- - - How to feed data: `Sequence to Sequence Learning with Neural Networks `__ - - input_seqs : ``['how', 'are', 'you', '']`` - - decode_seqs : ``['', 'I', 'am', 'fine', '']`` - - target_seqs : ``['I', 'am', 'fine', '', '']`` - - target_mask : ``[1, 1, 1, 1, 0]`` - - related functions : tl.prepro - - Examples - ---------- - >>> from tensorlayer.layers import * - >>> batch_size = 32 - >>> encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs") - >>> decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs") - >>> target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs") - >>> target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask") # tl.prepro.sequences_get_mask() - >>> with tf.variable_scope("model"): - ... # for chatbot, you can use the same embedding layer, - ... # for translation, you may want to use 2 seperated embedding layers - >>> with tf.variable_scope("embedding") as vs: - >>> net_encode = EmbeddingInputlayer( - ... inputs = encode_seqs, - ... vocabulary_size = 10000, - ... embedding_size = 200, - ... 
name = 'seq_embedding') - >>> vs.reuse_variables() - >>> tl.layers.set_name_reuse(True) - >>> net_decode = EmbeddingInputlayer( - ... inputs = decode_seqs, - ... vocabulary_size = 10000, - ... embedding_size = 200, - ... name = 'seq_embedding') - >>> net = Seq2Seq(net_encode, net_decode, - ... cell_fn = tf.contrib.rnn.BasicLSTMCell, - ... n_hidden = 200, - ... initializer = tf.random_uniform_initializer(-0.1, 0.1), - ... encode_sequence_length = retrieve_seq_length_op2(encode_seqs), - ... decode_sequence_length = retrieve_seq_length_op2(decode_seqs), - ... initial_state_encode = None, - ... dropout = None, - ... n_layer = 1, - ... return_seq_2d = True, - ... name = 'seq2seq') - >>> net_out = DenseLayer(net, n_units=10000, act=tf.identity, name='output') - >>> e_loss = tl.cost.cross_entropy_seq_with_mask(logits=net_out.outputs, target_seqs=target_seqs, input_mask=target_mask, return_details=False, name='cost') - >>> y = tf.nn.softmax(net_out.outputs) - >>> net_out.print_params(False) - - """ - - def __init__( - self, - net_encode_in, - net_decode_in, - cell_fn, #tf.nn.rnn_cell.LSTMCell, - cell_init_args=None, - n_hidden=256, - initializer=tf.random_uniform_initializer(-0.1, 0.1), - encode_sequence_length=None, - decode_sequence_length=None, - initial_state_encode=None, - initial_state_decode=None, - dropout=None, - n_layer=1, - return_seq_2d=False, - name='seq2seq', - ): - if cell_init_args is None: - cell_init_args = {'state_is_tuple': True} - - Layer.__init__(self, name=name) - if cell_fn is None: - raise Exception("Please put in cell_fn") - if 'GRU' in cell_fn.__name__: - try: - cell_init_args.pop('state_is_tuple') - except Exception: - logging.warning("pop state_is_tuple fails.") - # self.inputs = layer.outputs - logging.info("[*] Seq2Seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden, cell_fn.__name__, dropout, n_layer)) - - with tf.variable_scope(name): - # tl.layers.set_name_reuse(reuse) - # network = InputLayer(self.inputs, 
name=name+'/input') - network_encode = DynamicRNNLayer( - net_encode_in, - cell_fn=cell_fn, - cell_init_args=cell_init_args, - n_hidden=n_hidden, - initializer=initializer, - initial_state=initial_state_encode, - dropout=dropout, - n_layer=n_layer, - sequence_length=encode_sequence_length, - return_last=False, - return_seq_2d=True, - name='encode') - # vs.reuse_variables() - # tl.layers.set_name_reuse(True) - network_decode = DynamicRNNLayer( - net_decode_in, - cell_fn=cell_fn, - cell_init_args=cell_init_args, - n_hidden=n_hidden, - initializer=initializer, - initial_state=(network_encode.final_state if initial_state_decode is None else initial_state_decode), - dropout=dropout, - n_layer=n_layer, - sequence_length=decode_sequence_length, - return_last=False, - return_seq_2d=return_seq_2d, - name='decode') - self.outputs = network_decode.outputs - - # rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # Initial state - self.initial_state_encode = network_encode.initial_state - self.initial_state_decode = network_decode.initial_state - - # Final state - self.final_state_encode = network_encode.final_state - self.final_state_decode = network_decode.final_state - - # self.sequence_length = sequence_length - self.all_layers = list(network_encode.all_layers) - self.all_params = list(network_encode.all_params) - self.all_drop = dict(network_encode.all_drop) - - self.all_layers.extend(list(network_decode.all_layers)) - self.all_params.extend(list(network_decode.all_params)) - self.all_drop.update(dict(network_decode.all_drop)) - - self.all_layers.append(self.outputs) - # self.all_params.extend( rnn_variables ) - - self.all_layers = list_remove_repeat(self.all_layers) - self.all_params = list_remove_repeat(self.all_params) diff --git a/tensorlayer/layers/shape.py b/tensorlayer/layers/shape.py deleted file mode 100644 index 3e4644d..0000000 --- a/tensorlayer/layers/shape.py +++ /dev/null @@ -1,130 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core 
import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'FlattenLayer', - 'ReshapeLayer', - 'TransposeLayer', -] - - -class FlattenLayer(Layer): - """A layer that reshapes high-dimension input into a vector. - - Then we often apply DenseLayer, RNNLayer, ConcatLayer and etc on the top of a flatten layer. - [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask] - - Parameters - ---------- - layer : :class:`Layer` - Previous layer. - name : str - A unique layer name. - - Examples - -------- - >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.FlattenLayer(net, name='flatten') - ... [?, 784] - - """ - - def __init__( - self, - prev_layer, - name='flatten_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - self.outputs = flatten_reshape(self.inputs, name=name) - self.n_units = int(self.outputs.get_shape()[-1]) - logging.info("FlattenLayer %s: %d" % (self.name, self.n_units)) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - - -class ReshapeLayer(Layer): - """A layer that reshapes a given tensor. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - shape : tuple of int - The output shape, see ``tf.reshape``. - name : str - A unique layer name. - - Examples - -------- - >>> x = tf.placeholder(tf.float32, shape=(None, 784)) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.ReshapeLayer(net, [-1, 28, 28, 1], name='reshape') - >>> print(net.outputs) - ... 
(?, 28, 28, 1) - - """ - - def __init__( - self, - prev_layer, - shape, - name='reshape_layer', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - self.outputs = tf.reshape(self.inputs, shape=shape, name=name) - logging.info("ReshapeLayer %s: %s" % (self.name, self.outputs.get_shape())) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - - -class TransposeLayer(Layer): - """A layer that transposes the dimension of a tensor. - - See `tf.transpose() `__ . - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - perm: list of int - The permutation of the dimensions, similar with ``numpy.transpose``. - name : str - A unique layer name. - - Examples - ---------- - >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net = tl.layers.TransposeLayer(net, perm=[0, 1, 3, 2], name='trans') - ... [None, 28, 1, 28] - - """ - - def __init__( - self, - prev_layer, - perm, - name='transpose', - ): - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - assert perm is not None - - logging.info("TransposeLayer %s: perm:%s" % (self.name, perm)) - # with tf.variable_scope(name) as vs: - self.outputs = tf.transpose(self.inputs, perm=perm, name=name) - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - # self.all_params.extend( variables ) diff --git a/tensorlayer/layers/spatial_transformer.py b/tensorlayer/layers/spatial_transformer.py deleted file mode 100644 index bc24590..0000000 --- a/tensorlayer/layers/spatial_transformer.py +++ /dev/null @@ -1,288 +0,0 @@ -# -*- coding: utf-8 -*- - -from six.moves import xrange -from .core import * -from .. 
import _logging as logging -import tensorflow as tf -import numpy as np - -__all__ = [ - 'transformer', - 'batch_transformer', - 'SpatialTransformer2dAffineLayer', -] - - -def transformer(U, theta, out_size, name='SpatialTransformer2dAffine'): - """Spatial Transformer Layer for `2D Affine Transformation `__ - , see :class:`SpatialTransformer2dAffineLayer` class. - - Parameters - ---------- - U : list of float - The output of a convolutional net should have the - shape [num_batch, height, width, num_channels]. - theta: float - The output of the localisation network should be [num_batch, 6], value range should be [0, 1] (via tanh). - out_size: tuple of int - The size of the output of the network (height, width) - name: str - Optional function name - - Returns - ------- - Tensor - The transformed tensor. - - References - ---------- - - `Spatial Transformer Networks `__ - - `TensorFlow/Models `__ - - Notes - ----- - To initialize the network to the identity transform init. - - >>> ``theta`` to - >>> identity = np.array([[1., 0., 0.], - ... 
[0., 1., 0.]]) - >>> identity = identity.flatten() - >>> theta = tf.Variable(initial_value=identity) - - """ - - def _repeat(x, n_repeats): - with tf.variable_scope('_repeat'): - rep = tf.transpose(tf.expand_dims(tf.ones(shape=tf.stack([ - n_repeats, - ])), 1), [1, 0]) - rep = tf.cast(rep, 'int32') - x = tf.matmul(tf.reshape(x, (-1, 1)), rep) - return tf.reshape(x, [-1]) - - def _interpolate(im, x, y, out_size): - with tf.variable_scope('_interpolate'): - # constants - num_batch = tf.shape(im)[0] - height = tf.shape(im)[1] - width = tf.shape(im)[2] - channels = tf.shape(im)[3] - - x = tf.cast(x, 'float32') - y = tf.cast(y, 'float32') - height_f = tf.cast(height, 'float32') - width_f = tf.cast(width, 'float32') - out_height = out_size[0] - out_width = out_size[1] - zero = tf.zeros([], dtype='int32') - max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') - max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') - - # scale indices from [-1, 1] to [0, width/height] - x = (x + 1.0) * (width_f) / 2.0 - y = (y + 1.0) * (height_f) / 2.0 - - # do sampling - x0 = tf.cast(tf.floor(x), 'int32') - x1 = x0 + 1 - y0 = tf.cast(tf.floor(y), 'int32') - y1 = y0 + 1 - - x0 = tf.clip_by_value(x0, zero, max_x) - x1 = tf.clip_by_value(x1, zero, max_x) - y0 = tf.clip_by_value(y0, zero, max_y) - y1 = tf.clip_by_value(y1, zero, max_y) - dim2 = width - dim1 = width * height - base = _repeat(tf.range(num_batch) * dim1, out_height * out_width) - base_y0 = base + y0 * dim2 - base_y1 = base + y1 * dim2 - idx_a = base_y0 + x0 - idx_b = base_y1 + x0 - idx_c = base_y0 + x1 - idx_d = base_y1 + x1 - - # use indices to lookup pixels in the flat image and restore - # channels dim - im_flat = tf.reshape(im, tf.stack([-1, channels])) - im_flat = tf.cast(im_flat, 'float32') - Ia = tf.gather(im_flat, idx_a) - Ib = tf.gather(im_flat, idx_b) - Ic = tf.gather(im_flat, idx_c) - Id = tf.gather(im_flat, idx_d) - - # and finally calculate interpolated values - x0_f = tf.cast(x0, 'float32') - x1_f = tf.cast(x1, 'float32') - y0_f 
= tf.cast(y0, 'float32') - y1_f = tf.cast(y1, 'float32') - wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) - wb = tf.expand_dims(((x1_f - x) * (y - y0_f)), 1) - wc = tf.expand_dims(((x - x0_f) * (y1_f - y)), 1) - wd = tf.expand_dims(((x - x0_f) * (y - y0_f)), 1) - output = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) - return output - - def _meshgrid(height, width): - with tf.variable_scope('_meshgrid'): - # This should be equivalent to: - # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), - # np.linspace(-1, 1, height)) - # ones = np.ones(np.prod(x_t.shape)) - # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) - x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) - y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), tf.ones(shape=tf.stack([1, width]))) - - x_t_flat = tf.reshape(x_t, (1, -1)) - y_t_flat = tf.reshape(y_t, (1, -1)) - - ones = tf.ones_like(x_t_flat) - grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) - return grid - - def _transform(theta, input_dim, out_size): - with tf.variable_scope('_transform'): - num_batch = tf.shape(input_dim)[0] - num_channels = tf.shape(input_dim)[3] - theta = tf.reshape(theta, (-1, 2, 3)) - theta = tf.cast(theta, 'float32') - - # grid of (x_t, y_t, 1), eq (1) in ref [1] - out_height = out_size[0] - out_width = out_size[1] - grid = _meshgrid(out_height, out_width) - grid = tf.expand_dims(grid, 0) - grid = tf.reshape(grid, [-1]) - grid = tf.tile(grid, tf.stack([num_batch])) - grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) - - # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) - T_g = tf.matmul(theta, grid) - x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) - y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) - x_s_flat = tf.reshape(x_s, [-1]) - y_s_flat = tf.reshape(y_s, [-1]) - - input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, out_size) - - output = tf.reshape(input_transformed, tf.stack([num_batch, out_height, 
out_width, num_channels])) - return output - - with tf.variable_scope(name): - output = _transform(theta, U, out_size) - return output - - -def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer2dAffine'): - """Batch Spatial Transformer function for `2D Affine Transformation `__. - - Parameters - ---------- - U : list of float - tensor of inputs [batch, height, width, num_channels] - thetas : list of float - a set of transformations for each input [batch, num_transforms, 6] - out_size : list of int - the size of the output [out_height, out_width] - name : str - optional function name - - Returns - ------ - float - Tensor of size [batch * num_transforms, out_height, out_width, num_channels] - - """ - with tf.variable_scope(name): - num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) - indices = [[i] * num_transforms for i in xrange(num_batch)] - input_repeated = tf.gather(U, tf.reshape(indices, [-1])) - return transformer(input_repeated, thetas, out_size) - - -class SpatialTransformer2dAffineLayer(Layer): - """The :class:`SpatialTransformer2dAffineLayer` class is a 2D `Spatial Transformer Layer `__ for - `2D Affine Transformation `__. - - Parameters - ----------- - layer : :class:`Layer` - Previous layer. - theta_layer : :class:`Layer` - The localisation network. - - We will use a :class:`DenseLayer` to make the theta size to [batch, 6], value range to [0, 1] (via tanh). - out_size : tuple of int or None - The size of the output of the network (height, width), the feature maps will be resized by this. - name : str - A unique layer name. 
- - References - ----------- - - `Spatial Transformer Networks `__ - - `TensorFlow/Models `__ - - """ - - def __init__( - self, - prev_layer=None, - theta_layer=None, - out_size=None, - name='sapatial_trans_2d_affine', - ): - if out_size is None: - out_size = [40, 40] - - Layer.__init__(self, prev_layer=[prev_layer, theta_layer], name=name) - self.inputs = prev_layer.outputs - self.theta_layer = theta_layer - logging.info("SpatialTransformer2dAffineLayer %s: in_size:%s out_size:%s" % (name, self.inputs.get_shape().as_list(), out_size)) - - with tf.variable_scope(name) as vs: - # 1. make the localisation network to [batch, 6] via Flatten and Dense. - if self.theta_layer.outputs.get_shape().ndims > 2: - self.theta_layer.outputs = flatten_reshape(self.theta_layer.outputs, 'flatten') - # 2. To initialize the network to the identity transform init. - # 2.1 W - n_in = int(self.theta_layer.outputs.get_shape()[-1]) - shape = (n_in, 6) - W = tf.get_variable(name='W', initializer=tf.zeros(shape), dtype=LayersConfig.tf_dtype) - # 2.2 b - identity = tf.constant(np.array([[1., 0, 0], [0, 1., 0]]).astype('float32').flatten()) - b = tf.get_variable(name='b', initializer=identity, dtype=LayersConfig.tf_dtype) - # 2.3 transformation matrix - self.theta = tf.nn.tanh(tf.matmul(self.theta_layer.outputs, W) + b) - # 3. Spatial Transformer Sampling - # 3.1 transformation - self.outputs = transformer(self.inputs, self.theta, out_size=out_size) - # 3.2 automatically set batch_size and channels - # e.g. [?, 40, 40, ?] 
--> [64, 40, 40, 1] or [64, 20, 20, 4]/ Hao Dong - # - fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] - if fixed_batch_size.value: - batch_size = fixed_batch_size.value - else: - from tensorflow.python.ops import array_ops - batch_size = array_ops.shape(self.inputs)[0] - - n_channels = self.inputs.get_shape().as_list()[-1] - # logging.info(self.outputs) - self.outputs = tf.reshape(self.outputs, shape=[batch_size, out_size[0], out_size[1], n_channels]) - # logging.info(self.outputs) - # exit() - # 4. Get all parameters - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - # # fixed - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - # - # # theta_layer - # self.all_layers.extend(theta_layer.all_layers) - # self.all_params.extend(theta_layer.all_params) - # self.all_drop.update(theta_layer.all_drop) - - # this layer - self.all_layers.append(self.outputs) - self.all_params.extend(variables) diff --git a/tensorlayer/layers/special_activation.py b/tensorlayer/layers/special_activation.py deleted file mode 100644 index ec01434..0000000 --- a/tensorlayer/layers/special_activation.py +++ /dev/null @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'PReluLayer', -] - - -class PReluLayer(Layer): - """ - The :class:`PReluLayer` class is Parametric Rectified Linear layer. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer。 - channel_shared : boolean - If True, single weight is shared by all channels. - a_init : initializer - The initializer for initializing the alpha(s). - a_init_args : dictionary - The arguments for initializing the alpha(s). - name : str - A unique layer name. 
- - References - ----------- - - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `__ - - """ - - def __init__( - self, - prev_layer, - channel_shared=False, - a_init=tf.constant_initializer(value=0.0), - a_init_args=None, - # restore = True, - name="prelu_layer"): - if a_init_args is None: - a_init_args = {} - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("PReluLayer %s: channel_shared:%s" % (self.name, channel_shared)) - if channel_shared: - w_shape = (1, ) - else: - w_shape = int(self.inputs.get_shape()[-1]) - - # with tf.name_scope(name) as scope: - with tf.variable_scope(name): - alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=a_init, dtype=LayersConfig.tf_dtype, **a_init_args) - try: # TF 1.0 - self.outputs = tf.nn.relu(self.inputs) + tf.multiply(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5 - except Exception: # TF 0.12 - self.outputs = tf.nn.relu(self.inputs) + tf.mul(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5 - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - - self.all_layers.append(self.outputs) - self.all_params.extend([alphas]) diff --git a/tensorlayer/layers/stack.py b/tensorlayer/layers/stack.py deleted file mode 100644 index 693293d..0000000 --- a/tensorlayer/layers/stack.py +++ /dev/null @@ -1,116 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'StackLayer', - 'UnStackLayer', -] - - -class StackLayer(Layer): - """ - The :class:`StackLayer` class is layer for stacking a list of rank-R tensors into one rank-(R+1) tensor, see `tf.stack() `__. - - Parameters - ---------- - layers : list of :class:`Layer` - Previous layers to stack. - axis : int - Dimension along which to concatenate. - name : str - A unique layer name. 
- - Examples - --------- - >>> x = tf.placeholder(tf.float32, shape=[None, 30]) - >>> net = tl.layers.InputLayer(x, name='input') - >>> net1 = tl.layers.DenseLayer(net, 10, name='dense1') - >>> net2 = tl.layers.DenseLayer(net, 10, name='dense2') - >>> net3 = tl.layers.DenseLayer(net, 10, name='dense3') - >>> net = tl.layers.StackLayer([net1, net2, net3], axis=1, name='stack') - ... (?, 3, 10) - - """ - - def __init__( - self, - layers, - axis=1, - name='stack', - ): - Layer.__init__(self, prev_layer=layers, name=name) - self.inputs = [] - for l in layers: - self.inputs.append(l.outputs) - - self.outputs = tf.stack(self.inputs, axis=axis, name=name) - - logging.info("StackLayer %s: axis: %d" % (self.name, axis)) - - # self.all_layers = list(layers[0].all_layers) - # self.all_params = list(layers[0].all_params) - # self.all_drop = dict(layers[0].all_drop) - # - # for i in range(1, len(layers)): - # self.all_layers.extend(list(layers[i].all_layers)) - # self.all_params.extend(list(layers[i].all_params)) - # self.all_drop.update(dict(layers[i].all_drop)) - # - # self.all_layers = list_remove_repeat(self.all_layers) - # self.all_params = list_remove_repeat(self.all_params) - - self.all_layers.append(self.outputs) - - -def unstack_layer(layer, num=None, axis=0, name='unstack'): - """ - It is layer for unstacking the given dimension of a rank-R tensor into rank-(R-1) tensors., see `tf.unstack() `__. - - Parameters - ---------- - layer : :class:`Layer` - Previous layer - num : int or None - The length of the dimension axis. Automatically inferred if None (the default). - axis : int - Dimension along which axis to concatenate. - name : str - A unique layer name. - - Returns - ------- - list of :class:`Layer` - The list of layer objects unstacked from the input. 
- - """ - inputs = layer.outputs - with tf.variable_scope(name): - outputs = tf.unstack(inputs, num=num, axis=axis) - - logging.info("UnStackLayer %s: num: %s axis: %d, n_outputs: %d" % (name, num, axis, len(outputs))) - - net_new = [] - scope_name = tf.get_variable_scope().name - if scope_name: - full_name = scope_name + '/' + name - else: - full_name = name - - for i, _v in enumerate(outputs): - n = Layer(prev_layer=layer, name=full_name + str(i)) - n.outputs = outputs[i] - # n.all_layers = list(layer.all_layers) - # n.all_params = list(layer.all_params) - # n.all_drop = dict(layer.all_drop) - # n.all_layers.append(inputs) - - net_new.append(n) - - return net_new - - -# Alias -UnStackLayer = unstack_layer diff --git a/tensorlayer/layers/super_resolution.py b/tensorlayer/layers/super_resolution.py deleted file mode 100644 index bc65a1b..0000000 --- a/tensorlayer/layers/super_resolution.py +++ /dev/null @@ -1,171 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'SubpixelConv1d', - 'SubpixelConv2d', -] - - -def subpixel_conv2d(net, scale=2, n_out_channel=None, act=tf.identity, name='subpixel_conv2d'): - """It is a 2D sub-pixel up-sampling layer, usually be used - for Super-Resolution applications, see `SRGAN `__ for example. - - Parameters - ------------ - net : :class:`Layer` - Previous layer, - scale : int - The up-scaling ratio, a wrong setting will lead to dimension size error. - n_out_channel : int or None - The number of output channels. - - If None, automatically set n_out_channel == the number of input channels / (scale x scale). - - The number of input channels == (scale x scale) x The number of output channels. - act : activation function - The activation function of this layer. - name : str - A unique layer name. 
- - Returns - ------- - :class:`Layer` - A 2D sub-pixel up-sampling layer - - Examples - --------- - >>> # examples here just want to tell you how to set the n_out_channel. - >>> x = np.random.rand(2, 16, 16, 4) - >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4), name="X") - >>> net = InputLayer(X, name='input') - >>> net = SubpixelConv2d(net, scale=2, n_out_channel=1, name='subpixel_conv2d') - >>> y = sess.run(net.outputs, feed_dict={X: x}) - >>> print(x.shape, y.shape) - ... (2, 16, 16, 4) (2, 32, 32, 1) - >>> - >>> x = np.random.rand(2, 16, 16, 4*10) - >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4*10), name="X") - >>> net = InputLayer(X, name='input2') - >>> net = SubpixelConv2d(net, scale=2, n_out_channel=10, name='subpixel_conv2d2') - >>> y = sess.run(net.outputs, feed_dict={X: x}) - >>> print(x.shape, y.shape) - ... (2, 16, 16, 40) (2, 32, 32, 10) - >>> - >>> x = np.random.rand(2, 16, 16, 25*10) - >>> X = tf.placeholder("float32", shape=(2, 16, 16, 25*10), name="X") - >>> net = InputLayer(X, name='input3') - >>> net = SubpixelConv2d(net, scale=5, n_out_channel=None, name='subpixel_conv2d3') - >>> y = sess.run(net.outputs, feed_dict={X: x}) - >>> print(x.shape, y.shape) - ... 
(2, 16, 16, 250) (2, 80, 80, 10) - - References - ------------ - - `Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network `__ - - """ - # github/Tetrachrome/subpixel https://github.com/Tetrachrome/subpixel/blob/master/subpixel.py - - _err_log = "SubpixelConv2d: The number of input channels == (scale x scale) x The number of output channels" - - # scope_name = tf.get_variable_scope().name - # if scope_name: - # whole_name = scope_name + '/' + name - # else: - # whole_name = name - - def _PS(X, r, n_out_channels): - if n_out_channels >= 1: - assert int(X.get_shape()[-1]) == (r**2) * n_out_channels, _err_log - - # bsize, a, b, c = X.get_shape().as_list() - # bsize = tf.shape(X)[0] # Handling Dimension(None) type for undefined batch dim - # Xs=tf.split(X,r,3) #b*h*w*r*r - # Xr=tf.concat(Xs,2) #b*h*(r*w)*r - # X=tf.reshape(Xr,(bsize,r*a,r*b,n_out_channel)) # b*(r*h)*(r*w)*c - - X = tf.depth_to_space(X, r) - else: - logging.info(_err_log) - return X - - inputs = net.outputs - if n_out_channel is None: - assert int(inputs.get_shape()[-1]) / (scale**2) % 1 == 0, _err_log - n_out_channel = int(int(inputs.get_shape()[-1]) / (scale**2)) - - logging.info("SubpixelConv2d %s: scale: %d n_out_channel: %s act: %s" % (name, scale, n_out_channel, act.__name__)) - - net_new = Layer(prev_layer=net, name=name) #whole_name) - # with tf.name_scope(name): - with tf.variable_scope(name): - net_new.outputs = act(_PS(inputs, r=scale, n_out_channels=n_out_channel)) - - # net_new.all_layers = list(net.all_layers) - # net_new.all_params = list(net.all_params) - # net_new.all_drop = dict(net.all_drop) - net_new.all_layers.append(net_new.outputs) - return net_new - - -def subpixel_conv1d(net, scale=2, act=tf.identity, name='subpixel_conv1d'): - """It is a 1D sub-pixel up-sampling layer. - - Calls a TensorFlow function that directly implements this functionality. 
- We assume input has dim (batch, width, r) - - Parameters - ------------ - net : :class:`Layer` - Previous layer with output shape of (batch, width, r). - scale : int - The up-scaling ratio, a wrong setting will lead to Dimension size error. - act : activation function - The activation function of this layer. - name : str - A unique layer name. - - Returns - ------- - :class:`Layer` - A 1D sub-pixel up-sampling layer - - Examples - ---------- - >>> t_signal = tf.placeholder('float32', [10, 100, 4], name='x') - >>> n = InputLayer(t_signal, name='in') - >>> n = SubpixelConv1d(n, scale=2, name='s') - >>> print(n.outputs.shape) - ... (10, 200, 2) - - References - ----------- - `Audio Super Resolution Implementation `__. - - """ - - def _PS(I, r): - X = tf.transpose(I, [2, 1, 0]) # (r, w, b) - X = tf.batch_to_space_nd(X, [r], [[0, 0]]) # (1, r*w, b) - X = tf.transpose(X, [2, 1, 0]) - return X - - logging.info("SubpixelConv1d %s: scale: %d act: %s" % (name, scale, act.__name__)) - - inputs = net.outputs - net_new = Layer(prev_layer=net, name=name) - with tf.name_scope(name): - net_new.outputs = act(_PS(inputs, r=scale)) - - # net_new.all_layers = list(net.all_layers) - # net_new.all_params = list(net.all_params) - # net_new.all_drop = dict(net.all_drop) - net_new.all_layers.append(net_new.outputs) - return net_new - - -# Alias -SubpixelConv2d = subpixel_conv2d -SubpixelConv1d = subpixel_conv1d diff --git a/tensorlayer/layers/time_distribution.py b/tensorlayer/layers/time_distribution.py deleted file mode 100644 index cbfbdc4..0000000 --- a/tensorlayer/layers/time_distribution.py +++ /dev/null @@ -1,85 +0,0 @@ -# -*- coding: utf-8 -*- - -from .core import * -from .. import _logging as logging -import tensorflow as tf - -__all__ = [ - 'TimeDistributedLayer', -] - - -class TimeDistributedLayer(Layer): - """ - The :class:`TimeDistributedLayer` class that applies a function to every timestep of the input tensor. 
- For example, if use :class:`DenseLayer` as the `layer_class`, we input (batch_size, length, dim) and - output (batch_size , length, new_dim). - - Parameters - ---------- - layer : :class:`Layer` - Previous layer with output size of (batch_size, length, dim). - layer_class : a :class:`Layer` class - The layer class name. - args : dictionary - The arguments for the ``layer_class``. - name : str - A unique layer name. - - Examples - -------- - >>> batch_size = 32 - >>> timestep = 20 - >>> input_dim = 100 - >>> x = tf.placeholder(dtype=tf.float32, shape=[batch_size, timestep, input_dim], name="encode_seqs") - >>> net = InputLayer(x, name='input') - >>> net = TimeDistributedLayer(net, layer_class=DenseLayer, args={'n_units':50, 'name':'dense'}, name='time_dense') - ... [TL] InputLayer input: (32, 20, 100) - ... [TL] TimeDistributedLayer time_dense: layer_class:DenseLayer - >>> print(net.outputs._shape) - ... (32, 20, 50) - >>> net.print_params(False) - ... param 0: (100, 50) time_dense/dense/W:0 - ... param 1: (50,) time_dense/dense/b:0 - ... 
num of params: 5050 - - """ - - def __init__( - self, - prev_layer, - layer_class=None, - args=None, - name='time_distributed', - ): - if args is None: - args = {} - if not isinstance(args, dict): - raise TypeError("'args' must be a dict.") - - Layer.__init__(self, prev_layer=prev_layer, name=name) - self.inputs = prev_layer.outputs - logging.info("TimeDistributedLayer %s: layer_class:%s args:%s" % (self.name, layer_class.__name__, args)) - - if not isinstance(self.inputs, tf.Tensor): - self.inputs = tf.transpose(tf.stack(self.inputs), [1, 0, 2]) - - input_shape = self.inputs.get_shape() - - timestep = input_shape[1] - x = tf.unstack(self.inputs, axis=1) - - is_name_reuse = tf.get_variable_scope().reuse - for i in range(0, timestep): - with tf.variable_scope(name, reuse=(is_name_reuse if i == 0 else True)) as vs: - net = layer_class(InputLayer(x[i], name=args['name'] + str(i)), **args) - x[i] = net.outputs - variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) - - self.outputs = tf.stack(x, axis=1, name=name) - - # self.all_layers = list(layer.all_layers) - # self.all_params = list(layer.all_params) - # self.all_drop = dict(layer.all_drop) - self.all_layers.append(self.outputs) - self.all_params.extend(variables) diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py deleted file mode 100644 index ce0290d..0000000 --- a/tensorlayer/nlp.py +++ /dev/null @@ -1,1112 +0,0 @@ -# -*- coding: utf-8 -*- - -import collections, os, random -import re, subprocess, tempfile, warnings -import numpy as np -import tensorflow as tf -from six.moves import urllib, xrange -from tensorflow.python.platform import gfile -from . 
import _logging as logging - -__all__ = [ - 'generate_skip_gram_batch', - 'sample', - 'sample_top', - 'SimpleVocabulary', - 'Vocabulary', - 'process_sentence', - 'create_vocab', - 'simple_read_words', - 'read_words', - 'read_analogies_file', - 'build_vocab', - 'build_reverse_dictionary', - 'build_words_dataset', - 'words_to_word_ids', - 'word_ids_to_words', - 'save_vocab', - 'basic_tokenizer', - 'create_vocabulary', - 'initialize_vocabulary', - 'sentence_to_token_ids', - 'data_to_token_ids', - 'moses_multi_bleu', -] - - -def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_index=0): - """Generate a training batch for the Skip-Gram model. - - See `Word2Vec example `__. - - Parameters - ---------- - data : list of data - To present context, usually a list of integers. - batch_size : int - Batch size to return. - num_skips : int - How many times to reuse an input to generate a label. - skip_window : int - How many words to consider left and right. - data_index : int - Index of the context location. This code use `data_index` to instead of yield like ``tl.iterate``. - - Returns - ------- - batch : list of data - Inputs. - labels : list of data - Labels - data_index : int - Index of the context location. - - Examples - -------- - Setting num_skips=2, skip_window=1, use the right and left words. - In the same way, num_skips=4, skip_window=2 means use the nearby 4 words. - - >>> data = [1,2,3,4,5,6,7,8,9,10,11] - >>> batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0) - >>> print(batch) - ... [2 2 3 3 4 4 5 5] - >>> print(labels) - ... [[3] - ... [1] - ... [4] - ... [2] - ... [5] - ... [3] - ... [4] - ... [6]] - - """ - # global data_index # you can put data_index outside the function, then - # modify the global data_index in the function without return it. - # note: without using yield, this code use data_index to instead. 
- - if batch_size % num_skips != 0: - raise Exception("batch_size should be able to be divided by num_skips.") - if num_skips > 2 * skip_window: - raise Exception("num_skips <= 2 * skip_window") - batch = np.ndarray(shape=(batch_size), dtype=np.int32) - labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) - span = 2 * skip_window + 1 # [ skip_window target skip_window ] - buffer = collections.deque(maxlen=span) - for _ in range(span): - buffer.append(data[data_index]) - data_index = (data_index + 1) % len(data) - for i in range(batch_size // num_skips): - target = skip_window # target label at the center of the buffer - targets_to_avoid = [skip_window] - for j in range(num_skips): - while target in targets_to_avoid: - target = random.randint(0, span - 1) - targets_to_avoid.append(target) - batch[i * num_skips + j] = buffer[skip_window] - labels[i * num_skips + j, 0] = buffer[target] - buffer.append(data[data_index]) - data_index = (data_index + 1) % len(data) - return batch, labels, data_index - - -def sample(a=None, temperature=1.0): - """Sample an index from a probability array. - - Parameters - ---------- - a : list of float - List of probabilities. - temperature : float or None - The higher the more uniform. When a = [0.1, 0.2, 0.7], - - temperature = 0.7, the distribution will be sharpen [0.05048273, 0.13588945, 0.81362782] - - temperature = 1.0, the distribution will be the same [0.1, 0.2, 0.7] - - temperature = 1.5, the distribution will be filtered [0.16008435, 0.25411807, 0.58579758] - - If None, it will be ``np.argmax(a)`` - - Notes - ------ - - No matter what is the temperature and input list, the sum of all probabilities will be one. Even if input list = [1, 100, 200], the sum of all probabilities will still be one. - - For large vocabulary size, choice a higher temperature or ``tl.nlp.sample_top`` to avoid error. 
- - """ - if a is None: - raise Exception("a : list of float") - b = np.copy(a) - try: - if temperature == 1: - return np.argmax(np.random.multinomial(1, a, 1)) - if temperature is None: - return np.argmax(a) - else: - a = np.log(a) / temperature - a = np.exp(a) / np.sum(np.exp(a)) - return np.argmax(np.random.multinomial(1, a, 1)) - except Exception: - # np.set_printoptions(threshold=np.nan) - # logging.info(a) - # logging.info(np.sum(a)) - # logging.info(np.max(a)) - # logging.info(np.min(a)) - # exit() - message = "For large vocabulary_size, choice a higher temperature\ - to avoid log error. Hint : use ``sample_top``. " - - warnings.warn(message, Warning) - # logging.info(a) - # logging.info(b) - return np.argmax(np.random.multinomial(1, b, 1)) - - -def sample_top(a=None, top_k=10): - """Sample from ``top_k`` probabilities. - - Parameters - ---------- - a : list of float - List of probabilities. - top_k : int - Number of candidates to be considered. - - """ - if a is None: - a = [] - - idx = np.argpartition(a, -top_k)[-top_k:] - probs = a[idx] - # logging.info("new %f" % probs) - probs = probs / np.sum(probs) - choice = np.random.choice(idx, p=probs) - return choice - # old implementation - # a = np.array(a) - # idx = np.argsort(a)[::-1] - # idx = idx[:top_k] - # # a = a[idx] - # probs = a[idx] - # logging.info("prev %f" % probs) - # # probs = probs / np.sum(probs) - # # choice = np.random.choice(idx, p=probs) - # # return choice - - -# Vector representations of words (Advanced) UNDOCUMENT -class SimpleVocabulary(object): - """Simple vocabulary wrapper, see create_vocab(). - - Parameters - ------------ - vocab : dictionary - A dictionary that maps word to ID. - unk_id : int - The ID for 'unknown' word. 
- - """ - - def __init__(self, vocab, unk_id): - """Initialize the vocabulary.""" - self._vocab = vocab - self._unk_id = unk_id - - def word_to_id(self, word): - """Returns the integer id of a word string.""" - if word in self._vocab: - return self._vocab[word] - else: - return self._unk_id - - -class Vocabulary(object): - """Create Vocabulary class from a given vocabulary and its id-word, word-id convert. - See create_vocab() and ``tutorial_tfrecord3.py``. - - Parameters - ----------- - vocab_file : str - The file contains the vocabulary (can be created via ``tl.nlp.create_vocab``), where the words are the first whitespace-separated token on each line (other tokens are ignored) and the word ids are the corresponding line numbers. - start_word : str - Special word denoting sentence start. - end_word : str - Special word denoting sentence end. - unk_word : str - Special word denoting unknown words. - - Attributes - ------------ - vocab : dictionary - A dictionary that maps word to ID. - reverse_vocab : list of int - A list that maps ID to word. - start_id : int - For start ID. - end_id : int - For end ID. - unk_id : int - For unknown ID. - pad_id : int - For Padding ID. - - Examples - ------------- - The vocab file looks like follow, includes `start_word` , `end_word` ... - - >>> a 969108 - >>> 586368 - >>> 586368 - >>> . 440479 - >>> on 213612 - >>> of 202290 - >>> the 196219 - >>> in 182598 - >>> with 152984 - >>> and 139109 - >>> is 97322 - - """ - - def __init__(self, vocab_file, start_word="", end_word="", unk_word="", pad_word=""): - if not tf.gfile.Exists(vocab_file): - tf.logging.fatal("Vocab file %s not found." 
% vocab_file) - tf.logging.info("Initializing vocabulary from file: %s" % vocab_file) - - with tf.gfile.GFile(vocab_file, mode="r") as f: - reverse_vocab = list(f.readlines()) - reverse_vocab = [line.split()[0] for line in reverse_vocab] - # assert start_word in reverse_vocab - # assert end_word in reverse_vocab - if start_word not in reverse_vocab: # haodong - reverse_vocab.append(start_word) - if end_word not in reverse_vocab: - reverse_vocab.append(end_word) - if unk_word not in reverse_vocab: - reverse_vocab.append(unk_word) - if pad_word not in reverse_vocab: - reverse_vocab.append(pad_word) - - vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) - - logging.info("Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word)) - logging.info(" vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab)) - # tf.logging.info(" vocabulary with %d words" % len(vocab)) - - self.vocab = vocab # vocab[word] = id - self.reverse_vocab = reverse_vocab # reverse_vocab[id] = word - - # Save special word ids. - self.start_id = vocab[start_word] - self.end_id = vocab[end_word] - self.unk_id = vocab[unk_word] - self.pad_id = vocab[pad_word] - logging.info(" start_id: %d" % self.start_id) - logging.info(" end_id : %d" % self.end_id) - logging.info(" unk_id : %d" % self.unk_id) - logging.info(" pad_id : %d" % self.pad_id) - - def word_to_id(self, word): - """Returns the integer word id of a word string.""" - if word in self.vocab: - return self.vocab[word] - else: - return self.unk_id - - def id_to_word(self, word_id): - """Returns the word string of an integer word id.""" - if word_id >= len(self.reverse_vocab): - return self.reverse_vocab[self.unk_id] - else: - return self.reverse_vocab[word_id] - - -def process_sentence(sentence, start_word="", end_word=""): - """Seperate a sentence string into a list of string words, add start_word and end_word, - see ``create_vocab()`` and ``tutorial_tfrecord3.py``. 
- - Parameters - ---------- - sentence : str - A sentence. - start_word : str or None - The start word. If None, no start word will be appended. - end_word : str or None - The end word. If None, no end word will be appended. - - Returns - --------- - list of str - A list of strings that separated into words. - - Examples - ----------- - >>> c = "how are you?" - >>> c = tl.nlp.process_sentence(c) - >>> print(c) - ... ['', 'how', 'are', 'you', '?', ''] - - Notes - ------- - - You have to install the following package. - - `Installing NLTK `__ - - `Installing NLTK data `__ - - """ - try: - import nltk - except: - raise Exception("Hint : NLTK is required.") - if start_word is not None: - process_sentence = [start_word] - else: - process_sentence = [] - process_sentence.extend(nltk.tokenize.word_tokenize(sentence.lower())) - if end_word is not None: - process_sentence.append(end_word) - return process_sentence - - -def create_vocab(sentences, word_counts_output_file, min_word_count=1): - """Creates the vocabulary of word to word_id. - - See ``tutorial_tfrecord3.py``. - - The vocabulary is saved to disk in a text file of word counts. The id of each - word in the file is its corresponding 0-based line number. - - Parameters - ------------ - sentences : list of list of str - All sentences for creating the vocabulary. - word_counts_output_file : str - The file name. - min_word_count : int - Minimum number of occurrences for a word. - - Returns - -------- - :class:`SimpleVocabulary` - The simple vocabulary object, see :class:`Vocabulary` for more. 
- - Examples - -------- - Pre-process sentences - - >>> captions = ["one two , three", "four five five"] - >>> processed_capts = [] - >>> for c in captions: - >>> c = tl.nlp.process_sentence(c, start_word="", end_word="") - >>> processed_capts.append(c) - >>> print(processed_capts) - ...[['', 'one', 'two', ',', 'three', ''], ['', 'four', 'five', 'five', '']] - - Create vocabulary - - >>> tl.nlp.create_vocab(processed_capts, word_counts_output_file='vocab.txt', min_word_count=1) - ... Creating vocabulary. - ... Total words: 8 - ... Words in vocabulary: 8 - ... Wrote vocabulary file: vocab.txt - - Get vocabulary object - - >>> vocab = tl.nlp.Vocabulary('vocab.txt', start_word="", end_word="", unk_word="") - ... INFO:tensorflow:Initializing vocabulary from file: vocab.txt - ... [TL] Vocabulary from vocab.txt : - ... vocabulary with 10 words (includes start_word, end_word, unk_word) - ... start_id: 2 - ... end_id: 3 - ... unk_id: 9 - ... pad_id: 0 - - """ - from collections import Counter - logging.info("Creating vocabulary.") - counter = Counter() - for c in sentences: - counter.update(c) - # logging.info('c',c) - logging.info(" Total words: %d" % len(counter)) - - # Filter uncommon words and sort by descending count. - word_counts = [x for x in counter.items() if x[1] >= min_word_count] - word_counts.sort(key=lambda x: x[1], reverse=True) - word_counts = [("", 0)] + word_counts # 1st id should be reserved for padding - # logging.info(word_counts) - logging.info(" Words in vocabulary: %d" % len(word_counts)) - - # Write out the word counts file. - with tf.gfile.FastGFile(word_counts_output_file, "w") as f: - f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts])) - logging.info(" Wrote vocabulary file: %s" % word_counts_output_file) - - # Create the vocabulary dictionary. 
- reverse_vocab = [x[0] for x in word_counts] - unk_id = len(reverse_vocab) - vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) - vocab = SimpleVocabulary(vocab_dict, unk_id) - - return vocab - - -# Vector representations of words -def simple_read_words(filename="nietzsche.txt"): - """Read context from file without any preprocessing. - - Parameters - ---------- - filename : str - A file path (like .txt file) - - Returns - -------- - str - The context in a string. - - """ - with open(filename, "r") as f: - words = f.read() - return words - - -def read_words(filename="nietzsche.txt", replace=None): - """Read list format context from a file. - - For customized read_words method, see ``tutorial_generate_text.py``. - - Parameters - ---------- - filename : str - a file path. - replace : list of str - replace original string by target string. - - Returns - ------- - list of str - The context in a list (split using space). - """ - if replace is None: - replace = ['\n', ''] - - with tf.gfile.GFile(filename, "r") as f: - try: # python 3.4 or older - context_list = f.read().replace(*replace).split() - except Exception: # python 3.5 - f.seek(0) - replace = [x.encode('utf-8') for x in replace] - context_list = f.read().replace(*replace).split() - return context_list - - -def read_analogies_file(eval_file='questions-words.txt', word2id=None): - """Reads through an analogy question file, return its id format. - - Parameters - ---------- - eval_file : str - The file name. - word2id : dictionary - a dictionary that maps word to ID. - - Returns - -------- - numpy.array - A ``[n_examples, 4]`` numpy array containing the analogy question's word IDs. 
- - Examples - --------- - The file should be in this format - - >>> : capital-common-countries - >>> Athens Greece Baghdad Iraq - >>> Athens Greece Bangkok Thailand - >>> Athens Greece Beijing China - >>> Athens Greece Berlin Germany - >>> Athens Greece Bern Switzerland - >>> Athens Greece Cairo Egypt - >>> Athens Greece Canberra Australia - >>> Athens Greece Hanoi Vietnam - >>> Athens Greece Havana Cuba - - Get the tokenized analogy question data - - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) - >>> analogy_questions = tl.nlp.read_analogies_file(eval_file='questions-words.txt', word2id=dictionary) - >>> print(analogy_questions) - ... [[ 3068 1248 7161 1581] - ... [ 3068 1248 28683 5642] - ... [ 3068 1248 3878 486] - ... ..., - ... [ 1216 4309 19982 25506] - ... [ 1216 4309 3194 8650] - ... [ 1216 4309 140 312]] - - """ - if word2id is None: - word2id = {} - - questions = [] - questions_skipped = 0 - with open(eval_file, "rb") as analogy_f: - for line in analogy_f: - if line.startswith(b":"): # Skip comments. - continue - words = line.strip().lower().split(b" ") # lowercase - ids = [word2id.get(w.strip()) for w in words] - if None in ids or len(ids) != 4: - questions_skipped += 1 - else: - questions.append(np.array(ids)) - logging.info("Eval analogy file: %s" % eval_file) - logging.info("Questions: %d", len(questions)) - logging.info("Skipped: %d", questions_skipped) - analogy_questions = np.array(questions, dtype=np.int32) - return analogy_questions - - -def build_vocab(data): - """Build vocabulary. - - Given the context in list format. - Return the vocabulary, which is a dictionary for word to id. - e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } - - Parameters - ---------- - data : list of str - The context in list format - - Returns - -------- - dictionary - that maps word to unique ID. e.g. 
{'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } - - References - --------------- - - `tensorflow.models.rnn.ptb.reader `_ - - Examples - -------- - >>> data_path = os.getcwd() + '/simple-examples/data' - >>> train_path = os.path.join(data_path, "ptb.train.txt") - >>> word_to_id = build_vocab(read_txt_words(train_path)) - - """ - # data = _read_words(filename) - counter = collections.Counter(data) - # logging.info('counter %s' % counter) # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1 - count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) - # logging.info('count_pairs %s' % count_pairs) # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1) - words, _ = list(zip(*count_pairs)) - word_to_id = dict(zip(words, range(len(words)))) - # logging.info(words) # list of words - # logging.info(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 - return word_to_id - - -def build_reverse_dictionary(word_to_id): - """Given a dictionary that maps word to integer id. - Returns a reverse dictionary that maps a id to word. - - Parameters - ---------- - word_to_id : dictionary - that maps word to ID. - - Returns - -------- - dictionary - A dictionary that maps IDs to words. - - """ - reverse_dictionary = dict(zip(word_to_id.values(), word_to_id.keys())) - return reverse_dictionary - - -def build_words_dataset(words=None, vocabulary_size=50000, printable=True, unk_key='UNK'): - """Build the words dictionary and replace rare words with 'UNK' token. - The most common word has the smallest integer id. - - Parameters - ---------- - words : list of str or byte - The context in list format. You may need to do preprocessing on the words, such as lower case, remove marks etc. - vocabulary_size : int - The maximum vocabulary size, limiting the vocabulary size. Then the script replaces rare words with 'UNK' token. 
- printable : boolean - Whether to print the read vocabulary size of the given words. - unk_key : str - Represent the unknown words. - - Returns - -------- - data : list of int - The context in a list of ID. - count : list of tuple and list - Pair words and IDs. - - count[0] is a list : the number of rare words - - count[1:] are tuples : the number of occurrence of each word - - e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] - dictionary : dictionary - It is `word_to_id` that maps word to ID. - reverse_dictionary : a dictionary - It is `id_to_word` that maps ID to word. - - Examples - -------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size) - - References - ----------------- - - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `__ - - """ - if words is None: - raise Exception("words : list of str or byte") - - count = [[unk_key, -1]] - count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) - dictionary = dict() - for word, _ in count: - dictionary[word] = len(dictionary) - data = list() - unk_count = 0 - for word in words: - if word in dictionary: - index = dictionary[word] - else: - index = 0 # dictionary['UNK'] - unk_count += 1 - data.append(index) - count[0][1] = unk_count - reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) - if printable: - logging.info('Real vocabulary size %d' % len(collections.Counter(words).keys())) - logging.info('Limited vocabulary size {}'.format(vocabulary_size)) - if len(collections.Counter(words).keys()) < vocabulary_size: - raise Exception( - "len(collections.Counter(words).keys()) >= vocabulary_size , the limited vocabulary_size must be less than or equal to the read vocabulary_size") - return data, count, dictionary, reverse_dictionary - - -def words_to_word_ids(data=None, word_to_id=None, 
unk_key='UNK'): - """Convert a list of string (words) to IDs. - - Parameters - ---------- - data : list of string or byte - The context in list format - word_to_id : a dictionary - that maps word to ID. - unk_key : str - Represent the unknown words. - - Returns - -------- - list of int - A list of IDs to represent the context. - - Examples - -------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) - >>> context = [b'hello', b'how', b'are', b'you'] - >>> ids = tl.nlp.words_to_word_ids(words, dictionary) - >>> context = tl.nlp.word_ids_to_words(ids, reverse_dictionary) - >>> print(ids) - ... [6434, 311, 26, 207] - >>> print(context) - ... [b'hello', b'how', b'are', b'you'] - - References - --------------- - - `tensorflow.models.rnn.ptb.reader `__ - - """ - if data is None: - raise Exception("data : list of string or byte") - if word_to_id is None: - raise Exception("word_to_id : a dictionary") - # if isinstance(data[0], six.string_types): - # logging.info(type(data[0])) - # # exit() - # logging.info(data[0]) - # logging.info(word_to_id) - # return [word_to_id[str(word)] for word in data] - # else: - - word_ids = [] - for word in data: - if word_to_id.get(word) is not None: - word_ids.append(word_to_id[word]) - else: - word_ids.append(word_to_id[unk_key]) - return word_ids - # return [word_to_id[word] for word in data] # this one - - # if isinstance(data[0], str): - # # logging.info('is a string object') - # return [word_to_id[word] for word in data] - # else:#if isinstance(s, bytes): - # # logging.info('is a unicode object') - # # logging.info(data[0]) - # return [word_to_id[str(word)] f - - -def word_ids_to_words(data, id_to_word): - """Convert a list of integer to strings (words). - - Parameters - ---------- - data : list of int - The context in list format. - id_to_word : dictionary - a dictionary that maps ID to word. 
- - Returns - -------- - list of str - A list of string or byte to represent the context. - - Examples - --------- - >>> see ``tl.nlp.words_to_word_ids`` - - """ - return [id_to_word[i] for i in data] - - -def save_vocab(count=None, name='vocab.txt'): - """Save the vocabulary to a file so the model can be reloaded. - - Parameters - ---------- - count : a list of tuple and list - count[0] is a list : the number of rare words, - count[1:] are tuples : the number of occurrence of each word, - e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] - - Examples - --------- - >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) - >>> tl.nlp.save_vocab(count, name='vocab_text8.txt') - >>> vocab_text8.txt - ... UNK 418391 - ... the 1061396 - ... of 593677 - ... and 416629 - ... one 411764 - ... in 372201 - ... a 325873 - ... to 316376 - - """ - if count is None: - count = [] - - pwd = os.getcwd() - vocabulary_size = len(count) - with open(os.path.join(pwd, name), "w") as f: - for i in xrange(vocabulary_size): - f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1])) - logging.info("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd)) - - -# Functions for translation - - -def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): - """Very basic tokenizer: split the sentence into a list of tokens. - - Parameters - ----------- - sentence : tensorflow.python.platform.gfile.GFile Object - _WORD_SPLIT : regular expression for word spliting. - - - Examples - -------- - >>> see create_vocabulary - >>> from tensorflow.python.platform import gfile - >>> train_path = "wmt/giga-fren.release2" - >>> with gfile.GFile(train_path + ".en", mode="rb") as f: - >>> for line in f: - >>> tokens = tl.nlp.basic_tokenizer(line) - >>> logging.info(tokens) - >>> exit() - ... 
[b'Changing', b'Lives', b'|', b'Changing', b'Society', b'|', b'How', - ... b'It', b'Works', b'|', b'Technology', b'Drives', b'Change', b'Home', - ... b'|', b'Concepts', b'|', b'Teachers', b'|', b'Search', b'|', b'Overview', - ... b'|', b'Credits', b'|', b'HHCC', b'Web', b'|', b'Reference', b'|', - ... b'Feedback', b'Virtual', b'Museum', b'of', b'Canada', b'Home', b'Page'] - - References - ---------- - - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` - - """ - words = [] - sentence = tf.compat.as_bytes(sentence) - for space_separated_fragment in sentence.strip().split(): - words.extend(re.split(_WORD_SPLIT, space_separated_fragment)) - return [w for w in words if w] - - -def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size, tokenizer=None, normalize_digits=True, _DIGIT_RE=re.compile(br"\d"), _START_VOCAB=None): - """Create vocabulary file (if it does not exist yet) from data file. - - Data file is assumed to contain one sentence per line. Each sentence is - tokenized and digits are normalized (if normalize_digits is set). - Vocabulary contains the most-frequent tokens up to max_vocabulary_size. - We write it to vocabulary_path in a one-token-per-line format, so that later - token in the first line gets id=0, second line gets id=1, and so on. - - Parameters - ----------- - vocabulary_path : str - Path where the vocabulary will be created. - data_path : str - Data file that will be used to create vocabulary. - max_vocabulary_size : int - Limit on the size of the created vocabulary. - tokenizer : function - A function to use to tokenize each data sentence. If None, basic_tokenizer will be used. - normalize_digits : boolean - If true, all digits are replaced by `0`. - _DIGIT_RE : regular expression function - Default is ``re.compile(br"\d")``. - _START_VOCAB : list of str - The pad, go, eos and unk token, default is ``[b"_PAD", b"_GO", b"_EOS", b"_UNK"]``. 
- - References - ---------- - - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` - - """ - if _START_VOCAB is None: - _START_VOCAB = [b"_PAD", b"_GO", b"_EOS", b"_UNK"] - if not gfile.Exists(vocabulary_path): - logging.info("Creating vocabulary %s from data %s" % (vocabulary_path, data_path)) - vocab = {} - with gfile.GFile(data_path, mode="rb") as f: - counter = 0 - for line in f: - counter += 1 - if counter % 100000 == 0: - logging.info(" processing line %d" % counter) - tokens = tokenizer(line) if tokenizer else basic_tokenizer(line) - for w in tokens: - word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w - if word in vocab: - vocab[word] += 1 - else: - vocab[word] = 1 - vocab_list = _START_VOCAB + sorted(vocab, key=vocab.get, reverse=True) - if len(vocab_list) > max_vocabulary_size: - vocab_list = vocab_list[:max_vocabulary_size] - with gfile.GFile(vocabulary_path, mode="wb") as vocab_file: - for w in vocab_list: - vocab_file.write(w + b"\n") - else: - logging.info("Vocabulary %s from data %s exists" % (vocabulary_path, data_path)) - - -def initialize_vocabulary(vocabulary_path): - """Initialize vocabulary from file, return the `word_to_id` (dictionary) - and `id_to_word` (list). - - We assume the vocabulary is stored one-item-per-line, so a file will result in a vocabulary {"dog": 0, "cat": 1}, and this function will also return the reversed-vocabulary ["dog", "cat"]. - - Parameters - ----------- - vocabulary_path : str - Path to the file containing the vocabulary. - - Returns - -------- - vocab : dictionary - a dictionary that maps word to ID. - rev_vocab : list of int - a list that maps ID to word. - - Examples - --------- - >>> Assume 'test' contains - ... dog - ... cat - ... 
bird - >>> vocab, rev_vocab = tl.nlp.initialize_vocabulary("test") - >>> print(vocab) - >>> {b'cat': 1, b'dog': 0, b'bird': 2} - >>> print(rev_vocab) - >>> [b'dog', b'cat', b'bird'] - - Raises - ------- - ValueError : if the provided vocabulary_path does not exist. - - """ - if gfile.Exists(vocabulary_path): - rev_vocab = [] - with gfile.GFile(vocabulary_path, mode="rb") as f: - rev_vocab.extend(f.readlines()) - rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab] - vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)]) - return vocab, rev_vocab - else: - raise ValueError("Vocabulary file %s not found.", vocabulary_path) - - -def sentence_to_token_ids(sentence, vocabulary, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): - """Convert a string to list of integers representing token-ids. - - For example, a sentence "I have a dog" may become tokenized into - ["I", "have", "a", "dog"] and with vocabulary {"I": 1, "have": 2, - "a": 4, "dog": 7"} this function will return [1, 2, 4, 7]. - - Parameters - ----------- - sentence : tensorflow.python.platform.gfile.GFile Object - The sentence in bytes format to convert to token-ids, see ``basic_tokenizer()`` and ``data_to_token_ids()``. - vocabulary : dictionary - Mmapping tokens to integers. - tokenizer : function - A function to use to tokenize each sentence. If None, ``basic_tokenizer`` will be used. - normalize_digits : boolean - If true, all digits are replaced by 0. - - Returns - -------- - list of int - The token-ids for the sentence. - - """ - if tokenizer: - words = tokenizer(sentence) - else: - words = basic_tokenizer(sentence) - if not normalize_digits: - return [vocabulary.get(w, UNK_ID) for w in words] - # Normalize digits by 0 before looking words up in the vocabulary. 
- return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words] - - -def data_to_token_ids(data_path, target_path, vocabulary_path, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): - """Tokenize data file and turn into token-ids using given vocabulary file. - - This function loads data line-by-line from data_path, calls the above - sentence_to_token_ids, and saves the result to target_path. See comment - for sentence_to_token_ids on the details of token-ids format. - - Parameters - ----------- - data_path : str - Path to the data file in one-sentence-per-line format. - target_path : str - Path where the file with token-ids will be created. - vocabulary_path : str - Path to the vocabulary file. - tokenizer : function - A function to use to tokenize each sentence. If None, ``basic_tokenizer`` will be used. - normalize_digits : boolean - If true, all digits are replaced by 0. - - References - ---------- - - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` - - """ - if not gfile.Exists(target_path): - logging.info("Tokenizing data in %s" % data_path) - vocab, _ = initialize_vocabulary(vocabulary_path) - with gfile.GFile(data_path, mode="rb") as data_file: - with gfile.GFile(target_path, mode="w") as tokens_file: - counter = 0 - for line in data_file: - counter += 1 - if counter % 100000 == 0: - logging.info(" tokenizing line %d" % counter) - token_ids = sentence_to_token_ids(line, vocab, tokenizer, normalize_digits, UNK_ID=UNK_ID, _DIGIT_RE=_DIGIT_RE) - tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n") - else: - logging.info("Target path %s exists" % target_path) - - -def moses_multi_bleu(hypotheses, references, lowercase=False): - """Calculate the bleu score for hypotheses and references - using the MOSES ulti-bleu.perl script. - - Parameters - ------------ - hypotheses : numpy.array.string - A numpy array of strings where each string is a single example. 
- references : numpy.array.string - A numpy array of strings where each string is a single example. - lowercase : boolean - If True, pass the "-lc" flag to the multi-bleu script - - Examples - --------- - >>> hypotheses = ["a bird is flying on the sky"] - >>> references = ["two birds are flying on the sky", "a bird is on the top of the tree", "an airplane is on the sky",] - >>> score = tl.nlp.moses_multi_bleu(hypotheses, references) - - Returns - -------- - float - The BLEU score - - References - ---------- - - `Google/seq2seq/metric/bleu `__ - - """ - if np.size(hypotheses) == 0: - return np.float32(0.0) - - # Get MOSES multi-bleu script - try: - multi_bleu_path, _ = urllib.request.urlretrieve("https://raw.githubusercontent.com/moses-smt/mosesdecoder/" "master/scripts/generic/multi-bleu.perl") - os.chmod(multi_bleu_path, 0o755) - except Exception: # pylint: disable=W0702 - tf.logging.info("Unable to fetch multi-bleu.perl script, using local.") - metrics_dir = os.path.dirname(os.path.realpath(__file__)) - bin_dir = os.path.abspath(os.path.join(metrics_dir, "..", "..", "bin")) - multi_bleu_path = os.path.join(bin_dir, "tools/multi-bleu.perl") - - # Dump hypotheses and references to tempfiles - hypothesis_file = tempfile.NamedTemporaryFile() - hypothesis_file.write("\n".join(hypotheses).encode("utf-8")) - hypothesis_file.write(b"\n") - hypothesis_file.flush() - reference_file = tempfile.NamedTemporaryFile() - reference_file.write("\n".join(references).encode("utf-8")) - reference_file.write(b"\n") - reference_file.flush() - - # Calculate BLEU using multi-bleu script - with open(hypothesis_file.name, "r") as read_pred: - bleu_cmd = [multi_bleu_path] - if lowercase: - bleu_cmd += ["-lc"] - bleu_cmd += [reference_file.name] - try: - bleu_out = subprocess.check_output(bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT) - bleu_out = bleu_out.decode("utf-8") - bleu_score = re.search(r"BLEU = (.+?),", bleu_out).group(1) - bleu_score = float(bleu_score) - except 
subprocess.CalledProcessError as error: - if error.output is not None: - tf.logging.warning("multi-bleu.perl script returned non-zero exit code") - tf.logging.warning(error.output) - bleu_score = np.float32(0.0) - - # Close temp files - hypothesis_file.close() - reference_file.close() - - return np.float32(bleu_score) diff --git a/tensorlayer/prepro.py b/tensorlayer/prepro.py deleted file mode 100644 index 1d2de81..0000000 --- a/tensorlayer/prepro.py +++ /dev/null @@ -1,3189 +0,0 @@ -# -*- coding: utf-8 -*- - -import threading, time, scipy, skimage -import numpy as np -import scipy.ndimage as ndi -# import tensorlayer as tl -from scipy import linalg -from scipy.ndimage.filters import gaussian_filter -from scipy.ndimage.interpolation import map_coordinates -from six.moves import range -from skimage import exposure, transform - -# linalg https://docs.scipy.org/doc/scipy/reference/linalg.html -# ndimage https://docs.scipy.org/doc/scipy/reference/ndimage.html - -__all__ = [ - 'threading_data', - 'rotation', - 'rotation_multi', - 'crop', - 'crop_multi', - 'flip_axis', - 'flip_axis_multi', - 'shift', - 'shift_multi', - 'shear', - 'shear_multi', - 'shear2', - 'shear_multi2', - 'swirl', - 'swirl_multi', - 'elastic_transform', - 'elastic_transform_multi', - 'zoom', - 'zoom_multi', - 'brightness', - 'brightness_multi', - 'illumination', - 'rgb_to_hsv', - 'hsv_to_rgb', - 'adjust_hue', - 'imresize', - 'pixel_value_scale', - 'samplewise_norm', - 'featurewise_norm', - 'get_zca_whitening_principal_components_img', - 'zca_whitening', - 'channel_shift', - 'channel_shift_multi', - 'drop', - 'transform_matrix_offset_center', - 'apply_transform', - 'projective_transform_by_points', - 'array_to_img', - 'find_contours', - 'pt2map', - 'binary_dilation', - 'dilation', - 'binary_erosion', - 'erosion', - 'obj_box_coords_rescale', - 'obj_box_coord_rescale', - 'obj_box_coord_scale_to_pixelunit', - 'obj_box_coord_centroid_to_upleft_butright', - 'obj_box_coord_upleft_butright_to_centroid', - 
'obj_box_coord_centroid_to_upleft', - 'obj_box_coord_upleft_to_centroid', - 'parse_darknet_ann_str_to_list', - 'parse_darknet_ann_list_to_cls_box', - 'obj_box_left_right_flip', - 'obj_box_imresize', - 'obj_box_crop', - 'obj_box_shift', - 'obj_box_zoom', - 'pad_sequences', - 'remove_pad_sequences', - 'process_sequences', - 'sequences_add_start_id', - 'sequences_add_end_id', - 'sequences_add_end_id_after_pad', - 'sequences_get_mask', -] - - -def threading_data(data=None, fn=None, thread_count=None, **kwargs): - """Process a batch of data by given function by threading. - - Usually be used for data augmentation. - - Parameters - ----------- - data : numpy.array or others - The data to be processed. - thread_count : int - The number of threads to use. - fn : function - The function for data processing. - more args : the args for `fn` - Ssee Examples below. - - Examples - -------- - Process images. - - >>> images, _, _, _ = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3)) - >>> images = tl.prepro.threading_data(images[0:32], tl.prepro.zoom, zoom_range=[0.5, 1]) - - Customized image preprocessing function. - - >>> def distort_img(x): - ... x = tl.prepro.flip_axis(x, axis=0, is_random=True) - ... x = tl.prepro.flip_axis(x, axis=1, is_random=True) - ... x = tl.prepro.crop(x, 100, 100, is_random=True) - ... return x - >>> images = tl.prepro.threading_data(images, distort_img) - - Process images and masks together (Usually be used for image segmentation). - - >>> X, Y --> [batch_size, row, col, 1] - >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], tl.prepro.zoom_multi, zoom_range=[0.5, 1], is_random=True) - ... data --> [batch_size, 2, row, col, 1] - >>> X_, Y_ = data.transpose((1,0,2,3,4)) - ... X_, Y_ --> [batch_size, row, col, 1] - >>> tl.vis.save_image(X_, 'images.png') - >>> tl.vis.save_image(Y_, 'masks.png') - - Process images and masks together by using ``thread_count``. 
- - >>> X, Y --> [batch_size, row, col, 1] - >>> data = tl.prepro.threading_data(X, tl.prepro.zoom_multi, 8, zoom_range=[0.5, 1], is_random=True) - ... data --> [batch_size, 2, row, col, 1] - >>> X_, Y_ = data.transpose((1,0,2,3,4)) - ... X_, Y_ --> [batch_size, row, col, 1] - >>> tl.vis.save_image(X_, 'after.png') - >>> tl.vis.save_image(Y_, 'before.png') - - Customized function for processing images and masks together. - - >>> def distort_img(data): - ... x, y = data - ... x, y = tl.prepro.flip_axis_multi([x, y], axis=0, is_random=True) - ... x, y = tl.prepro.flip_axis_multi([x, y], axis=1, is_random=True) - ... x, y = tl.prepro.crop_multi([x, y], 100, 100, is_random=True) - ... return x, y - >>> X, Y --> [batch_size, row, col, channel] - >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], distort_img) - >>> X_, Y_ = data.transpose((1,0,2,3,4)) - - Returns - ------- - list or numpyarray - The processed results. - - References - ---------- - - `python queue `__ - - `run with limited queue `__ - - """ - - def apply_fn(results, i, data, kwargs): - results[i] = fn(data, **kwargs) - - if thread_count is None: - results = [None] * len(data) - threads = [] - # for i in range(len(data)): - # t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, data[i], kwargs)) - for i, d in enumerate(data): - t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, d, kwargs)) - t.start() - threads.append(t) - else: - divs = np.linspace(0, len(data), thread_count + 1) - divs = np.round(divs).astype(int) - results = [None] * thread_count - threads = [] - for i in range(thread_count): - t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, data[divs[i]:divs[i + 1]], kwargs)) - t.start() - threads.append(t) - - for t in threads: - t.join() - - if thread_count is None: - try: - return np.asarray(results) - except Exception: - return results - else: - return np.concatenate(results) - - -def 
rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Rotate an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - rg : int or float - Degree to rotate, usually 0 ~ 180. - is_random : boolean - If True, randomly rotate. Default is False - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode=`constant`. Default is 0.0 - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - >>> x --> [row, col, 1] - >>> x = tl.prepro.rotation(x, rg=40, is_random=False) - >>> tl.vis.save_image(x, 'im.png') - - """ - if is_random: - theta = np.pi / 180 * np.random.uniform(-rg, rg) - else: - theta = np.pi / 180 * rg - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) - - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) - x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Rotate multiple images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. 
- - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.rotation``. - - Returns - ------- - numpy.array - A list of processed images. - - Examples - -------- - >>> x, y --> [row, col, 1] greyscale - >>> x, y = tl.prepro.rotation_multi([x, y], rg=90, is_random=False) - - """ - if is_random: - theta = np.pi / 180 * np.random.uniform(-rg, rg) - else: - theta = np.pi / 180 * rg - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) - results = [] - for data in x: - results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# crop -def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1): - """Randomly or centrally crop an image. - - Parameters - ---------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - wrg : int - Size of width. - hrg : int - Size of height. - is_random : boolean, - If True, randomly crop, else central crop. Default is False. - row_index: int - index of row. - col_index: int - index of column. - - Returns - ------- - numpy.array - A processed image. 
- - """ - h, w = x.shape[row_index], x.shape[col_index] - assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" - if is_random: - h_offset = int(np.random.uniform(0, h - hrg) - 1) - w_offset = int(np.random.uniform(0, w - wrg) - 1) - # logging.info(h_offset, w_offset, x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset].shape) - return x[h_offset:hrg + h_offset, w_offset:wrg + w_offset] - else: # central crop - h_offset = int(np.floor((h - hrg) / 2.)) - w_offset = int(np.floor((w - wrg) / 2.)) - h_end = h_offset + hrg - w_end = w_offset + wrg - return x[h_offset:h_end, w_offset:w_end] - # old implementation - # h_offset = (h - hrg)/2 - # w_offset = (w - wrg)/2 - # # logging.info(x[h_offset: h-h_offset ,w_offset: w-w_offset].shape) - # return x[h_offset: h-h_offset ,w_offset: w-w_offset] - # central crop - - -def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1): - """Randomly or centrally crop multiple images. - - Parameters - ---------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.crop``. - - Returns - ------- - numpy.array - A list of processed images. 
- - """ - h, w = x[0].shape[row_index], x[0].shape[col_index] - assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" - if is_random: - h_offset = int(np.random.uniform(0, h - hrg) - 1) - w_offset = int(np.random.uniform(0, w - wrg) - 1) - results = [] - for data in x: - results.append(data[h_offset:hrg + h_offset, w_offset:wrg + w_offset]) - return np.asarray(results) - else: - # central crop - h_offset = (h - hrg) / 2 - w_offset = (w - wrg) / 2 - results = [] - for data in x: - results.append(data[h_offset:h - h_offset, w_offset:w - w_offset]) - return np.asarray(results) - - -# flip -def flip_axis(x, axis=1, is_random=False): - """Flip the axis of an image, such as flip left and right, up and down, randomly or non-randomly, - - Parameters - ---------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - axis : int - Which axis to flip. - - 0, flip up and down - - 1, flip left and right - - 2, flip channel - is_random : boolean - If True, randomly flip. Default is False. - - Returns - ------- - numpy.array - A processed image. - - """ - if is_random: - factor = np.random.uniform(-1, 1) - if factor > 0: - x = np.asarray(x).swapaxes(axis, 0) - x = x[::-1, ...] - x = x.swapaxes(0, axis) - return x - else: - return x - else: - x = np.asarray(x).swapaxes(axis, 0) - x = x[::-1, ...] - x = x.swapaxes(0, axis) - return x - - -def flip_axis_multi(x, axis, is_random=False): - """Flip the axises of multiple images together, such as flip left and right, up and down, randomly or non-randomly, - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.flip_axis``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if is_random: - factor = np.random.uniform(-1, 1) - if factor > 0: - # x = np.asarray(x).swapaxes(axis, 0) - # x = x[::-1, ...] 
- # x = x.swapaxes(0, axis) - # return x - results = [] - for data in x: - data = np.asarray(data).swapaxes(axis, 0) - data = data[::-1, ...] - data = data.swapaxes(0, axis) - results.append(data) - return np.asarray(results) - else: - return np.asarray(x) - else: - # x = np.asarray(x).swapaxes(axis, 0) - # x = x[::-1, ...] - # x = x.swapaxes(0, axis) - # return x - results = [] - for data in x: - data = np.asarray(data).swapaxes(axis, 0) - data = data[::-1, ...] - data = data.swapaxes(0, axis) - results.append(data) - return np.asarray(results) - - -# shift -def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Shift an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - wrg : float - Percentage of shift in axis x, usually -0.25 ~ 0.25. - hrg : float - Percentage of shift in axis y, usually -0.25 ~ 0.25. - is_random : boolean - If True, randomly shift. Default is False. - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. 
- - """ - h, w = x.shape[row_index], x.shape[col_index] - if is_random: - tx = np.random.uniform(-hrg, hrg) * h - ty = np.random.uniform(-wrg, wrg) * w - else: - tx, ty = hrg * h, wrg * w - translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - - transform_matrix = translation_matrix # no need to do offset - x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Shift images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.shift``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - h, w = x[0].shape[row_index], x[0].shape[col_index] - if is_random: - tx = np.random.uniform(-hrg, hrg) * h - ty = np.random.uniform(-wrg, wrg) * w - else: - tx, ty = hrg * h, wrg * w - translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - - transform_matrix = translation_matrix # no need to do offset - results = [] - for data in x: - results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# shear -def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Shear an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - intensity : float - Percentage of shear, usually -0.5 ~ 0.5 (is_random==True), 0 ~ 0.5 (is_random==False), - you can have a quick try by shear(X, 1). - is_random : boolean - If True, randomly shear. Default is False. 
- row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see and `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - References - ----------- - - `Affine transformation `__ - - """ - if is_random: - shear = np.random.uniform(-intensity, intensity) - else: - shear = intensity - shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]) - - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Shear images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.shear``. - - Returns - ------- - numpy.array - A list of processed images. 
- - """ - if is_random: - shear = np.random.uniform(-intensity, intensity) - else: - shear = intensity - shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - results = [] - for data in x: - results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -def shear2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Shear an image randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - shear : tuple of two floats - Percentage of shear for height and width direction (0, 1). - is_random : boolean - If True, randomly shear. Default is False. - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - References - ----------- - - `Affine transformation `__ - - """ - assert len(shear) == 2, "shear should be tuple of 2 floats, or you want to use tl.prepro.shear rather than tl.prepro.shear2 ?" 
- if is_random: - shear[0] = np.random.uniform(-shear[0], shear[0]) - shear[1] = np.random.uniform(-shear[1], shear[1]) - - shear_matrix = np.array([[1, shear[0], 0], [shear[1], 1, 0], [0, 0, 1]]) - - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def shear_multi2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Shear images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.shear2``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - assert len(shear) == 2, "shear should be tuple of 2 floats, or you want to use tl.prepro.shear_multi rather than tl.prepro.shear_multi2 ?" - if is_random: - shear[0] = np.random.uniform(-shear[0], shear[0]) - shear[1] = np.random.uniform(-shear[1], shear[1]) - - shear_matrix = np.array([[1, shear[0], 0], [shear[1], 1, 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) - results = [] - for data in x: - results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# swirl -def swirl(x, - center=None, - strength=1, - radius=100, - rotation=0, - output_shape=None, - order=1, - mode='constant', - cval=0, - clip=True, - preserve_range=False, - is_random=False): - """Swirl an image randomly or non-randomly, see `scikit-image swirl API `__ - and `example `__. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). 
- center : tuple or 2 int or None - Center coordinate of transformation (optional). - strength : float - The amount of swirling applied. - radius : float - The extent of the swirl in pixels. The effect dies out rapidly beyond radius. - rotation : float - Additional rotation applied to the image, usually [0, 360], relates to center. - output_shape : tuple of 2 int or None - Shape of the output image generated (height, width). By default the shape of the input image is preserved. - order : int, optional - The order of the spline interpolation, default is 1. The order has to be in the range 0-5. See skimage.transform.warp for detail. - mode : str - One of `constant` (default), `edge`, `symmetric` `reflect` and `wrap`. - Points outside the boundaries of the input are filled according to the given mode, with `constant` used as the default. Modes match the behaviour of numpy.pad. - cval : float - Used in conjunction with mode `constant`, the value outside the image boundaries. - clip : boolean - Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range. - preserve_range : boolean - Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. - is_random : boolean, - If True, random swirl. Default is False. - - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])] - - random strength = [0, strength] - - random radius = [1e-10, radius] - - random rotation = [-rotation, rotation] - - Returns - ------- - numpy.array - A processed image. 
- - Examples - --------- - >>> x --> [row, col, 1] greyscale - >>> x = tl.prepro.swirl(x, strength=4, radius=100) - - """ - assert radius != 0, Exception("Invalid radius value") - rotation = np.pi / 180 * rotation - if is_random: - center_h = int(np.random.uniform(0, x.shape[0])) - center_w = int(np.random.uniform(0, x.shape[1])) - center = (center_h, center_w) - strength = np.random.uniform(0, strength) - radius = np.random.uniform(1e-10, radius) - rotation = np.random.uniform(-rotation, rotation) - - max_v = np.max(x) - if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. - x = x / max_v - swirled = skimage.transform.swirl( - x, - center=center, - strength=strength, - radius=radius, - rotation=rotation, - output_shape=output_shape, - order=order, - mode=mode, - cval=cval, - clip=clip, - preserve_range=preserve_range) - if max_v > 1: - swirled = swirled * max_v - return swirled - - -def swirl_multi(x, - center=None, - strength=1, - radius=100, - rotation=0, - output_shape=None, - order=1, - mode='constant', - cval=0, - clip=True, - preserve_range=False, - is_random=False): - """Swirl multiple images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.swirl``. - - Returns - ------- - numpy.array - A list of processed images. 
- - """ - assert radius != 0, Exception("Invalid radius value") - rotation = np.pi / 180 * rotation - if is_random: - center_h = int(np.random.uniform(0, x[0].shape[0])) - center_w = int(np.random.uniform(0, x[0].shape[1])) - center = (center_h, center_w) - strength = np.random.uniform(0, strength) - radius = np.random.uniform(1e-10, radius) - rotation = np.random.uniform(-rotation, rotation) - - results = [] - for data in x: - max_v = np.max(data) - if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. - data = data / max_v - swirled = skimage.transform.swirl( - data, - center=center, - strength=strength, - radius=radius, - rotation=rotation, - output_shape=output_shape, - order=order, - mode=mode, - cval=cval, - clip=clip, - preserve_range=preserve_range) - if max_v > 1: - swirled = swirled * max_v - results.append(swirled) - return np.asarray(results) - - -# elastic_transform -def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False): - """Elastic transformation for image as described in `[Simard2003] `__. - - Parameters - ----------- - x : numpy.array - A greyscale image. - alpha : float - Alpha value for elastic transformation. - sigma : float or sequence of float - The smaller the sigma, the more transformation. Standard deviation for Gaussian kernel. The standard deviations of the Gaussian filter are given for each axis as a sequence, or as a single number, in which case it is equal for all axes. - mode : str - See `scipy.ndimage.filters.gaussian_filter `__. Default is `constant`. - cval : float, - Used in conjunction with `mode` of `constant`, the value outside the image boundaries. - is_random : boolean - Default is False. - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - >>> x = tl.prepro.elastic_transform(x, alpha=x.shape[1]*3, sigma=x.shape[1]*0.07) - - References - ------------ - - `Github `__. 
- - `Kaggle `__ - - """ - if is_random is False: - random_state = np.random.RandomState(None) - else: - random_state = np.random.RandomState(int(time.time())) - # - is_3d = False - if len(x.shape) == 3 and x.shape[-1] == 1: - x = x[:, :, 0] - is_3d = True - elif len(x.shape) == 3 and x.shape[-1] != 1: - raise Exception("Only support greyscale image") - assert len(x.shape) == 2, "input should be grey-scale image" - - shape = x.shape - - dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha - dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha - - x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij') - indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1)) - if is_3d: - return map_coordinates(x, indices, order=1).reshape((shape[0], shape[1], 1)) - else: - return map_coordinates(x, indices, order=1).reshape(shape) - - -def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random=False): - """Elastic transformation for images as described in `[Simard2003] `__. - - Parameters - ----------- - x : list of numpy.array - List of greyscale images. - others : args - See ``tl.prepro.elastic_transform``. - - Returns - ------- - numpy.array - A list of processed images. 
- - """ - if is_random is False: - random_state = np.random.RandomState(None) - else: - random_state = np.random.RandomState(int(time.time())) - - shape = x[0].shape - if len(shape) == 3: - shape = (shape[0], shape[1]) - new_shape = random_state.rand(*shape) - - results = [] - for data in x: - is_3d = False - if len(data.shape) == 3 and data.shape[-1] == 1: - data = data[:, :, 0] - is_3d = True - elif len(data.shape) == 3 and data.shape[-1] != 1: - raise Exception("Only support greyscale image") - assert len(data.shape) == 2, "input should be grey-scale image" - - dx = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha - dy = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha - - x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij') - indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1)) - # logging.info(data.shape) - if is_3d: - results.append(map_coordinates(data, indices, order=1).reshape((shape[0], shape[1], 1))) - else: - results.append(map_coordinates(data, indices, order=1).reshape(shape)) - return np.asarray(results) - - -# zoom -def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Zoom in and out of a single image, randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - zoom_range : list or tuple - Zoom range for height and width. - - If is_random=False, (h, w) are the fixed zoom factor for row and column axies, factor small than one is zoom in. - - If is_random=True, (h, w) are (min zoom out, max zoom out) for x and y with different random zoom in/out factor, e.g (0.5, 1) zoom in 1~2 times. - is_random : boolean - If True, randomly zoom. Default is False. - row_index col_index and channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). 
- fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int - The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - """ - if len(zoom_range) != 2: - raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) - if is_random: - if zoom_range[0] == 1 and zoom_range[1] == 1: - zx, zy = 1, 1 - logging.info(" random_zoom : not zoom in/out") - else: - zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) - else: - zx, zy = zoom_range - # logging.info(zx, zy) - zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]]) - - h, w = x.shape[row_index], x.shape[col_index] - transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) - x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) - return x - - -def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Zoom in and out of images with the same arguments, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.zoom``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if len(zoom_range) != 2: - raise Exception('zoom_range should be a tuple or list of two floats. 
' 'Received arg: ', zoom_range) - - if is_random: - if zoom_range[0] == 1 and zoom_range[1] == 1: - zx, zy = 1, 1 - logging.info(" random_zoom : not zoom in/out") - else: - zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) - else: - zx, zy = zoom_range - - zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]]) - - h, w = x[0].shape[row_index], x[0].shape[col_index] - transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) - # x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval) - # return x - results = [] - for data in x: - results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) - return np.asarray(results) - - -# image = tf.image.random_brightness(image, max_delta=32. / 255.) -# image = tf.image.random_saturation(image, lower=0.5, upper=1.5) -# image = tf.image.random_hue(image, max_delta=0.032) -# image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - - -def brightness(x, gamma=1, gain=1, is_random=False): - """Change the brightness of a single image, randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - gamma : float - Non negative real number. Default value is 1. - - Small than 1 means brighter. - - If `is_random` is True, gamma in a range of (1-gamma, 1+gamma). - gain : float - The constant multiplier. Default value is 1. - is_random : boolean - If True, randomly change brightness. Default is False. - - Returns - ------- - numpy.array - A processed image. - - References - ----------- - - `skimage.exposure.adjust_gamma `__ - - `chinese blog `__ - - """ - if is_random: - gamma = np.random.uniform(1 - gamma, 1 + gamma) - x = exposure.adjust_gamma(x, gamma, gain) - return x - - -def brightness_multi(x, gamma=1, gain=1, is_random=False): - """Change the brightness of multiply images, randomly or non-randomly. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. 
- - Parameters - ----------- - x : list of numpyarray - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.brightness``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if is_random: - gamma = np.random.uniform(1 - gamma, 1 + gamma) - - results = [] - for data in x: - results.append(exposure.adjust_gamma(data, gamma, gain)) - return np.asarray(results) - - -def illumination(x, gamma=1., contrast=1., saturation=1., is_random=False): - """Perform illumination augmentation for a single image, randomly or non-randomly. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - gamma : float - Change brightness (the same with ``tl.prepro.brightness``) - - if is_random=False, one float number, small than one means brighter, greater than one means darker. - - if is_random=True, tuple of two float numbers, (min, max). - contrast : float - Change contrast. - - if is_random=False, one float number, small than one means blur. - - if is_random=True, tuple of two float numbers, (min, max). - saturation : float - Change saturation. - - if is_random=False, one float number, small than one means unsaturation. - - if is_random=True, tuple of two float numbers, (min, max). - is_random : boolean - If True, randomly change illumination. Default is False. - - Returns - ------- - numpy.array - A processed image. 
- - Examples - --------- - Random - - >>> x = tl.prepro.illumination(x, gamma=(0.5, 5.0), contrast=(0.3, 1.0), saturation=(0.7, 1.0), is_random=True) - - Non-random - - >>> x = tl.prepro.illumination(x, 0.5, 0.6, 0.8, is_random=False) - - """ - from PIL import Image, ImageEnhance - - if is_random: - try: - assert len(gamma) == len(contrast) == len(saturation) == 2, "if is_random = True, the arguments are (min, max)" - except: - raise Exception("if is_random = True, the arguments are (min, max)") - ## random change brightness # small --> brighter - illum_settings = np.random.randint(0, 3) # 0-brighter, 1-darker, 2 keep normal - - if illum_settings == 0: # brighter - gamma = np.random.uniform(gamma[0], 1.0) # (.5, 1.0) - elif illum_settings == 1: # darker - gamma = np.random.uniform(1.0, gamma[1]) # (1.0, 5.0) - else: - gamma = 1 - im_ = brightness(x, gamma=gamma, gain=1, is_random=False) - - # logging.info("using contrast and saturation") - image = Image.fromarray(im_) # array -> PIL - contrast_adjust = ImageEnhance.Contrast(image) - image = contrast_adjust.enhance(np.random.uniform(contrast[0], contrast[1])) #0.3,0.9)) - - saturation_adjust = ImageEnhance.Color(image) - image = saturation_adjust.enhance(np.random.uniform(saturation[0], saturation[1])) # (0.7,1.0)) - im_ = np.array(image) # PIL -> array - else: - im_ = brightness(x, gamma=gamma, gain=1, is_random=False) - image = Image.fromarray(im_) # array -> PIL - contrast_adjust = ImageEnhance.Contrast(image) - image = contrast_adjust.enhance(contrast) - - saturation_adjust = ImageEnhance.Color(image) - image = saturation_adjust.enhance(saturation) - im_ = np.array(image) # PIL -> array - return np.asarray(im_) - - -def rgb_to_hsv(rgb): - """Input RGB image [0~255] return HSV image [0~1]. - - Parameters - ------------ - rgb : numpy.array - An image with values between 0 and 255. - - Returns - ------- - numpy.array - A processed image. 
- - """ - # Translated from source of colorsys.rgb_to_hsv - # r,g,b should be a numpy arrays with values between 0 and 255 - # rgb_to_hsv returns an array of floats between 0.0 and 1.0. - rgb = rgb.astype('float') - hsv = np.zeros_like(rgb) - # in case an RGBA array was passed, just copy the A channel - hsv[..., 3:] = rgb[..., 3:] - r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] - maxc = np.max(rgb[..., :3], axis=-1) - minc = np.min(rgb[..., :3], axis=-1) - hsv[..., 2] = maxc - mask = maxc != minc - hsv[mask, 1] = (maxc - minc)[mask] / maxc[mask] - rc = np.zeros_like(r) - gc = np.zeros_like(g) - bc = np.zeros_like(b) - rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask] - gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask] - bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask] - hsv[..., 0] = np.select([r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc) - hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0 - return hsv - - -def hsv_to_rgb(hsv): - """Input HSV image [0~1] return RGB image [0~255]. - - Parameters - ------------- - hsv : numpy.array - An image with values between 0.0 and 1.0 - - Returns - ------- - numpy.array - A processed image. - """ - # Translated from source of colorsys.hsv_to_rgb - # h,s should be a numpy arrays with values between 0.0 and 1.0 - # v should be a numpy array with values between 0.0 and 255.0 - # hsv_to_rgb returns an array of uints between 0 and 255. 
- rgb = np.empty_like(hsv) - rgb[..., 3:] = hsv[..., 3:] - h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] - i = (h * 6.0).astype('uint8') - f = (h * 6.0) - i - p = v * (1.0 - s) - q = v * (1.0 - s * f) - t = v * (1.0 - s * (1.0 - f)) - i = i % 6 - conditions = [s == 0.0, i == 1, i == 2, i == 3, i == 4, i == 5] - rgb[..., 0] = np.select(conditions, [v, q, p, p, t, v], default=v) - rgb[..., 1] = np.select(conditions, [v, v, v, q, p, p], default=t) - rgb[..., 2] = np.select(conditions, [v, p, t, v, v, q], default=p) - return rgb.astype('uint8') - - -def adjust_hue(im, hout=0.66, is_offset=True, is_clip=True, is_random=False): - """Adjust hue of an RGB image. - - This is a convenience method that converts an RGB image to float representation, converts it to HSV, add an offset to the hue channel, converts back to RGB and then back to the original data type. - For TF, see `tf.image.adjust_hue `__.and `tf.image.random_hue `__. - - Parameters - ----------- - im : numpy.array - An image with values between 0 and 255. - hout : float - The scale value for adjusting hue. - - If is_offset is False, set all hue values to this value. 0 is red; 0.33 is green; 0.66 is blue. - - If is_offset is True, add this value as the offset to the hue channel. - is_offset : boolean - Whether `hout` is added on HSV as offset or not. Default is True. - is_clip : boolean - If HSV value smaller than 0, set to 0. Default is True. - is_random : boolean - If True, randomly change hue. Default is False. - - Returns - ------- - numpy.array - A processed image. - - Examples - --------- - Random, add a random value between -0.2 and 0.2 as the offset to every hue values. - - >>> im_hue = tl.prepro.adjust_hue(image, hout=0.2, is_offset=True, is_random=False) - - Non-random, make all hue to green. - - >>> im_green = tl.prepro.adjust_hue(image, hout=0.66, is_offset=False, is_random=False) - - References - ----------- - - `tf.image.random_hue `__. - - `tf.image.adjust_hue `__. 
- - `StackOverflow: Changing image hue with python PIL `__. - - """ - hsv = rgb_to_hsv(im) - if is_random: - hout = np.random.uniform(-hout, hout) - - if is_offset: - hsv[..., 0] += hout - else: - hsv[..., 0] = hout - - if is_clip: - hsv[..., 0] = np.clip(hsv[..., 0], 0, np.inf) # Hao : can remove green dots - - rgb = hsv_to_rgb(hsv) - return rgb - - -# # contrast -# def constant(x, cutoff=0.5, gain=10, inv=False, is_random=False): -# # TODO -# x = exposure.adjust_sigmoid(x, cutoff=cutoff, gain=gain, inv=inv) -# return x -# -# def constant_multi(): -# #TODO -# pass - - -def imresize(x, size=None, interp='bicubic', mode=None): - """Resize an image by given output size and method. - - Warning, this function will rescale the value to [0, 255]. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - size : list of 2 int or None - For height and width. - interp : str - Interpolation method for re-sizing (`nearest`, `lanczos`, `bilinear`, `bicubic` (default) or `cubic`). - mode : str - The PIL image mode (`P`, `L`, etc.) to convert arr before resizing. - - Returns - ------- - numpy.array - A processed image. - - References - ------------ - - `scipy.misc.imresize `__ - - """ - if size is None: - size = [100, 100] - - if x.shape[-1] == 1: - # greyscale - x = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode) - return x[:, :, np.newaxis] - elif x.shape[-1] == 3: - # rgb, bgr .. - return scipy.misc.imresize(x, size, interp=interp, mode=mode) - else: - raise Exception("Unsupported channel %d" % x.shape[-1]) - - -# value scale -def pixel_value_scale(im, val=0.9, clip=(-np.inf, np.inf), is_random=False): - """Scales each value in the pixels of the image. - - Parameters - ----------- - im : numpy.array - An image. - val : float - The scale value for changing pixel value. - - If is_random=False, multiply this value with all pixels. - - If is_random=True, multiply a value between [1-val, 1+val] with all pixels. 
- clip : tuple of 2 numbers - The minimum and maximum value. - is_random : boolean - If True, see ``val``. - - Returns - ------- - numpy.array - A processed image. - - Examples - ---------- - Random - - >>> im = pixel_value_scale(im, 0.1, [0, 255], is_random=True) - - Non-random - - >>> im = pixel_value_scale(im, 0.9, [0, 255], is_random=False) - - """ - if is_random: - scale = 1 + np.random.uniform(-val, val) - im = im * scale - else: - im = im * val - - if len(clip) == 2: - im = np.clip(im, clip[0], clip[1]) - else: - raise Exception("clip : tuple of 2 numbers") - - return im - - -# normailization -def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_normalization=False, channel_index=2, epsilon=1e-7): - """Normalize an image by rescale, samplewise centering and samplewise centering in order. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - rescale : float - Rescaling factor. If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation) - samplewise_center : boolean - If True, set each sample mean to 0. - samplewise_std_normalization : boolean - If True, divide each input by its std. - epsilon : float - A small position value for dividing standard deviation. - - Returns - ------- - numpy.array - A processed image. - - Examples - -------- - >>> x = samplewise_norm(x, samplewise_center=True, samplewise_std_normalization=True) - >>> print(x.shape, np.mean(x), np.std(x)) - ... (160, 176, 1), 0.0, 1.0 - - Notes - ------ - When samplewise_center and samplewise_std_normalization are True. - - For greyscale image, every pixels are subtracted and divided by the mean and std of whole image. - - For RGB image, every pixels are subtracted and divided by the mean and std of this pixel i.e. the mean and std of a pixel is 0 and 1. 
- - """ - if rescale: - x *= rescale - - if x.shape[channel_index] == 1: - # greyscale - if samplewise_center: - x = x - np.mean(x) - if samplewise_std_normalization: - x = x / np.std(x) - return x - elif x.shape[channel_index] == 3: - # rgb - if samplewise_center: - x = x - np.mean(x, axis=channel_index, keepdims=True) - if samplewise_std_normalization: - x = x / (np.std(x, axis=channel_index, keepdims=True) + epsilon) - return x - else: - raise Exception("Unsupported channels %d" % x.shape[channel_index]) - - -def featurewise_norm(x, mean=None, std=None, epsilon=1e-7): - """Normalize every pixels by the same given mean and std, which are usually - compute from all examples. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - mean : float - Value for subtraction. - std : float - Value for division. - epsilon : float - A small position value for dividing standard deviation. - - Returns - ------- - numpy.array - A processed image. - - """ - if mean: - x = x - mean - if std: - x = x / (std + epsilon) - return x - - -# whitening -def get_zca_whitening_principal_components_img(X): - """Return the ZCA whitening principal components matrix. - - Parameters - ----------- - x : numpy.array - Batch of images with dimension of [n_example, row, col, channel] (default). - - Returns - ------- - numpy.array - A processed image. - - """ - flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3])) - logging.info("zca : computing sigma ..") - sigma = np.dot(flatX.T, flatX) / flatX.shape[0] - logging.info("zca : computing U, S and V ..") - U, S, _ = linalg.svd(sigma) # USV - logging.info("zca : computing principal components ..") - principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T) - return principal_components - - -def zca_whitening(x, principal_components): - """Apply ZCA whitening on an image by given principal components matrix. 
- - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - principal_components : matrix - Matrix from ``get_zca_whitening_principal_components_img``. - - Returns - ------- - numpy.array - A processed image. - - """ - flatx = np.reshape(x, (x.size)) - # logging.info(principal_components.shape, x.shape) # ((28160, 28160), (160, 176, 1)) - # flatx = np.reshape(x, (x.shape)) - # flatx = np.reshape(x, (x.shape[0], )) - # logging.info(flatx.shape) # (160, 176, 1) - whitex = np.dot(flatx, principal_components) - x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2])) - return x - - -# developing -# def barrel_transform(x, intensity): -# # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py -# # TODO -# pass -# -# def barrel_transform_multi(x, intensity): -# # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py -# # TODO -# pass - - -# channel shift -def channel_shift(x, intensity, is_random=False, channel_index=2): - """Shift the channels of an image, randomly or non-randomly, see `numpy.rollaxis `__. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - intensity : float - Intensity of shifting. - is_random : boolean - If True, randomly shift. Default is False. - channel_index : int - Index of channel. Default is 2. - - Returns - ------- - numpy.array - A processed image. 
- - """ - if is_random: - factor = np.random.uniform(-intensity, intensity) - else: - factor = intensity - x = np.rollaxis(x, channel_index, 0) - min_x, max_x = np.min(x), np.max(x) - channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x] - x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_index + 1) - return x - # x = np.rollaxis(x, channel_index, 0) - # min_x, max_x = np.min(x), np.max(x) - # channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x) - # for x_channel in x] - # x = np.stack(channel_images, axis=0) - # x = np.rollaxis(x, 0, channel_index+1) - # return x - - -def channel_shift_multi(x, intensity, is_random=False, channel_index=2): - """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis `__. - Usually be used for image segmentation which x=[X, Y], X and Y should be matched. - - Parameters - ----------- - x : list of numpy.array - List of images with dimension of [n_images, row, col, channel] (default). - others : args - See ``tl.prepro.channel_shift``. - - Returns - ------- - numpy.array - A list of processed images. - - """ - if is_random: - factor = np.random.uniform(-intensity, intensity) - else: - factor = intensity - - results = [] - for data in x: - data = np.rollaxis(data, channel_index, 0) - min_x, max_x = np.min(data), np.max(data) - channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x] - data = np.stack(channel_images, axis=0) - data = np.rollaxis(x, 0, channel_index + 1) - results.append(data) - return np.asarray(results) - - -# noise -def drop(x, keep=0.5): - """Randomly set some pixels to zero by a given keeping probability. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] or [row, col]. - keep : float - The keeping probability (0, 1), the lower more values will be set to zero. - - Returns - ------- - numpy.array - A processed image. 
- - """ - if len(x.shape) == 3: - if x.shape[-1] == 3: # color - img_size = x.shape - mask = np.random.binomial(n=1, p=keep, size=x.shape[:-1]) - for i in range(3): - x[:, :, i] = np.multiply(x[:, :, i], mask) - elif x.shape[-1] == 1: # greyscale image - img_size = x.shape - x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size)) - else: - raise Exception("Unsupported shape {}".format(x.shape)) - elif len(x.shape) == 2 or 1: # greyscale matrix (image) or vector - img_size = x.shape - x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size)) - else: - raise Exception("Unsupported shape {}".format(x.shape)) - return x - - -# x = np.asarray([[1,2,3,4,5,6,7,8,9,10],[1,2,3,4,5,6,7,8,9,10]]) -# x = np.asarray([x,x,x,x,x,x]) -# x.shape = 10, 4, 3 -# # logging.info(x) -# # exit() -# logging.info(x.shape) -# # exit() -# logging.info(drop(x, keep=1.)) -# exit() - - -# manual transform -def transform_matrix_offset_center(matrix, x, y): - """Return transform matrix offset center. - - Parameters - ---------- - matrix : numpy.array - Transform matrix. - x and y : 2 int - Size of image. - - Returns - ------- - numpy.array - The transform matrix. - - Examples - -------- - - See ``tl.prepro.rotation``, ``tl.prepro.shear``, ``tl.prepro.zoom``. - - """ - o_x = float(x) / 2 + 0.5 - o_y = float(y) / 2 + 0.5 - offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]]) - reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]]) - transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix) - return transform_matrix - - -def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Return transformed images by given ``transform_matrix`` from ``transform_matrix_offset_center``. - - Parameters - ---------- - x : numpy.array - An image with dimension of [row, col, channel] (default). 
- transform_matrix : numpy.array - Transform matrix (offset center), can be generated by ``transform_matrix_offset_center`` - channel_index : int - Index of channel, default 2. - fill_mode : str - Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ - cval : float - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0 - order : int - The order of interpolation. The order has to be in the range 0-5: - - 0 Nearest-neighbor - - 1 Bi-linear (default) - - 2 Bi-quadratic - - 3 Bi-cubic - - 4 Bi-quartic - - 5 Bi-quintic - - `scipy ndimage affine_transform `__ - - Returns - ------- - numpy.array - A processed image. - - Examples - -------- - - See ``tl.prepro.rotation``, ``tl.prepro.shift``, ``tl.prepro.shear``, ``tl.prepro.zoom``. - - """ - x = np.rollaxis(x, channel_index, 0) - final_affine_matrix = transform_matrix[:2, :2] - final_offset = transform_matrix[:2, 2] - channel_images = [ - ndi.interpolation.affine_transform(x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval) for x_channel in x - ] - x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_index + 1) - return x - - -def projective_transform_by_points(x, src, dst, map_args=None, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, preserve_range=False): - """Projective transform by given coordinates, usually 4 coordinates. - - see `scikit-image `__. - - Parameters - ----------- - x : numpy.array - An image with dimension of [row, col, channel] (default). - src : list or numpy - The original coordinates, usually 4 coordinates of (width, height). - dst : list or numpy - The coordinates after transformation, the number of coordinates is the same with src. - map_args : dictionary or None - Keyword arguments passed to inverse map. - output_shape : tuple of 2 int - Shape of the output image generated. 
By default the shape of the input image is preserved. Note that, even for multi-band images, only rows and columns need to be specified. - order : int - The order of interpolation. The order has to be in the range 0-5: - - 0 Nearest-neighbor - - 1 Bi-linear (default) - - 2 Bi-quadratic - - 3 Bi-cubic - - 4 Bi-quartic - - 5 Bi-quintic - mode : str - One of `constant` (default), `edge`, `symmetric`, `reflect` or `wrap`. - Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad. - cval : float - Used in conjunction with mode `constant`, the value outside the image boundaries. - clip : boolean - Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range. - preserve_range : boolean - Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. - - Returns - ------- - numpy.array - A processed image. - - Examples - -------- - Assume X is an image from CIFAR-10, i.e. 
shape == (32, 32, 3) - - >>> src = [[0,0],[0,32],[32,0],[32,32]] # [w, h] - >>> dst = [[10,10],[0,32],[32,0],[32,32]] - >>> x = tl.prepro.projective_transform_by_points(X, src, dst) - - References - ----------- - - `scikit-image : geometric transformations `__ - - `scikit-image : examples `__ - - """ - if map_args is None: - map_args = {} - # if type(src) is list: - if isinstance(src, list): # convert to numpy - src = np.array(src) - # if type(dst) is list: - if isinstance(dst, list): - dst = np.array(dst) - if np.max(x) > 1: # convert to [0, 1] - x = x / 255 - - m = transform.ProjectiveTransform() - m.estimate(dst, src) - warped = transform.warp(x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range) - return warped - - -# Numpy and PIL -def array_to_img(x, dim_ordering=(0, 1, 2), scale=True): - """Converts a numpy array to PIL image object (uint8 format). - - Parameters - ---------- - x : numpy.array - An image with dimension of 3 and channels of 1 or 3. - dim_ordering : tuple of 3 int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - scale : boolean - If True, converts image to [0, 255] from any range of value like [-1, 2]. Default is True. - - Returns - ------- - PIL.image - An image. 
- - References - ----------- - `PIL Image.fromarray `__ - - """ - from PIL import Image - # if dim_ordering == 'default': - # dim_ordering = K.image_dim_ordering() - # if dim_ordering == 'th': # theano - # x = x.transpose(1, 2, 0) - x = x.transpose(dim_ordering) - if scale: - x += max(-np.min(x), 0) - x_max = np.max(x) - if x_max != 0: - # logging.info(x_max) - # x /= x_max - x = x / x_max - x *= 255 - if x.shape[2] == 3: - # RGB - return Image.fromarray(x.astype('uint8'), 'RGB') - elif x.shape[2] == 1: - # grayscale - return Image.fromarray(x[:, :, 0].astype('uint8'), 'L') - else: - raise Exception('Unsupported channel number: ', x.shape[2]) - - -def find_contours(x, level=0.8, fully_connected='low', positive_orientation='low'): - """Find iso-valued contours in a 2D array for a given level value, returns list of (n, 2)-ndarrays - see `skimage.measure.find_contours `__. - - Parameters - ------------ - x : 2D ndarray of double. - Input data in which to find contours. - level : float - Value along which to find contours in the array. - fully_connected : str - Either `low` or `high`. Indicates whether array elements below the given level value are to be considered fully-connected (and hence elements above the value will only be face connected), or vice-versa. (See notes below for details.) - positive_orientation : str - Either `low` or `high`. Indicates whether the output contours will produce positively-oriented polygons around islands of low- or high-valued elements. If `low` then contours will wind counter-clockwise around elements below the iso-value. Alternately, this means that low-valued elements are always on the left of the contour. - - Returns - -------- - list of (n,2)-ndarrays - Each contour is an ndarray of shape (n, 2), consisting of n (row, column) coordinates along the contour. 
- - """ - return skimage.measure.find_contours(x, level, fully_connected=fully_connected, positive_orientation=positive_orientation) - - -def pt2map(list_points=None, size=(100, 100), val=1): - """Inputs a list of points, return a 2D image. - - Parameters - -------------- - list_points : list of 2 int - [[x, y], [x, y]..] for point coordinates. - size : tuple of 2 int - (w, h) for output size. - val : float or int - For the contour value. - - Returns - ------- - numpy.array - An image. - - """ - if list_points is None: - raise Exception("list_points : list of 2 int") - i_m = np.zeros(size) - if len(list_points) == 0: - return i_m - for xx in list_points: - for x in xx: - # logging.info(x) - i_m[int(np.round(x[0]))][int(np.round(x[1]))] = val - return i_m - - -def binary_dilation(x, radius=3): - """Return fast binary morphological dilation of an image. - see `skimage.morphology.binary_dilation `__. - - Parameters - ----------- - x : 2D array - A binary image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed binary image. - - """ - from skimage.morphology import disk, binary_dilation - mask = disk(radius) - x = binary_dilation(x, selem=mask) - return x - - -def dilation(x, radius=3): - """Return greyscale morphological dilation of an image, - see `skimage.morphology.dilation `__. - - Parameters - ----------- - x : 2D array - An greyscale image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed greyscale image. - - """ - from skimage.morphology import disk, dilation - mask = disk(radius) - x = dilation(x, selem=mask) - return x - - -def binary_erosion(x, radius=3): - """Return binary morphological erosion of an image, - see `skimage.morphology.binary_erosion `__. - - Parameters - ----------- - x : 2D array - A binary image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed binary image. 
- - """ - from skimage.morphology import disk, binary_erosion - mask = disk(radius) - x = binary_erosion(x, selem=mask) - return x - - -def erosion(x, radius=3): - """Return greyscale morphological erosion of an image, - see `skimage.morphology.erosion `__. - - Parameters - ----------- - x : 2D array - A greyscale image. - radius : int - For the radius of mask. - - Returns - ------- - numpy.array - A processed greyscale image. - - """ - from skimage.morphology import disk, erosion - mask = disk(radius) - x = erosion(x, selem=mask) - return x - - -def obj_box_coords_rescale(coords=None, shape=None): - """Scale down a list of coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1]. - - Parameters - ------------ - coords : list of list of 4 ints or None - For coordinates of more than one images .e.g.[[x, y, w, h], [x, y, w, h], ...]. - shape : list of 2 int or None - 【height, width]. - - Returns - ------- - list of list of 4 numbers - A list of new bounding boxes. - - - Examples - --------- - >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100]) - >>> print(coords) - ... [[0.3, 0.4, 0.5, 0.5], [0.1, 0.1, 0.2, 0.2]] - >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[50, 100]) - >>> print(coords) - ... [[0.3, 0.8, 0.5, 1.0]] - >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200]) - >>> print(coords) - ... [[0.15, 0.4, 0.25, 0.5]] - - Returns - ------- - list of 4 numbers - New coordinates. 
- - """ - if coords is None: - coords = [] - if shape is None: - shape = [100, 200] - - imh, imw = shape[0], shape[1] - imh = imh * 1.0 # * 1.0 for python2 : force division to be float point - imw = imw * 1.0 - coords_new = list() - for coord in coords: - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - x = coord[0] / imw - y = coord[1] / imh - w = coord[2] / imw - h = coord[3] / imh - coords_new.append([x, y, w, h]) - return coords_new - - -def obj_box_coord_rescale(coord=None, shape=None): - """Scale down one coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1]. - It is the reverse process of ``obj_box_coord_scale_to_pixelunit``. - - Parameters - ------------ - coords : list of 4 int or None - One coordinates of one image e.g. [x, y, w, h]. - shape : list of 2 int or None - For [height, width]. - - Returns - ------- - list of 4 numbers - New bounding box. - - Examples - --------- - >>> coord = tl.prepro.obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100]) - ... [0.3, 0.4, 0.5, 0.5] - - """ - if coord is None: - coord = [] - if shape is None: - shape = [100, 200] - - return obj_box_coords_rescale(coords=[coord], shape=shape)[0] - - -def obj_box_coord_scale_to_pixelunit(coord, shape=None): - """Convert one coordinate [x, y, w (or x2), h (or y2)] in ratio format to image coordinate format. - It is the reverse process of ``obj_box_coord_rescale``. - - Parameters - ----------- - coord : list of 4 float - One coordinate of one image [x, y, w (or x2), h (or y2)] in ratio format, i.e value range [0~1]. - shape : tuple of 2 or None - For [height, width]. - - Returns - ------- - list of 4 numbers - New bounding box. - - Examples - --------- - >>> x, y, x2, y2 = tl.prepro.obj_box_coord_scale_to_pixelunit([0.2, 0.3, 0.5, 0.7], shape=(100, 200, 3)) - ... 
[40, 30, 100, 70] - - """ - if shape is None: - shape = [100, 100] - - imh, imw = shape[0:2] - x = int(coord[0] * imw) - x2 = int(coord[2] * imw) - y = int(coord[1] * imh) - y2 = int(coord[3] * imh) - return [x, y, x2, y2] - - -# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100]) -# logging.info(coords) -# # ... [[0.3, 0.4, 0.5, 0.5], [0.1, 0.1, 0.2, 0.2]] -# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[50, 100]) -# logging.info(coords) -# # ... [[0.3, 0.8, 0.5, 1.0]] -# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200]) -# logging.info(coords) -# # ... [[0.15, 0.4, 0.25, 0.5]] -# exit() - - -def obj_box_coord_centroid_to_upleft_butright(coord, to_int=False): - """Convert one coordinate [x_center, y_center, w, h] to [x1, y1, x2, y2] in up-left and botton-right format. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - to_int : boolean - Whether to convert output as integer. - - Returns - ------- - list of 4 numbers - New bounding box. - - Examples - --------- - >>> coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20]) - ... [20, 30, 40, 50] - - """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - x_center, y_center, w, h = coord - x = x_center - w / 2. - y = y_center - h / 2. - x2 = x + w - y2 = y + h - if to_int: - return [int(x), int(y), int(x2), int(y2)] - else: - return [x, y, x2, y2] - - -# coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20]) -# logging.info(coord) [20, 30, 40, 50] -# exit() - - -def obj_box_coord_upleft_butright_to_centroid(coord): - """Convert one coordinate [x1, y1, x2, y2] to [x_center, y_center, w, h]. - It is the reverse process of ``obj_box_coord_centroid_to_upleft_butright``. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - - Returns - ------- - list of 4 numbers - New bounding box. 
- - """ - assert len(coord) == 4, "coordinate should be 4 values : [x1, y1, x2, y2]" - x1, y1, x2, y2 = coord - w = x2 - x1 - h = y2 - y1 - x_c = x1 + w / 2. - y_c = y1 + h / 2. - return [x_c, y_c, w, h] - - -def obj_box_coord_centroid_to_upleft(coord): - """Convert one coordinate [x_center, y_center, w, h] to [x, y, w, h]. - It is the reverse process of ``obj_box_coord_upleft_to_centroid``. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - - Returns - ------- - list of 4 numbers - New bounding box. - - """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - x_center, y_center, w, h = coord - x = x_center - w / 2. - y = y_center - h / 2. - return [x, y, w, h] - - -def obj_box_coord_upleft_to_centroid(coord): - """Convert one coordinate [x, y, w, h] to [x_center, y_center, w, h]. - It is the reverse process of ``obj_box_coord_centroid_to_upleft``. - - Parameters - ------------ - coord : list of 4 int/float - One coordinate. - - Returns - ------- - list of 4 numbers - New bounding box. - - """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - x, y, w, h = coord - x_center = x + w / 2. - y_center = y + h / 2. - return [x_center, y_center, w, h] - - -def parse_darknet_ann_str_to_list(annotations): - """Input string format of class, x, y, w, h, return list of list format. - - Parameters - ----------- - annotations : str - The annotations in darkent format "class, x, y, w, h ...." seperated by "\\n". - - Returns - ------- - list of list of 4 numbers - List of bounding box. - - """ - annotations = annotations.split("\n") - ann = [] - for a in annotations: - a = a.split() - if len(a) == 5: - for i, _v in enumerate(a): - if i == 0: - a[i] = int(a[i]) - else: - a[i] = float(a[i]) - ann.append(a) - return ann - - -def parse_darknet_ann_list_to_cls_box(annotations): - """Parse darknet annotation format into two lists for class and bounding box. 
- - Input list of [[class, x, y, w, h], ...], return two list of [class ...] and [[x, y, w, h], ...]. - - Parameters - ------------ - annotations : list of list - A list of class and bounding boxes of images e.g. [[class, x, y, w, h], ...] - - Returns - ------- - list of int - List of class labels. - - list of list of 4 numbers - List of bounding box. - - """ - class_list = [] - bbox_list = [] - for ann in annotations: - class_list.append(ann[0]) - bbox_list.append(ann[1:]) - return class_list, bbox_list - - -def obj_box_left_right_flip(im, coords=None, is_rescale=False, is_center=False, is_random=False): - """Left-right flip the image and coordinates for object detection. - - Parameters - ---------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...]. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean - Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. - is_random : boolean - If True, randomly flip. Default is False. - - Returns - ------- - numpy.array - A processed image - list of list of 4 numbers - A list of new bounding boxes. - - Examples - -------- - >>> im = np.zeros([80, 100]) # as an image with shape width=100, height=80 - >>> im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3], [0.1, 0.5, 0.2, 0.3]], is_rescale=True, is_center=True, is_random=False) - >>> print(coords) - ... [[0.8, 0.4, 0.3, 0.3], [0.9, 0.5, 0.2, 0.3]] - >>> im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3]], is_rescale=True, is_center=False, is_random=False) - >>> print(coords) - ... [[0.5, 0.4, 0.3, 0.3]] - >>> im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=True, is_random=False) - >>> print(coords) - ... 
[[80, 40, 30, 30]] - >>> im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False) - >>> print(coords) - ... [[50, 40, 30, 30]] - - """ - - if coords is None: - coords = [] - - def _flip(im, coords): - im = flip_axis(im, axis=1, is_random=False) - coords_new = list() - - for coord in coords: - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - if is_rescale: - if is_center: - # x_center' = 1 - x - x = 1. - coord[0] - else: - # x_center' = 1 - x - w - x = 1. - coord[0] - coord[2] - else: - if is_center: - # x' = im.width - x - x = im.shape[1] - coord[0] - else: - # x' = im.width - x - w - x = im.shape[1] - coord[0] - coord[2] - coords_new.append([x, coord[1], coord[2], coord[3]]) - return im, coords_new - - if is_random: - factor = np.random.uniform(-1, 1) - if factor > 0: - return _flip(im, coords) - else: - return im, coords - else: - return _flip(im, coords) - - -# im = np.zeros([80, 100]) # as an image with shape width=100, height=80 -# im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3], [0.1, 0.5, 0.2, 0.3]], is_rescale=True, is_center=True, is_random=False) -# logging.info(coords) -# # ... [[0.8, 0.4, 0.3, 0.3], [0.9, 0.5, 0.2, 0.3]] -# im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3]], is_rescale=True, is_center=False, is_random=False) -# logging.info(coords) -# # [[0.5, 0.4, 0.3, 0.3]] -# im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=True, is_random=False) -# logging.info(coords) -# # ... [[80, 40, 30, 30]] -# im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False) -# logging.info(coords) -# # [[50, 40, 30, 30]] -# exit() - - -def obj_box_imresize(im, coords=None, size=None, interp='bicubic', mode=None, is_rescale=False): - """Resize an image, and compute the new bounding box coordinates. 
- - Parameters - ------------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...] - size interp and mode : args - See ``tl.prepro.imresize``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1], then return the original coordinates. Default is False. - - Returns - ------- - numpy.array - A processed image - list of list of 4 numbers - A list of new bounding boxes. - - Examples - -------- - >>> im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80 - >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30], [10, 20, 20, 20]], size=[160, 200], is_rescale=False) - >>> print(coords) - ... [[40, 80, 60, 60], [20, 40, 40, 40]] - >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False) - >>> print(coords) - ... [[20, 20, 30, 15]] - >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False) - >>> print(coords) - ... [[30, 30, 45, 22]] - >>> im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True) - >>> print(coords, im2.shape) - ... 
[[0.2, 0.4, 0.3, 0.3]] (160, 200, 3) - - """ - if coords is None: - coords = [] - if size is None: - size = [100, 100] - - imh, imw = im.shape[0:2] - imh = imh * 1.0 # * 1.0 for python2 : force division to be float point - imw = imw * 1.0 - im = imresize(im, size=size, interp=interp, mode=mode) - - if is_rescale is False: - coords_new = list() - for coord in coords: - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - # x' = x * (imw'/imw) - x = int(coord[0] * (size[1] / imw)) - # y' = y * (imh'/imh) - # logging.info('>>', coord[1], size[0], imh) - y = int(coord[1] * (size[0] / imh)) - # w' = w * (imw'/imw) - w = int(coord[2] * (size[1] / imw)) - # h' = h * (imh'/imh) - h = int(coord[3] * (size[0] / imh)) - coords_new.append([x, y, w, h]) - return im, coords_new - else: - return im, coords - - -# im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80 -# _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30], [10, 20, 20, 20]], size=[160, 200], is_rescale=False) -# logging.info(coords) -# # ... [[40, 80, 60, 60], [20, 40, 40, 40]] -# _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False) -# logging.info(coords) -# # ... [20, 20, 30, 15] -# _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False) -# logging.info(coords) -# # ... [30, 30, 45, 22] -# im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True) -# logging.info(coords, im2.shape) -# # ... [0.2, 0.4, 0.3, 0.3] (160, 200, 3) -# exit() - - -def obj_box_crop(im, classes=None, coords=None, wrg=100, hrg=100, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, thresh_wh2=12.): - """Randomly or centrally crop an image, and compute the new bounding box coordinates. - Objects outside the cropped image will be removed. - - Parameters - ----------- - im : numpy.array - An image with dimension of [row, col, channel] (default). 
- classes : list of int or None - Class IDs. - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...] - wrg hrg and is_random : args - See ``tl.prepro.crop``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean, default False - Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. - thresh_wh : float - Threshold, remove the box if its ratio of width(height) to image size less than the threshold. - thresh_wh2 : float - Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. - - Returns - ------- - numpy.array - A processed image - list of int - A list of classes - list of list of 4 numbers - A list of new bounding boxes. - - """ - if classes is None: - classes = [] - if coords is None: - coords = [] - - h, w = im.shape[0], im.shape[1] - assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" - if is_random: - h_offset = int(np.random.uniform(0, h - hrg) - 1) - w_offset = int(np.random.uniform(0, w - wrg) - 1) - h_end = hrg + h_offset - w_end = wrg + w_offset - im_new = im[h_offset:h_end, w_offset:w_end] - else: # central crop - h_offset = int(np.floor((h - hrg) / 2.)) - w_offset = int(np.floor((w - wrg) / 2.)) - h_end = h_offset + hrg - w_end = w_offset + wrg - im_new = im[h_offset:h_end, w_offset:w_end] - - # w - # _____________________________ - # | h/w offset | - # | ------- | - # h | | | | - # | | | | - # | ------- | - # | h/w end | - # |___________________________| - - def _get_coord(coord): - """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, - before getting the new coordinates. - Boxes outsides the cropped image will be removed. 
- - """ - if is_center: - coord = obj_box_coord_centroid_to_upleft(coord) - - ##======= pixel unit format and upleft, w, h ==========## - - # x = np.clip( coord[0] - w_offset, 0, w_end - w_offset) - # y = np.clip( coord[1] - h_offset, 0, h_end - h_offset) - # w = np.clip( coord[2] , 0, w_end - w_offset) - # h = np.clip( coord[3] , 0, h_end - h_offset) - - x = coord[0] - w_offset - y = coord[1] - h_offset - w = coord[2] - h = coord[3] - - if x < 0: - if x + w <= 0: - return None - w = w + x - x = 0 - elif x > im_new.shape[1]: # object outside the cropped image - return None - - if y < 0: - if y + h <= 0: - return None - h = h + y - y = 0 - elif y > im_new.shape[0]: # object outside the cropped image - return None - - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image - w = im_new.shape[1] - x - - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image - h = im_new.shape[0] - y - - if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow - # logging.info('xx', w, h) - return None - - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow - # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) - return None - - coord = [x, y, w, h] - - ## convert back if input format is center. - if is_center: - coord = obj_box_coord_upleft_to_centroid(coord) - - return coord - - coords_new = list() - classes_new = list() - for i, _ in enumerate(coords): - coord = coords[i] - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - if is_rescale: - # for scaled coord, upscaled before process and scale back in the end. 
- coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) - coord = _get_coord(coord) - if coord is not None: - coord = obj_box_coord_rescale(coord, im_new.shape) - coords_new.append(coord) - classes_new.append(classes[i]) - else: - coord = _get_coord(coord) - if coord is not None: - coords_new.append(coord) - classes_new.append(classes[i]) - return im_new, classes_new, coords_new - - -def obj_box_shift(im, - classes=None, - coords=None, - wrg=0.1, - hrg=0.1, - row_index=0, - col_index=1, - channel_index=2, - fill_mode='nearest', - cval=0., - order=1, - is_rescale=False, - is_center=False, - is_random=False, - thresh_wh=0.02, - thresh_wh2=12.): - """Shift an image randomly or non-randomly, and compute the new bounding box coordinates. - Objects outside the cropped image will be removed. - - Parameters - ----------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - classes : list of int or None - Class IDs. - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...] - wrg, hrg row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.shift``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean - Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. - thresh_wh : float - Threshold, remove the box if its ratio of width(height) to image size less than the threshold. - thresh_wh2 : float - Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. - - - Returns - ------- - numpy.array - A processed image - list of int - A list of classes - list of list of 4 numbers - A list of new bounding boxes. - - """ - if classes is None: - classes = [] - if coords is None: - coords = [] - - imh, imw = im.shape[row_index], im.shape[col_index] - assert (hrg < 1.0) and (hrg > 0.) 
and (wrg < 1.0) and (wrg > 0.), "shift range should be (0, 1)" - if is_random: - tx = np.random.uniform(-hrg, hrg) * imh - ty = np.random.uniform(-wrg, wrg) * imw - else: - tx, ty = hrg * imh, wrg * imw - translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - - transform_matrix = translation_matrix # no need to do offset - im_new = apply_transform(im, transform_matrix, channel_index, fill_mode, cval, order) - - # modified from obj_box_crop - def _get_coord(coord): - """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, - before getting the new coordinates. - Boxes outsides the cropped image will be removed. - - """ - if is_center: - coord = obj_box_coord_centroid_to_upleft(coord) - - ##======= pixel unit format and upleft, w, h ==========## - x = coord[0] - ty # only change this - y = coord[1] - tx # only change this - w = coord[2] - h = coord[3] - - if x < 0: - if x + w <= 0: - return None - w = w + x - x = 0 - elif x > im_new.shape[1]: # object outside the cropped image - return None - - if y < 0: - if y + h <= 0: - return None - h = h + y - y = 0 - elif y > im_new.shape[0]: # object outside the cropped image - return None - - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image - w = im_new.shape[1] - x - - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image - h = im_new.shape[0] - y - - if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow - # logging.info('xx', w, h) - return None - - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow - # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) - return None - - coord = [x, y, w, h] - - ## convert back if input format is center. 
- if is_center: - coord = obj_box_coord_upleft_to_centroid(coord) - - return coord - - coords_new = list() - classes_new = list() - for i, _ in enumerate(coords): - coord = coords[i] - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - if is_rescale: - # for scaled coord, upscaled before process and scale back in the end. - coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) - coord = _get_coord(coord) - if coord is not None: - coord = obj_box_coord_rescale(coord, im_new.shape) - coords_new.append(coord) - classes_new.append(classes[i]) - else: - coord = _get_coord(coord) - if coord is not None: - coords_new.append(coord) - classes_new.append(classes[i]) - return im_new, classes_new, coords_new - - -def obj_box_zoom(im, - classes=None, - coords=None, - zoom_range=(0.9, 1.1), - row_index=0, - col_index=1, - channel_index=2, - fill_mode='nearest', - cval=0., - order=1, - is_rescale=False, - is_center=False, - is_random=False, - thresh_wh=0.02, - thresh_wh2=12.): - """Zoom in and out of a single image, randomly or non-randomly, and compute the new bounding box coordinates. - Objects outside the cropped image will be removed. - - Parameters - ----------- - im : numpy.array - An image with dimension of [row, col, channel] (default). - classes : list of int or None - Class IDs. - coords : list of list of 4 int/float or None - Coordinates [[x, y, w, h], [x, y, w, h], ...]. - zoom_range row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.zoom``. - is_rescale : boolean - Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. - is_center : boolean - Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format). Default is False. - thresh_wh : float - Threshold, remove the box if its ratio of width(height) to image size less than the threshold. - thresh_wh2 : float - Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. 
- - Returns - ------- - numpy.array - A processed image - list of int - A list of classes - list of list of 4 numbers - A list of new bounding boxes. - - """ - if classes is None: - classes = [] - if coords is None: - coords = [] - - if len(zoom_range) != 2: - raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) - if is_random: - if zoom_range[0] == 1 and zoom_range[1] == 1: - zx, zy = 1, 1 - logging.info(" random_zoom : not zoom in/out") - else: - zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) - else: - zx, zy = zoom_range - # logging.info(zx, zy) - zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]]) - - h, w = im.shape[row_index], im.shape[col_index] - transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) - im_new = apply_transform(im, transform_matrix, channel_index, fill_mode, cval, order) - - # modified from obj_box_crop - def _get_coord(coord): - """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, - before getting the new coordinates. - Boxes outsides the cropped image will be removed. 
- - """ - if is_center: - coord = obj_box_coord_centroid_to_upleft(coord) - - # ======= pixel unit format and upleft, w, h ========== - x = (coord[0] - im.shape[1] / 2) / zy + im.shape[1] / 2 # only change this - y = (coord[1] - im.shape[0] / 2) / zx + im.shape[0] / 2 # only change this - w = coord[2] / zy # only change this - h = coord[3] / zx # only change thisS - - if x < 0: - if x + w <= 0: - return None - w = w + x - x = 0 - elif x > im_new.shape[1]: # object outside the cropped image - return None - - if y < 0: - if y + h <= 0: - return None - h = h + y - y = 0 - elif y > im_new.shape[0]: # object outside the cropped image - return None - - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image - w = im_new.shape[1] - x - - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image - h = im_new.shape[0] - y - - if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow - # logging.info('xx', w, h) - return None - - if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow - # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) - return None - - coord = [x, y, w, h] - - # convert back if input format is center. - if is_center: - coord = obj_box_coord_upleft_to_centroid(coord) - - return coord - - coords_new = list() - classes_new = list() - for i, _ in enumerate(coords): - coord = coords[i] - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" - if is_rescale: - # for scaled coord, upscaled before process and scale back in the end. 
- coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) - coord = _get_coord(coord) - if coord is not None: - coord = obj_box_coord_rescale(coord, im_new.shape) - coords_new.append(coord) - classes_new.append(classes[i]) - else: - coord = _get_coord(coord) - if coord is not None: - coords_new.append(coord) - classes_new.append(classes[i]) - return im_new, classes_new, coords_new - - -def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncating='pre', value=0.): - """Pads each sequence to the same length: - the length of the longest sequence. - If maxlen is provided, any sequence longer - than maxlen is truncated to maxlen. - Truncation happens off either the beginning (default) or - the end of the sequence. - Supports post-padding and pre-padding (default). - - Parameters - ---------- - sequences : list of list of int - All sequences where each row is a sequence. - maxlen : int - Maximum length. - dtype : numpy.dtype or str - Data type to cast the resulting sequence. - padding : str - Either 'pre' or 'post', pad either before or after each sequence. - truncating : str - Either 'pre' or 'post', remove values from sequences larger than maxlen either in the beginning or in the end of the sequence - value : float - Value to pad the sequences to the desired value. - - Returns - ---------- - x : numpy.array - With dimensions (number_of_sequences, maxlen) - - Examples - ---------- - >>> sequences = [[1,1,1,1,1],[2,2,2],[3,3]] - >>> sequences = pad_sequences(sequences, maxlen=None, dtype='int32', - ... padding='post', truncating='pre', value=0.) - ... [[1 1 1 1 1] - ... [2 2 2 0 0] - ... [3 3 0 0 0]] - - """ - lengths = [len(s) for s in sequences] - - nb_samples = len(sequences) - if maxlen is None: - maxlen = np.max(lengths) - - # take the sample shape from the first non empty sequence - # checking for consistency in the main loop below. 
- sample_shape = tuple() - for s in sequences: - if len(s) > 0: - sample_shape = np.asarray(s).shape[1:] - break - - x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype) - for idx, s in enumerate(sequences): - if len(s) == 0: - continue # empty list was found - if truncating == 'pre': - trunc = s[-maxlen:] - elif truncating == 'post': - trunc = s[:maxlen] - else: - raise ValueError('Truncating type "%s" not understood' % truncating) - - # check `trunc` has expected shape - trunc = np.asarray(trunc, dtype=dtype) - if trunc.shape[1:] != sample_shape: - raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' % (trunc.shape[1:], idx, sample_shape)) - - if padding == 'post': - x[idx, :len(trunc)] = trunc - elif padding == 'pre': - x[idx, -len(trunc):] = trunc - else: - raise ValueError('Padding type "%s" not understood' % padding) - return x.tolist() - - -def remove_pad_sequences(sequences, pad_id=0): - """Remove padding. - - Parameters - ----------- - sequences : list of list of int - All sequences where each row is a sequence. - pad_id : int - The pad ID. - - Returns - ---------- - list of list of int - The processed sequences. - - Examples - ---------- - >>> sequences = [[2,3,4,0,0], [5,1,2,3,4,0,0,0], [4,5,0,2,4,0,0,0]] - >>> print(remove_pad_sequences(sequences, pad_id=0)) - ... 
[[2, 3, 4], [5, 1, 2, 3, 4], [4, 5, 0, 2, 4]] - - """ - import copy - sequences_out = copy.deepcopy(sequences) - for i, _ in enumerate(sequences): - # for j in range(len(sequences[i])): - # if sequences[i][j] == pad_id: - # sequences_out[i] = sequences_out[i][:j] - # break - for j in range(1, len(sequences[i])): - if sequences[i][-j] != pad_id: - sequences_out[i] = sequences_out[i][0:-j + 1] - break - return sequences_out - - -def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_end_id=False): - """Set all tokens(ids) after END token to the padding value, and then shorten (option) it to the maximum sequence length in this batch. - - Parameters - ----------- - sequences : list of list of int - All sequences where each row is a sequence. - end_id : int - The special token for END. - pad_val : int - Replace the `end_id` and the IDs after `end_id` to this value. - is_shorten : boolean - Shorten the sequences. Default is True. - remain_end_id : boolean - Keep an `end_id` in the end. Default is False. - - Returns - ---------- - list of list of int - The processed sequences. - - Examples - --------- - >>> sentences_ids = [[4, 3, 5, 3, 2, 2, 2, 2], <-- end_id is 2 - ... [5, 3, 9, 4, 9, 2, 2, 3]] <-- end_id is 2 - >>> sentences_ids = precess_sequences(sentences_ids, end_id=vocab.end_id, pad_val=0, is_shorten=True) - ... 
[[4, 3, 5, 3, 0], [5, 3, 9, 4, 9]] - - """ - max_length = 0 - for _, seq in enumerate(sequences): - is_end = False - for i_w, n in enumerate(seq): - if n == end_id and is_end == False: # 1st time to see end_id - is_end = True - if max_length < i_w: - max_length = i_w - if remain_end_id is False: - seq[i_w] = pad_val # set end_id to pad_val - elif is_end == True: - seq[i_w] = pad_val - - if remain_end_id is True: - max_length += 1 - if is_shorten: - for i, seq in enumerate(sequences): - sequences[i] = seq[:max_length] - return sequences - - -def sequences_add_start_id(sequences, start_id=0, remove_last=False): - """Add special start token(id) in the beginning of each sequence. - - Parameters - ------------ - sequences : list of list of int - All sequences where each row is a sequence. - start_id : int - The start ID. - remove_last : boolean - Remove the last value of each sequences. Usually be used for removing the end ID. - - Returns - ---------- - list of list of int - The processed sequences. - - Examples - --------- - >>> sentences_ids = [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]] - >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2) - ... [[2, 4, 3, 5, 3, 2, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2, 3]] - >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2, remove_last=True) - ... [[2, 4, 3, 5, 3, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2]] - - For Seq2seq - - >>> input = [a, b, c] - >>> target = [x, y, z] - >>> decode_seq = [start_id, a, b] <-- sequences_add_start_id(input, start_id, True) - - """ - sequences_out = [[] for _ in range(len(sequences))] #[[]] * len(sequences) - for i, _ in enumerate(sequences): - if remove_last: - sequences_out[i] = [start_id] + sequences[i][:-1] - else: - sequences_out[i] = [start_id] + sequences[i] - return sequences_out - - -def sequences_add_end_id(sequences, end_id=888): - """Add special end token(id) in the end of each sequence. 
- - Parameters - ----------- - sequences : list of list of int - All sequences where each row is a sequence. - end_id : int - The end ID. - - Returns - ---------- - list of list of int - The processed sequences. - - Examples - --------- - >>> sequences = [[1,2,3],[4,5,6,7]] - >>> print(sequences_add_end_id(sequences, end_id=999)) - ... [[1, 2, 3, 999], [4, 5, 6, 999]] - - """ - sequences_out = [[] for _ in range(len(sequences))] #[[]] * len(sequences) - for i, _ in enumerate(sequences): - sequences_out[i] = sequences[i] + [end_id] - return sequences_out - - -def sequences_add_end_id_after_pad(sequences, end_id=888, pad_id=0): - """Add special end token(id) in the end of each sequence. - - Parameters - ----------- - sequences : list of list of int - All sequences where each row is a sequence. - end_id : int - The end ID. - pad_id : int - The pad ID. - - Returns - ---------- - list of list of int - The processed sequences. - - Examples - --------- - >>> sequences = [[1,2,0,0], [1,2,3,0], [1,2,3,4]] - >>> print(sequences_add_end_id_after_pad(sequences, end_id=99, pad_id=0)) - ... [[1, 2, 99, 0], [1, 2, 3, 99], [1, 2, 3, 4]] - - """ - # sequences_out = [[] for _ in range(len(sequences))]#[[]] * len(sequences) - import copy - sequences_out = copy.deepcopy(sequences) - # # add a pad to all - # for i in range(len(sequences)): - # for j in range(len(sequences[i])): - # sequences_out[i].append(pad_id) - # # pad -- > end - # max_len = 0 - for i, v in enumerate(sequences): - for j, _v2 in enumerate(v): - if sequences[i][j] == pad_id: - sequences_out[i][j] = end_id - # if j > max_len: - # max_len = j - break - # # remove pad if too long - # for i in range(len(sequences)): - # for j in range(len(sequences[i])): - # sequences_out[i] = sequences_out[i][:max_len+1] - return sequences_out - - -def sequences_get_mask(sequences, pad_val=0): - """Return mask for sequences. - - Parameters - ----------- - sequences : list of list of int - All sequences where each row is a sequence. 
- pad_val : int - The pad value. - - Returns - ---------- - list of list of int - The mask. - - Examples - --------- - >>> sentences_ids = [[4, 0, 5, 3, 0, 0], - ... [5, 3, 9, 4, 9, 0]] - >>> mask = sequences_get_mask(sentences_ids, pad_val=0) - ... [[1 1 1 1 0 0] - ... [1 1 1 1 1 0]] - - """ - mask = np.ones_like(sequences) - for i, seq in enumerate(sequences): - for i_w in reversed(range(len(seq))): - if seq[i_w] == pad_val: - mask[i, i_w] = 0 - else: - break # <-- exit the for loop, prepcess next sequence - return mask diff --git a/tensorlayer/rein.py b/tensorlayer/rein.py deleted file mode 100644 index 43882f6..0000000 --- a/tensorlayer/rein.py +++ /dev/null @@ -1,167 +0,0 @@ -#! /usr/bin/python -# -*- coding: utf-8 -*- - -import numpy as np -import tensorflow as tf -from six.moves import xrange - -__all__ = [ - 'discount_episode_rewards', - 'cross_entropy_reward_loss', - 'log_weight', - 'choice_action_by_probs', -] - - -def discount_episode_rewards(rewards=None, gamma=0.99, mode=0): - """Take 1D float array of rewards and compute discounted rewards for an - episode. When encount a non-zero value, consider as the end a of an episode. - - Parameters - ---------- - rewards : list - List of rewards - gamma : float - Discounted factor - mode : int - Mode for computing the discount rewards. - - If mode == 0, reset the discount process when encount a non-zero reward (Ping-pong game). - - If mode == 1, would not reset the discount process. - - Returns - -------- - list of float - The discounted rewards. - - Examples - ---------- - >>> rewards = np.asarray([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]) - >>> gamma = 0.9 - >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma) - >>> print(discount_rewards) - ... [ 0.72899997 0.81 0.89999998 1. 0.72899997 0.81 - ... 0.89999998 1. 0.72899997 0.81 0.89999998 1. ] - >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma, mode=1) - >>> print(discount_rewards) - ... 
[ 1.52110755 1.69011939 1.87791049 2.08656716 1.20729685 1.34144104 - ... 1.49048996 1.65610003 0.72899997 0.81 0.89999998 1. ] - - """ - if rewards is None: - raise Exception("rewards should be a list") - discounted_r = np.zeros_like(rewards, dtype=np.float32) - running_add = 0 - for t in reversed(xrange(0, rewards.size)): - if mode == 0: - if rewards[t] != 0: running_add = 0 - - running_add = running_add * gamma + rewards[t] - discounted_r[t] = running_add - return discounted_r - - -def cross_entropy_reward_loss(logits, actions, rewards, name=None): - """Calculate the loss for Policy Gradient Network. - - Parameters - ---------- - logits : tensor - The network outputs without softmax. This function implements softmax inside. - actions : tensor or placeholder - The agent actions. - rewards : tensor or placeholder - The rewards. - - Returns - -------- - Tensor - The TensorFlow loss function. - - Examples - ---------- - >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D]) - >>> network = InputLayer(states_batch_pl, name='input') - >>> network = DenseLayer(network, n_units=H, act=tf.nn.relu, name='relu1') - >>> network = DenseLayer(network, n_units=3, name='out') - >>> probs = network.outputs - >>> sampling_prob = tf.nn.softmax(probs) - >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None]) - >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None]) - >>> loss = tl.rein.cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl) - >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss) - - """ - try: # TF 1.0+ - cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name) - except Exception: - cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, targets=actions) - # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions) - - try: ## TF1.0+ - loss = tf.reduce_sum(tf.multiply(cross_entropy, 
rewards)) - except Exception: ## TF0.12 - loss = tf.reduce_sum(tf.mul(cross_entropy, rewards)) # element-wise mul - return loss - - -def log_weight(probs, weights, name='log_weight'): - """Log weight. - - Parameters - ----------- - probs : tensor - If it is a network output, usually we should scale it to [0, 1] via softmax. - weights : tensor - The weights. - - Returns - -------- - Tensor - The Tensor after appling the log weighted expression. - - """ - with tf.variable_scope(name): - exp_v = tf.reduce_mean(tf.log(probs) * weights) - return exp_v - - -def choice_action_by_probs(probs=(0.5, 0.5), action_list=None): - """Choice and return an an action by given the action probability distribution. - - Parameters - ------------ - probs : list of float. - The probability distribution of all actions. - action_list : None or a list of int or others - A list of action in integer, string or others. If None, returns an integer range between 0 and len(probs)-1. - - Returns - -------- - float int or str - The chosen action. - - Examples - ---------- - >>> for _ in range(5): - >>> a = choice_action_by_probs([0.2, 0.4, 0.4]) - >>> print(a) - ... 0 - ... 1 - ... 1 - ... 2 - ... 1 - >>> for _ in range(3): - >>> a = choice_action_by_probs([0.5, 0.5], ['a', 'b']) - >>> print(a) - ... a - ... b - ... 
b - - """ - if action_list is None: - n_action = len(probs) - action_list = np.arange(n_action) - else: - if len(action_list) != len(probs): - raise Exception("number of actions should equal to number of probabilities.") - return np.random.choice(action_list, p=probs) diff --git a/tensorlayer/third_party/__init__.py b/tensorlayer/third_party/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tensorlayer/third_party/roi_pooling/.gitignore b/tensorlayer/third_party/roi_pooling/.gitignore deleted file mode 100644 index 08030a8..0000000 --- a/tensorlayer/third_party/roi_pooling/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.ipynb_checkpoints/ -build/ - diff --git a/tensorlayer/third_party/roi_pooling/README.md b/tensorlayer/third_party/roi_pooling/README.md deleted file mode 100644 index d597cea..0000000 --- a/tensorlayer/third_party/roi_pooling/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Hint from TensorLayer -- This implementation is from `https://github.com/deepsense-ai/roi-pooling`, date: 31 Aug 2017. -- To install this, you have to clone TensorLayer from Github instead of pip install. -- Remember to modify the `CUDA_LIB` in Makefile before running `python setup.py install` in this folder. -- Make sure `roi_pooling_example.py` and `test_roi_layer.py` is runable. - - ----- - - -## RoI pooling in TensorFlow - -This repo contains the implementation of **Region of Interest pooling** as a custom TensorFlow operation. The CUDA code responsible for the computations was largely taken from the original [Caffe implementation by Ross Girshick](https://github.com/rbgirshick/fast-rcnn). - -For more information about RoI pooling you can check out [Region of interest pooling explained](https://deepsense.io/region-of-interest-pooling-explained/) at our [deepsense.io](https://deepsense.io/) blog. 
- -![Region of Interest Pooling animation](roi_pooling_animation.gif) - - -## Requirements - -To compile and use `roi_pooling` layer you need to have: - -* [CUDA](https://developer.nvidia.com/cuda-toolkit) (tested with 8.0) -* [https://www.tensorflow.org/](TensorFlow) (tested with 0.12.0 and 1.0.0) - -Only official TensorFlow releases are currently supported. If you're using a custom built TensorFlow compiled with a different GCC version (e.g. 5.X) you may need to modify the makefile to [enable the new ABI version](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html). - - -## Install - -Since it uses compilation - -```bash -$ git clone git@github.com:deepsense-io/roi-pooling.git -$ cd roi-pooling -$ python setup.py install -``` - -Right now we provide only GPU implementation (no CPU at this time). - - -## Usage - -After successful installation you can use the operation like this: - -```python -from roi_pooling.roi_pooling_ops import roi_pooling - -# here obtain feature map and regions of interest -rpooling = roi_pooling(feature_map, rois, 7, 7) -# continue the model -``` - -Working example in Jupyter Notebook: [examples/roi_pooling_minimal_example.ipynb](https://github.com/deepsense-io/roi-pooling/blob/master/examples/roi_pooling_minimal_example.ipynb) - diff --git a/tensorlayer/third_party/roi_pooling/examples/__init__.py b/tensorlayer/third_party/roi_pooling/examples/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tensorlayer/third_party/roi_pooling/examples/roi_pooling_minimal_example.ipynb b/tensorlayer/third_party/roi_pooling/examples/roi_pooling_minimal_example.ipynb deleted file mode 100644 index c1edc35..0000000 --- a/tensorlayer/third_party/roi_pooling/examples/roi_pooling_minimal_example.ipynb +++ /dev/null @@ -1,148 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* blog post: [Region of interest pooling explained - 
deepsense.io](https://deepsense.io/region-of-interest-pooling-explained/)\n", - "* repository: [deepsense-io/roi-pooling](https://github.com/deepsense-io/roi-pooling)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "\n", - "import tensorflow as tf\n", - "import numpy as np\n", - "\n", - "from roi_pooling.roi_pooling_ops import roi_pooling" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# 4x4 feature map with only 1 channel\n", - "input_value = [[\n", - " [[1], [2], [4], [4]],\n", - " [[3], [4], [1], [2]],\n", - " [[6], [2], [1], [7]],\n", - " [[1], [3], [2], [8]]\n", - "]]\n", - "input_value = np.asarray(input_value, dtype='float32')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# regions of interest as lists of:\n", - "# feature map index, upper left, bottom right coordinates\n", - "rois_value = [\n", - " [0, 0, 0, 1, 3],\n", - " [0, 2, 2, 3, 3],\n", - " [0, 1, 0, 3, 2]\n", - "]\n", - "rois_value = np.asarray(rois_value, dtype='int32')\n", - "\n", - "# in this case we have 3 RoI pooling operations:\n", - "# * channel 0, rectangular region (0, 0) to (1, 3)\n", - "# xx..\n", - "# xx..\n", - "# xx..\n", - "# xx..\n", - "#\n", - "# * channel 0, rectangular region (2, 2) to (3, 3)\n", - "# ....\n", - "# ....\n", - "# ..xx\n", - "# ..xx\n", - "# * channel 0, rectangular region (1, 0) to (3, 2)\n", - "# ....\n", - "# xxx.\n", - "# xxx.\n", - "# xxx." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[[[ 3. 4.]\n", - " [ 6. 3.]]]\n", - "\n", - "\n", - " [[[ 1. 7.]\n", - " [ 2. 8.]]]\n", - "\n", - "\n", - " [[[ 4. 4.]\n", - " [ 4. 
7.]]]]\n" - ] - } - ], - "source": [ - "input_featuremap = tf.placeholder(tf.float32)\n", - "rois = tf.placeholder(tf.int32)\n", - "input_const = tf.constant(input_value, tf.float32)\n", - "rois_const = tf.constant(rois_value, tf.int32)\n", - "y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2)\n", - "\n", - "with tf.Session('') as sess:\n", - " y_output = sess.run(y, feed_dict={input_featuremap: input_value, rois: rois_value})\n", - " print(y_output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/Makefile b/tensorlayer/third_party/roi_pooling/roi_pooling/Makefile deleted file mode 100644 index db9de78..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -TF_INC = $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') -CUDA_LIB = /usr/local/cuda-8.0/lib64 - -all: clean build test - -build: roi_pooling.so - -roi_pooling.cu.o: roi_pooling.cu.cc - nvcc -std=c++11 -c -o $@ $? -I $(TF_INC) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -D _GLIBCXX_USE_CXX11_ABI=0 - -roi_pooling.so: roi_pooling.cc roi_pooling.cu.o - g++ -std=c++11 -shared -o $@ $? 
-I $(TF_INC) -fPIC -lcudart -L$(CUDA_LIB) -D _GLIBCXX_USE_CXX11_ABI=0 - -test: build - python roi_pooling_test.py - -clean: - rm -f *.o *.so *.pyc *.npy diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/__init__.py b/tensorlayer/third_party/roi_pooling/roi_pooling/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cc b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cc deleted file mode 100644 index d1f123d..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cc +++ /dev/null @@ -1,162 +0,0 @@ -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include -#include -#include - -using namespace tensorflow; -using namespace std; - -REGISTER_OP("RoiPooling") -.Input("input: float32") -.Input("rois: int32") -.Attr("pool_height: int") -.Attr("pool_width: int") -.Output("output: float32") -.Output("argmax_output: int32"); - - -#define Dtype float - -void RoiPoolingKernelLauncher(const float* input, const int* rois, int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, Dtype* output, int* argmax_output); - -// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it - -class RoiPoolingOp : public OpKernel { - private: - int pool_height_, pool_width_; - public: - explicit RoiPoolingOp(OpKernelConstruction* context) : OpKernel(context) { - OP_REQUIRES_OK(context, - context->GetAttr("pool_height", &pool_height_)); - - OP_REQUIRES_OK(context, - context->GetAttr("pool_width", &pool_width_)); - } - - - void Compute(OpKernelContext* context) override { - // Grab the input tensor - const Tensor& input_tensor = context->input(0); - const Tensor& rois_tensor = context->input(1); - - auto input = input_tensor.flat(); - auto rois = rois_tensor.flat(); - - // Create an output tensor - Tensor* output_tensor = NULL; - Tensor* 
argmax_output_tensor = NULL; - - auto input_shape = input_tensor.shape(); - auto rois_shape = rois_tensor.shape(); - - int n_rois = rois_shape.dim_size(0); - int height = input_shape.dim_size(1); - int width = input_shape.dim_size(2); - int channels = input_shape.dim_size(3); - - TensorShape output_shape = TensorShape({static_cast(n_rois), - static_cast(channels), - static_cast(pool_height_), - static_cast(pool_width_)}); - - OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, - &output_tensor)); - - OP_REQUIRES_OK(context, context->allocate_output(1, output_shape, - &argmax_output_tensor)); - - auto output = output_tensor->template flat(); - auto argmax_output = argmax_output_tensor->template flat(); - - RoiPoolingKernelLauncher(input.data(), rois.data(), - n_rois, channels, - height, width, - pool_height_, pool_width_, - output.data(), argmax_output.data()); - } -}; - -REGISTER_KERNEL_BUILDER(Name("RoiPooling").Device(DEVICE_GPU), RoiPoolingOp); - -///////////// RoiPoolingGrad - - -REGISTER_OP("RoiPoolingGrad") -.Input("orig_input: float32") -.Input("orig_rois: int32") -.Input("orig_output: float32") -.Input("orig_argmax_output: int32") -.Input("orig_output_grad: float32") -.Attr("pool_height: int") -.Attr("pool_width: int") -.Output("output: float32") -.Doc(R"doc( - region of interest pooling grad -)doc"); - -#define Dtype float -void RoiPoolingGradKernelLauncher(const Dtype* orig_input, const int* orig_rois, - int mb_size, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - const Dtype* orig_output, const int* orig_argmax_output, - const Dtype* orig_output_grad, - Dtype* output); - -// IMPORTANT(maciek): need info about storage of the data in memory, assumed something but need the docs confirming it - -class RoiPoolingGradOp : public OpKernel { - private: - int pool_height_, pool_width_; - public: - explicit RoiPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) { - OP_REQUIRES_OK(context, - 
context->GetAttr("pool_height", &pool_height_)); - - OP_REQUIRES_OK(context, - context->GetAttr("pool_width", &pool_width_)); - } - - - void Compute(OpKernelContext* context) override { - // Grab the input tensor - const Tensor& orig_input_tensor = context->input(0); - const Tensor& orig_rois_tensor = context->input(1); - const Tensor& orig_output_tensor = context->input(2); - const Tensor& orig_argmax_output_tensor = context->input(3); - const Tensor& orig_output_grad_tensor = context->input(4); - - auto orig_input = orig_input_tensor.flat(); - auto orig_rois = orig_rois_tensor.flat(); - auto orig_output = orig_output_tensor.flat(); - auto orig_argmax_output = orig_argmax_output_tensor.flat(); - auto orig_output_grad = orig_output_grad_tensor.flat(); - - // Create an output tensor - Tensor* output_tensor = NULL; - auto orig_input_shape = orig_input_tensor.shape(); - auto orig_rois_shape = orig_rois_tensor.shape(); - auto grads_shape = orig_input_shape; - - int mb_size = orig_input_shape.dim_size(0); - int n_rois = orig_rois_shape.dim_size(0); - int height = orig_input_shape.dim_size(1); - int width = orig_input_shape.dim_size(2); - int channels = orig_input_shape.dim_size(3); - - OP_REQUIRES_OK(context, context->allocate_output(0, grads_shape, - &output_tensor)); - - auto output = output_tensor->template flat(); - - // Call the cuda kernel launcher - RoiPoolingGradKernelLauncher(orig_input.data(), orig_rois.data(), - mb_size, n_rois, channels, height, width, pool_height_, pool_width_, - orig_output.data(), orig_argmax_output.data(), orig_output_grad.data(), output.data()); - } -}; - - -REGISTER_KERNEL_BUILDER(Name("RoiPoolingGrad").Device(DEVICE_GPU), RoiPoolingGradOp); diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cu.cc b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cu.cc deleted file mode 100644 index bbacb55..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling.cu.cc +++ /dev/null @@ -1,214 +0,0 @@ 
-#if GOOGLE_CUDA - -#include -#include -#define EIGEN_USE_GPU -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - -// CUDA: index helpers -#define idx4_4(index, d1, d2, d3, d4) (index % d4) -#define idx4_3(index, d1, d2, d3, d4) ((index / d4) % d3) -#define idx4_2(index, d1, d2, d3, d4) ((index / d4 / d3) % d2) -#define idx4_1(index, d1, d2, d3, d4) ((index / d4 / d3 / d2) %d1) - -// CUDA: various checks for different function calls. -#define CUDA_CHECK(condition) \ - /* Code block avoids redefinition of cudaError_t error */ \ - do { \ - cudaError_t error = condition; \ - if (error != cudaSuccess) { \ - return 1; \ - } \ - } while (0) - -// CUDA: grid stride looping -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ - i < (n); \ - i += blockDim.x * gridDim.x) - -// CUDA: use 512 threads per block -const int CAFFE_CUDA_NUM_THREADS = 512; - -// CUDA: number of blocks for threads. -inline int CAFFE_GET_BLOCKS(const int N) { - // TODO rewrite this part to be consistent with tf conventions - int optimal_number_of_blocks = (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; - int max_number_of_blocks = 65000; - return std::min(optimal_number_of_blocks, max_number_of_blocks); -} - - -#define Dtype float - -__global__ void RoiPoolingKernel(const Dtype* input, const int* rois, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - Dtype* output, int* argmax_output) { - int output_size = n_rois * channels * pooled_height * pooled_width; - - CUDA_KERNEL_LOOP(index, output_size) { - // (n, c, ph, pw) is an element in the pooled output - int pw = idx4_4(index, n_rois, channels, pooled_height, pooled_width); - int ph = idx4_3(index, n_rois, channels, pooled_height, pooled_width); - int c = idx4_2(index, n_rois, channels, pooled_height, pooled_width); - int n = idx4_1(index, n_rois, channels, pooled_height, pooled_width); - - auto bottom_rois_act = rois + n * 5; - - int roi_batch_ind = 
bottom_rois_act[0]; - int roi_start_w = bottom_rois_act[1]; - int roi_start_h = bottom_rois_act[2]; - int roi_end_w = bottom_rois_act[3]; - int roi_end_h = bottom_rois_act[4]; - - // Force malformed ROIs to be 1x1 - // NOTE(maciek): roi_start, roi_end seems to be inclusive - int roi_width = max(roi_end_w - roi_start_w + 1, 1); - int roi_height = max(roi_end_h - roi_start_h + 1, 1); - - // divide the ROIs into smaller regions for max pooling - Dtype bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - Dtype bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // compute the precise coordinates of each pooling subregion of the ROIs - int hstart = static_cast(floor(static_cast(ph) * bin_size_h)); - int wstart = static_cast(floor(static_cast(pw) * bin_size_w)); - int hend = static_cast(ceil(static_cast(ph + 1) * bin_size_h)); - int wend = static_cast(ceil(static_cast(pw + 1) * bin_size_w)); - - // Add roi offsets and clip to input boundaries - hstart = min(max(hstart + roi_start_h, 0), height); - hend = min(max(hend + roi_start_h, 0), height); - wstart = min(max(wstart + roi_start_w, 0), width); - wend = min(max(wend + roi_start_w, 0), width); - - //printf("%d %d %d %d %d %d %d %d\n", n, c, pw, ph, hstart, hend, wstart, wend); - - bool is_empty = (hend <= hstart) || (wend <= wstart); - - // Define an empty pooling region to be zero - - Dtype maxval = is_empty ? 0 : -999999999.0; - //Dtype maxval = is_empty ? 
0 : -FLT_MAX; - // If nothing is pooled, argmax = -1 causes nothing to be backprop'd - - int maxidx = -1; - auto input_act = input + (roi_batch_ind * height * width * channels); - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - int bottom_index = (h * width + w) * channels + c; - - // bottom index is relative to 2d image only - if (input_act[bottom_index] > maxval) { - maxval = input_act[bottom_index]; - maxidx = bottom_index; - } - } - } - output[index] = maxval; - argmax_output[index] = maxidx; - } -} - - -void RoiPoolingKernelLauncher(const float* input, const int* rois, int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, Dtype* output, int* argmax_output) { - int out_size = n_rois * channels * pooled_height * pooled_width; - - RoiPoolingKernel<<>>(input, rois, n_rois, channels, height, width, - pooled_height, pooled_width, output, argmax_output); -} - - -/////////////// Grad -__global__ void RoiPoolingGradKernel(const Dtype* orig_input, const int* orig_rois, - int mb_size, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - const Dtype* orig_output, const int* orig_argmax_output, - const Dtype* orig_output_grad, - Dtype* output) { - - int orig_input_size = mb_size * height * width * channels; - - CUDA_KERNEL_LOOP(index, orig_input_size) { - // (n, h, w, c) coords in bottom data - int c = idx4_4(index, mb_size, height, width, channels); - int w = idx4_3(index, mb_size, height, width, channels); - int h = idx4_2(index, mb_size, height, width, channels); - int n = idx4_1(index, mb_size, height, width, channels); - - Dtype gradient = 0; - // Accumulate gradient over all ROIs that pooled this element - for (int roi_n = 0; roi_n < n_rois; ++roi_n) { - const int* offset_bottom_rois = orig_rois + roi_n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - // Skip if ROI's batch index doesn't match n - if (n != roi_batch_ind) { - continue; - } - - int roi_start_w = 
offset_bottom_rois[1]; - int roi_start_h = offset_bottom_rois[2]; - int roi_end_w = offset_bottom_rois[3]; - int roi_end_h = offset_bottom_rois[4]; - - // Skip if ROI doesn't include (h, w) - const bool in_roi = (w >= roi_start_w && w <= roi_end_w && - h >= roi_start_h && h <= roi_end_h); - if (!in_roi) { - continue; - } - - int offset = (roi_n * channels + c) * pooled_height * pooled_width; - const Dtype* offset_top_diff = orig_output_grad + offset; - const int* offset_argmax_data = orig_argmax_output + offset; - - // Compute feasible set of pooled units that could have pooled - // this bottom unit - - // Force malformed ROIs to be 1x1 - int roi_width = max(roi_end_w - roi_start_w + 1, 1); - int roi_height = max(roi_end_h - roi_start_h + 1, 1); - - Dtype bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - Dtype bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - int phstart = floor(static_cast(h - roi_start_h) / bin_size_h); - int phend = ceil(static_cast(h - roi_start_h + 1) / bin_size_h); - int pwstart = floor(static_cast(w - roi_start_w) / bin_size_w); - int pwend = ceil(static_cast(w - roi_start_w + 1) / bin_size_w); - - phstart = min(max(phstart, 0), pooled_height); - phend = min(max(phend, 0), pooled_height); - pwstart = min(max(pwstart, 0), pooled_width); - pwend = min(max(pwend, 0), pooled_width); - - for (int ph = phstart; ph < phend; ++ph) { - for (int pw = pwstart; pw < pwend; ++pw) { - if (offset_argmax_data[ph * pooled_width + pw] == (h * width + w)) { - gradient += offset_top_diff[ph * pooled_width + pw]; - } - } - } - } - output[index] = gradient; - } - -} - -void RoiPoolingGradKernelLauncher(const Dtype* orig_input, const int* orig_rois, - int mb_size, - int n_rois, int channels, int height, int width, - int pooled_height, int pooled_width, - const Dtype* orig_output, const int* orig_argmax_output, - const Dtype* orig_output_grad, - Dtype* output) { - int out_size = mb_size * height * width * channels; - 
RoiPoolingGradKernel<<>>(orig_input, orig_rois, - mb_size, n_rois, channels, height, width, pooled_height, pooled_width, - orig_output, orig_argmax_output, orig_output_grad, output); -} - -#endif diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py deleted file mode 100644 index be971c3..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py +++ /dev/null @@ -1,50 +0,0 @@ -import os - -import tensorflow as tf -from tensorflow.python.framework import ops - -module_path = os.path.realpath(__file__) -module_dir = os.path.dirname(module_path) -lib_path = os.path.join(module_dir, 'roi_pooling.so') -roi_pooling_module = tf.load_op_library(lib_path) - - -def roi_pooling(input, rois, pool_height, pool_width): - """ - returns a tensorflow operation for computing the Region of Interest Pooling - - @arg input: feature maps on which to perform the pooling operation - @arg rois: list of regions of interest in the format (feature map index, upper left, bottom right) - @arg pool_width: size of the pooling sections - """ - # TODO(maciek): ops scope - out = roi_pooling_module.roi_pooling(input, rois, pool_height=pool_height, pool_width=pool_width) - output, argmax_output = out[0], out[1] - return output - - -@ops.RegisterGradient("RoiPooling") -def _RoiPoolingGrad(op, *grads): - orig_inputs = op.inputs[0] - orig_rois = op.inputs[1] - orig_output = op.outputs[0] - orig_argmax_output = op.outputs[1] - - orig_output_grad = grads[0] - output_grad = roi_pooling_module.roi_pooling_grad( - orig_inputs, orig_rois, orig_output, orig_argmax_output, orig_output_grad, pool_height=op.get_attr('pool_height'), pool_width=op.get_attr('pool_width')) - return [output_grad, None] - - -@ops.RegisterShape("RoiPooling") -def _RoiPoolingShape(op): - input = op.inputs[0] - rois = op.inputs[1] - - n_rois = rois.get_shape()[0] - n_channels = input.get_shape()[3] - pool_height = 
op.get_attr('pool_height') - pool_width = op.get_attr('pool_width') - - #TODO: check the width/hegiht order - return [tf.TensorShape([n_rois, n_channels, pool_width, pool_height]), tf.TensorShape(None)] diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py deleted file mode 100644 index a21bd6a..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py +++ /dev/null @@ -1,99 +0,0 @@ -import numpy as np -import tensorflow as tf - -from roi_pooling_ops import roi_pooling - - -class RoiPoolingTest(tf.test.TestCase): - # TODO(maciek): add python, implementation and test outputs - # TODO(maciek): test pool_height != pool_width, height != width - - def test_roi_pooling_grad(self): - # TODO(maciek): corner cases - input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] - input_value = np.asarray(input_value, dtype='float32') - - rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] - rois_value = np.asarray(rois_value, dtype='int32') - - with tf.Session(''): - # NOTE(maciek): looks like we have to use consts here, based on tensorflow/python/ops/nn_test.py - input_const = tf.constant(input_value, tf.float32) - rois_const = tf.constant(rois_value, tf.int32) - y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2) - mean = tf.reduce_mean(y) - - numerical_grad_error_1 = tf.test.compute_gradient_error([input_const], [input_value.shape], y, [5, 2, 2, 1]) - - numerical_grad_error_2 = tf.test.compute_gradient_error([input_const], [input_value.shape], mean, []) - - self.assertLess(numerical_grad_error_1, 1e-4) - self.assertLess(numerical_grad_error_2, 1e-4) - - def test_shape_inference_1(self): - pooled_w, pooled_h = 2, 2 - input_w, input_h = 200, 200 - n_channels = 3 - n_batches = None - input = tf.placeholder(tf.float32, shape=[n_batches, input_w, 
input_h, n_channels]) - - n_rois = None - single_roi_dimension = 5 - rois = tf.placeholder(tf.int32, shape=[n_rois, single_roi_dimension]) - - y = roi_pooling(input, rois, pool_height=pooled_w, pool_width=pooled_h) - - self.assertEqual(y.get_shape().ndims, 4) - self.assertIs(y.get_shape()[0].value, n_rois) - self.assertIs(y.get_shape()[1].value, n_channels) - self.assertIs(y.get_shape()[2].value, pooled_h) - self.assertIs(y.get_shape()[3].value, pooled_w) - - def test_shape_inference_2(self): - pooled_w, pooled_h = 3, 4 - input_w, input_h = 200, 300 - n_channels = 3 - n_batches = None - input = tf.placeholder(tf.float32, shape=[n_batches, input_w, input_h, n_channels]) - - n_rois = None - single_roi_dimension = 5 - rois = tf.placeholder(tf.int32, shape=[n_rois, single_roi_dimension]) - - y = roi_pooling(input, rois, pool_height=pooled_w, pool_width=pooled_h) - - self.assertEqual(y.get_shape().ndims, 4) - self.assertIs(y.get_shape()[0].value, n_rois) - self.assertIs(y.get_shape()[1].value, n_channels) - self.assertIs(y.get_shape()[2].value, pooled_h) - self.assertIs(y.get_shape()[3].value, pooled_w) - - def test_very_big_output(self): - """ - This test checks whether the layer can handle a corner case - where the number of output pixels is very large, possibly larger - than the number of available GPU threads - """ - - pooled_w, pooled_h = 7, 7 - input_w, input_h = 72, 240 - n_channels = 512 - n_batches = 2 - x_input = np.ones(shape=(n_batches, input_w, input_h, n_channels)) - n_rois = 5000 - rois_input = np.ones(shape=(n_rois, 5)) - - input = tf.placeholder(tf.float32, shape=[n_batches, input_w, input_h, n_channels]) - single_roi_dimension = 5 - rois = tf.placeholder(tf.int32, shape=[n_rois, single_roi_dimension]) - - y = roi_pooling(input, rois, pool_height=pooled_w, pool_width=pooled_h) - - with tf.Session('') as sess: - y_output = sess.run(y, feed_dict={input: x_input, rois: rois_input}) - - self.assertTrue(np.all(y_output == 1)) - - -if __name__ == '__main__': 
- tf.test.main() diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling_animation.gif b/tensorlayer/third_party/roi_pooling/roi_pooling_animation.gif deleted file mode 100644 index 9d35d21..0000000 Binary files a/tensorlayer/third_party/roi_pooling/roi_pooling_animation.gif and /dev/null differ diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py b/tensorlayer/third_party/roi_pooling/roi_pooling_example.py deleted file mode 100644 index f5366c2..0000000 --- a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from roi_pooling.roi_pooling_ops import roi_pooling - -# input feature map going into the RoI pooling -input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] -input_value = np.asarray(input_value, dtype='float32') - -# Regions of interest as lists of: -# feature map index, upper left, bottom right coordinates -rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] -rois_value = np.asarray(rois_value, dtype='int32') - -# the pool_height and width are parameters of the ROI layer -pool_height, pool_width = (2, 2) -n_rois = len(rois_value) -y_shape = [n_rois, 1, pool_height, pool_width] - -print('Input: ', input_value, ', shape: ', input_value.shape) -print('ROIs: ', rois_value, ', shape: ', rois_value.shape) - -# precise semantics is now only defined by the kernel, need tests -input = tf.placeholder(tf.float32) -rois = tf.placeholder(tf.int32) - -y = roi_pooling(input, rois, pool_height=2, pool_width=2) -mean = tf.reduce_mean(y) - -grads = tf.gradients(mean, input) -print(type(grads)) -print(len(grads)) -print(grads) -print(input_value.shape) - -with tf.Session('') as sess: - input_const = tf.constant(input_value, tf.float32) - rois_const = tf.constant(rois_value, tf.int32) - y = roi_pooling(input_const, rois_const, 
pool_height=2, pool_width=2) - mean = tf.reduce_mean(y) - - numerical_grad_error_1 = tf.test.compute_gradient_error([input_const], [input_value.shape], y, y_shape) - numerical_grad_error_2 = tf.test.compute_gradient_error([input_const], [input_value.shape], mean, []) - print(numerical_grad_error_1, numerical_grad_error_2) - -with tf.Session('') as sess: - y_output = sess.run(y, feed_dict={input: input_value, rois: rois_value}) - print('y: ', y_output) - grads_output = sess.run(grads, feed_dict={input: input_value, rois: rois_value}) - print('grads: ', grads_output) diff --git a/tensorlayer/third_party/roi_pooling/setup.py b/tensorlayer/third_party/roi_pooling/setup.py deleted file mode 100644 index b262072..0000000 --- a/tensorlayer/third_party/roi_pooling/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import subprocess -import sys -from distutils.command.install import install as DistutilsInstall -from distutils.core import setup - -try: - import tensorflow -except ImportError: - print("Please install tensorflow 0.12.0 or later") - sys.exit() - - -class MyInstall(DistutilsInstall): - def run(self): - subprocess.call(['make', '-C', 'roi_pooling', 'build']) - DistutilsInstall.run(self) - - -setup( - name='roi-pooling', - version='1.0', - description='ROI pooling as a custom TensorFlow operation', - author='deepsense.io', - packages=['roi_pooling'], - package_data={'roi_pooling': ['roi_pooling.so']}, - cmdclass={'install': MyInstall}) diff --git a/tensorlayer/third_party/roi_pooling/test_roi_layer.py b/tensorlayer/third_party/roi_pooling/test_roi_layer.py deleted file mode 100644 index d0e2744..0000000 --- a/tensorlayer/third_party/roi_pooling/test_roi_layer.py +++ /dev/null @@ -1,54 +0,0 @@ -from tensorlayer.layers import * -from tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops import \ - roi_pooling - -# from roi_pooling.roi_pooling_ops import roi_pooling - -# input feature map going into 
the RoI pooling -input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] -input_value = np.asarray(input_value, dtype='float32') - -# Regions of interest as lists of: -# feature map index, upper left, bottom right coordinates -rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] -rois_value = np.asarray(rois_value, dtype='int32') - -# the pool_height and width are parameters of the ROI layer -pool_height, pool_width = (2, 2) -n_rois = len(rois_value) -y_shape = [n_rois, 1, pool_height, pool_width] - -print('Input: ', input_value, ', shape: ', input_value.shape) -print('ROIs: ', rois_value, ', shape: ', rois_value.shape) - -# precise semantics is now only defined by the kernel, need tests -input = tf.placeholder(tf.float32) -rois = tf.placeholder(tf.int32) - -# y = roi_pooling(input, rois, pool_height=2, pool_width=2) -n = InputLayer(input, name='in') -n = ROIPoolingLayer(n, rois=rois, pool_height=2, pool_width=2, name='roi') -y = n.outputs -mean = tf.reduce_mean(y) - -grads = tf.gradients(mean, input) -print(type(grads)) -print(len(grads)) -print(grads) -print(input_value.shape) - -with tf.Session('') as sess: - input_const = tf.constant(input_value, tf.float32) - rois_const = tf.constant(rois_value, tf.int32) - y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2) - mean = tf.reduce_mean(y) - - numerical_grad_error_1 = tf.test.compute_gradient_error([input_const], [input_value.shape], y, y_shape) - numerical_grad_error_2 = tf.test.compute_gradient_error([input_const], [input_value.shape], mean, []) - print(numerical_grad_error_1, numerical_grad_error_2) - -with tf.Session('') as sess: - y_output = sess.run(y, feed_dict={input: input_value, rois: rois_value}) - print('y: ', y_output) - grads_output = sess.run(grads, feed_dict={input: input_value, rois: rois_value}) - print('grads: ', grads_output) diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py 
deleted file mode 100644 index 70b288d..0000000 --- a/tensorlayer/utils.py +++ /dev/null @@ -1,641 +0,0 @@ -# -*- coding: utf-8 -*- -import os, random, subprocess, sys, time -from sys import exit as _exit -from sys import platform as _platform -import numpy as np -import tensorflow as tf -import tensorlayer as tl -from . import _logging as logging -from . import iterate - -__all__ = [ - 'fit', - 'test', - 'predict', - 'evaluation', - 'dict_to_one', - 'flatten_list', - 'class_balancing_oversample', - 'get_random_int', - 'list_string_to_dict', - 'exit_tensorflow', - 'open_tensorboard', - 'clear_all_placeholder_variables', - 'set_gpu_fraction', -] - - -def fit(sess, - network, - train_op, - cost, - X_train, - y_train, - x, - y_, - acc=None, - batch_size=100, - n_epoch=100, - print_freq=5, - X_val=None, - y_val=None, - eval_train=True, - tensorboard=False, - tensorboard_epoch_freq=5, - tensorboard_weight_histograms=True, - tensorboard_graph_vis=True): - """Training a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. - - - MNIST example click `here `_. - - In order to control the training details, the authors HIGHLY recommend ``tl.iterate`` see two MNIST examples `1 `_, `2 `_. - - Parameters - ---------- - sess : Session - TensorFlow Session. - network : TensorLayer layer - the network to be trained. - train_op : TensorFlow optimizer - The optimizer for training e.g. tf.train.AdamOptimizer. - X_train : numpy.array - The input of training data - y_train : numpy.array - The target of training data - x : placeholder - For inputs. - y_ : placeholder - For targets. - acc : TensorFlow expression or None - Metric for accuracy or others. If None, would not print the information. - batch_size : int - The batch size for training and evaluating. - n_epoch : int - The number of training epochs. - print_freq : int - Print the training information every ``print_freq`` epochs. - X_val : numpy.array or None - The input of validation data. 
If None, would not perform validation. - y_val : numpy.array or None - The target of validation data. If None, would not perform validation. - eval_train : boolean - Whether to evaluate the model during training. - If X_val and y_val are not None, it reflects whether to evaluate the model on training data. - tensorboard : boolean - If True, summary data will be stored to the log/ directory for visualization with tensorboard. - See also detailed tensorboard_X settings for specific configurations of features. (default False) - Also runs `tl.layers.initialize_global_variables(sess)` internally in fit() to setup the summary nodes. - tensorboard_epoch_freq : int - How many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5). - tensorboard_weight_histograms : boolean - If True updates tensorboard data in the logs/ directory for visualization - of the weight histograms every tensorboard_epoch_freq epoch (default True). - tensorboard_graph_vis : boolean - If True stores the graph in the tensorboard summaries saved to log/ (default True). - - Examples - -------- - See `tutorial_mnist_simple.py `_ - - >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, - ... acc=acc, batch_size=500, n_epoch=200, print_freq=5, - ... X_val=X_val, y_val=y_val, eval_train=False) - >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, - ... acc=acc, batch_size=500, n_epoch=200, print_freq=5, - ... X_val=X_val, y_val=y_val, eval_train=False, - ... tensorboard=True, tensorboard_weight_histograms=True, tensorboard_graph_vis=True) - - Notes - -------- - If tensorboard=True, the `global_variables_initializer` will be run inside the fit function - in order to initialize the automatically generated summary nodes used for tensorboard visualization, - thus `tf.global_variables_initializer().run()` before the `fit()` call will be undefined. 
- - """ - assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size" - - if (tensorboard): - logging.info("Setting up tensorboard ...") - #Set up tensorboard summaries and saver - tl.files.exists_or_mkdir('logs/') - - #Only write summaries for more recent TensorFlow versions - if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'): - if tensorboard_graph_vis: - train_writer = tf.summary.FileWriter('logs/train', sess.graph) - val_writer = tf.summary.FileWriter('logs/validation', sess.graph) - else: - train_writer = tf.summary.FileWriter('logs/train') - val_writer = tf.summary.FileWriter('logs/validation') - - #Set up summary nodes - if (tensorboard_weight_histograms): - for param in network.all_params: - if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'): - logging.info('Param name %s' % param.name) - tf.summary.histogram(param.name, param) - - if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'): - tf.summary.scalar('cost', cost) - - merged = tf.summary.merge_all() - - #Initalize all variables and summaries - tl.layers.initialize_global_variables(sess) - logging.info("Finished! 
use $tensorboard --logdir=logs/ to start server") - - logging.info("Start training the network ...") - start_time_begin = time.time() - tensorboard_train_index, tensorboard_val_index = 0, 0 - for epoch in range(n_epoch): - start_time = time.time() - loss_ep = 0 - n_step = 0 - for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(network.all_drop) # enable noise layers - loss, _ = sess.run([cost, train_op], feed_dict=feed_dict) - loss_ep += loss - n_step += 1 - loss_ep = loss_ep / n_step - - if tensorboard and hasattr(tf, 'summary'): - if epoch + 1 == 1 or (epoch + 1) % tensorboard_epoch_freq == 0: - for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(dp_dict) - result = sess.run(merged, feed_dict=feed_dict) - train_writer.add_summary(result, tensorboard_train_index) - tensorboard_train_index += 1 - if (X_val is not None) and (y_val is not None): - for X_val_a, y_val_a in iterate.minibatches(X_val, y_val, batch_size, shuffle=True): - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = {x: X_val_a, y_: y_val_a} - feed_dict.update(dp_dict) - result = sess.run(merged, feed_dict=feed_dict) - val_writer.add_summary(result, tensorboard_val_index) - tensorboard_val_index += 1 - - if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: - if (X_val is not None) and (y_val is not None): - logging.info("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) - if eval_train is True: - train_loss, train_acc, n_batch = 0, 0, 0 - for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, batch_size, shuffle=True): - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update(dp_dict) - if acc is not 
None: - err, ac = sess.run([cost, acc], feed_dict=feed_dict) - train_acc += ac - else: - err = sess.run(cost, feed_dict=feed_dict) - train_loss += err - n_batch += 1 - logging.info(" train loss: %f" % (train_loss / n_batch)) - if acc is not None: - logging.info(" train acc: %f" % (train_acc / n_batch)) - val_loss, val_acc, n_batch = 0, 0, 0 - for X_val_a, y_val_a in iterate.minibatches(X_val, y_val, batch_size, shuffle=True): - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = {x: X_val_a, y_: y_val_a} - feed_dict.update(dp_dict) - if acc is not None: - err, ac = sess.run([cost, acc], feed_dict=feed_dict) - val_acc += ac - else: - err = sess.run(cost, feed_dict=feed_dict) - val_loss += err - n_batch += 1 - logging.info(" val loss: %f" % (val_loss / n_batch)) - if acc is not None: - logging.info(" val acc: %f" % (val_acc / n_batch)) - else: - logging.info("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep)) - logging.info("Total training time: %fs" % (time.time() - start_time_begin)) - - -def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None): - """ - Test a given non time-series network by the given test data and metric. - - Parameters - ---------- - sess : Session - TensorFlow session. - network : TensorLayer layer - The network. - acc : TensorFlow expression or None - Metric for accuracy or others. - - If None, would not print the information. - X_test : numpy.array - The input of testing data. - y_test : numpy array - The target of testing data - x : placeholder - For inputs. - y_ : placeholder - For targets. - batch_size : int or None - The batch size for testing, when dataset is large, we should use minibatche for testing; - if dataset is small, we can set it to None. - cost : TensorFlow expression or None - Metric for cost or others. If None, would not print the information. 
- - Examples - -------- - See `tutorial_mnist_simple.py `_ - - >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost) - - """ - logging.info('Start testing the network ...') - if batch_size is None: - dp_dict = dict_to_one(network.all_drop) - feed_dict = {x: X_test, y_: y_test} - feed_dict.update(dp_dict) - if cost is not None: - logging.info(" test loss: %f" % sess.run(cost, feed_dict=feed_dict)) - logging.info(" test acc: %f" % sess.run(acc, feed_dict=feed_dict)) - # logging.info(" test acc: %f" % np.mean(y_test == sess.run(y_op, - # feed_dict=feed_dict))) - else: - test_loss, test_acc, n_batch = 0, 0, 0 - for X_test_a, y_test_a in iterate.minibatches(X_test, y_test, batch_size, shuffle=True): - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = {x: X_test_a, y_: y_test_a} - feed_dict.update(dp_dict) - if cost is not None: - err, ac = sess.run([cost, acc], feed_dict=feed_dict) - test_loss += err - else: - ac = sess.run(acc, feed_dict=feed_dict) - test_acc += ac - n_batch += 1 - if cost is not None: - logging.info(" test loss: %f" % (test_loss / n_batch)) - logging.info(" test acc: %f" % (test_acc / n_batch)) - - -def predict(sess, network, X, x, y_op, batch_size=None): - """ - Return the predict results of given non time-series network. - - Parameters - ---------- - sess : Session - TensorFlow Session. - network : TensorLayer layer - The network. - X : numpy.array - The inputs. - x : placeholder - For inputs. - y_op : placeholder - The argmax expression of softmax outputs. - batch_size : int or None - The batch size for prediction, when dataset is large, we should use minibatche for prediction; - if dataset is small, we can set it to None. 
- - Examples - -------- - See `tutorial_mnist_simple.py `_ - - >>> y = network.outputs - >>> y_op = tf.argmax(tf.nn.softmax(y), 1) - >>> print(tl.utils.predict(sess, network, X_test, x, y_op)) - - """ - if batch_size is None: - dp_dict = dict_to_one(network.all_drop) # disable noise layers - feed_dict = { - x: X, - } - feed_dict.update(dp_dict) - return sess.run(y_op, feed_dict=feed_dict) - else: - result = None - for X_a, _ in iterate.minibatches(X, X, batch_size, shuffle=False): - dp_dict = dict_to_one(network.all_drop) - feed_dict = { - x: X_a, - } - feed_dict.update(dp_dict) - result_a = sess.run(y_op, feed_dict=feed_dict) - if result is None: - result = result_a - else: - result = np.vstack((result, result_a)) # TODO: https://github.com/tensorlayer/tensorlayer/issues/288 - if result is None: - if len(X) % batch_size != 0: - dp_dict = dict_to_one(network.all_drop) - feed_dict = { - x: X[-(len(X) % batch_size):, :], - } - feed_dict.update(dp_dict) - result_a = sess.run(y_op, feed_dict=feed_dict) - result = result_a - else: - if len(X) != len(result) and len(X) % batch_size != 0: - dp_dict = dict_to_one(network.all_drop) - feed_dict = { - x: X[-(len(X) % batch_size):, :], - } - feed_dict.update(dp_dict) - result_a = sess.run(y_op, feed_dict=feed_dict) - result = np.vstack((result, result_a)) # TODO: https://github.com/tensorlayer/tensorlayer/issues/288 - return result - - -## Evaluation -def evaluation(y_test=None, y_predict=None, n_classes=None): - """ - Input the predicted results, targets results and - the number of class, return the confusion matrix, F1-score of each class, - accuracy and macro F1-score. 
- - Parameters - ---------- - y_test : list - The target results - y_predict : list - The predicted results - n_classes : int - The number of classes - - Examples - -------- - >>> c_mat, f1, acc, f1_macro = tl.utils.evaluation(y_test, y_predict, n_classes) - - """ - from sklearn.metrics import confusion_matrix, f1_score, accuracy_score - c_mat = confusion_matrix(y_test, y_predict, labels=[x for x in range(n_classes)]) - f1 = f1_score(y_test, y_predict, average=None, labels=[x for x in range(n_classes)]) - f1_macro = f1_score(y_test, y_predict, average='macro') - acc = accuracy_score(y_test, y_predict) - logging.info('confusion matrix: \n%s' % c_mat) - logging.info('f1-score : %s' % f1) - logging.info('f1-score(macro) : %f' % f1_macro) # same output with > f1_score(y_true, y_pred, average='macro') - logging.info('accuracy-score : %f' % acc) - return c_mat, f1, acc, f1_macro - - -def dict_to_one(dp_dict): - """Input a dictionary, return a dictionary that all items are set to one. - - Used for disable dropout, dropconnect layer and so on. - - Parameters - ---------- - dp_dict : dictionary - The dictionary contains key and number, e.g. keeping probabilities. - - Examples - -------- - >>> dp_dict = dict_to_one( network.all_drop ) - >>> dp_dict = dict_to_one( network.all_drop ) - >>> feed_dict.update(dp_dict) - - """ - return {x: 1 for x in dp_dict} - - -def flatten_list(list_of_list): - """Input a list of list, return a list that all items are in a list. - - Parameters - ---------- - list_of_list : a list of list - - Examples - -------- - >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]]) - ... [1, 2, 3, 4, 5, 6] - - """ - return sum(list_of_list, []) - - -def class_balancing_oversample(X_train=None, y_train=None, printable=True): - """Input the features and labels, return the features and labels after oversampling. - - Parameters - ---------- - X_train : numpy.array - The inputs. - y_train : numpy.array - The targets. 
- - Examples - -------- - One X - - >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True) - - Two X - - >>> X, y = tl.utils.class_balancing_oversample(X_train=np.hstack((X1, X2)), y_train=y, printable=False) - >>> X1 = X[:, 0:5] - >>> X2 = X[:, 5:] - - """ - # ======== Classes balancing - if printable: - logging.info("Classes balancing for training examples...") - from collections import Counter - c = Counter(y_train) - if printable: - logging.info('the occurrence number of each stage: %s' % c.most_common()) - logging.info('the least stage is Label %s have %s instances' % c.most_common()[-1]) - logging.info('the most stage is Label %s have %s instances' % c.most_common(1)[0]) - most_num = c.most_common(1)[0][1] - if printable: - logging.info('most num is %d, all classes tend to be this num' % most_num) - - locations = {} - number = {} - - for lab, num in c.most_common(): # find the index from y_train - number[lab] = num - locations[lab] = np.where(np.array(y_train) == lab)[0] - if printable: - logging.info('convert list(np.array) to dict format') - X = {} # convert list to dict - for lab, num in number.items(): - X[lab] = X_train[locations[lab]] - - # oversampling - if printable: - logging.info('start oversampling') - for key in X: - temp = X[key] - while True: - if len(X[key]) >= most_num: - break - X[key] = np.vstack((X[key], temp)) - if printable: - logging.info('first features of label 0 > %d' % len(X[0][0])) - logging.info('the occurrence num of each stage after oversampling') - for key in X: - logging.info("%s %d" % (key, len(X[key]))) - if printable: - logging.info('make each stage have same num of instances') - for key in X: - X[key] = X[key][0:most_num, :] - logging.info("%s %d" % (key, len(X[key]))) - - # convert dict to list - if printable: - logging.info('convert from dict to list format') - y_train = [] - X_train = np.empty(shape=(0, len(X[0][0]))) - for key in X: - X_train = np.vstack((X_train, X[key])) - 
y_train.extend([key for i in range(len(X[key]))]) - # logging.info(len(X_train), len(y_train)) - c = Counter(y_train) - if printable: - logging.info('the occurrence number of each stage after oversampling: %s' % c.most_common()) - # ================ End of Classes balancing - return X_train, y_train - - -## Random -def get_random_int(min_v=0, max_v=10, number=5, seed=None): - """Return a list of random integer by the given range and quantity. - - Parameters - ----------- - min_v : number - The minimum value. - max_v : number - The maximum value. - number : int - Number of value. - seed : int or None - The seed for random. - - Examples - --------- - >>> r = get_random_int(min_v=0, max_v=10, number=5) - ... [10, 2, 3, 3, 7] - - """ - rnd = random.Random() - if seed: - rnd = random.Random(seed) - # return [random.randint(min,max) for p in range(0, number)] - return [rnd.randint(min_v, max_v) for p in range(0, number)] - - -def list_string_to_dict(string): - """Inputs ``['a', 'b', 'c']``, returns ``{'a': 0, 'b': 1, 'c': 2}``.""" - dictionary = {} - for idx, c in enumerate(string): - dictionary.update({c: idx}) - return dictionary - - -def exit_tensorflow(sess=None, port=6006): - """Close TensorFlow session, TensorBoard and Nvidia-process if available. - - Parameters - ---------- - sess : Session - TensorFlow Session. - tb_port : int - TensorBoard port you want to close, `6006` as default. 
- - """ - text = "[TL] Close tensorboard and nvidia-process if available" - text2 = "[TL] Close tensorboard and nvidia-process not yet supported by this function (tl.ops.exit_tf) on " - if sess is not None: - sess.close() - # import time - # time.sleep(2) - if _platform == "linux" or _platform == "linux2": - logging.info('linux: %s' % text) - os.system('nvidia-smi') - os.system('fuser ' + port + '/tcp -k') # kill tensorboard 6006 - os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process - _exit() - elif _platform == "darwin": - logging.info('OS X: %s' % text) - subprocess.Popen("lsof -i tcp:" + str(port) + " | grep -v PID | awk '{print $2}' | xargs kill", shell=True) # kill tensorboard - elif _platform == "win32": - raise NotImplementedError("this function is not supported on the Windows platform") - else: - logging.info(text2 + _platform) - - -def open_tensorboard(log_dir='/tmp/tensorflow', port=6006): - """Open Tensorboard. - - Parameters - ---------- - log_dir : str - Directory where your tensorboard logs are saved - port : int - TensorBoard port you want to open, 6006 is tensorboard default - - """ - text = "[TL] Open tensorboard, go to localhost:" + str(port) + " to access" - text2 = " not yet supported by this function (tl.ops.open_tb)" - - if not tl.files.exists_or_mkdir(log_dir, verbose=False): - logging.info("[TL] Log reportory was created at %s" % log_dir) - - if _platform == "linux" or _platform == "linux2": - raise NotImplementedError() - elif _platform == "darwin": - logging.info('OS X: %s' % text) - subprocess.Popen( - sys.prefix + " | python -m tensorflow.tensorboard --logdir=" + log_dir + " --port=" + str(port), - shell=True) # open tensorboard in localhost:6006/ or whatever port you chose - elif _platform == "win32": - raise NotImplementedError("this function is not supported on the Windows platform") - else: - logging.info(_platform + text2) - - -def clear_all_placeholder_variables(printable=True): - 
"""Clears all the placeholder variables of keep prob, - including keeping probabilities of all dropout, denoising, dropconnect etc. - - Parameters - ---------- - printable : boolean - If True, print all deleted variables. - - """ - logging.info('clear all .....................................') - gl = globals().copy() - for var in gl: - if var[0] == '_': continue - if 'func' in str(globals()[var]): continue - if 'module' in str(globals()[var]): continue - if 'class' in str(globals()[var]): continue - - if printable: - logging.info(" clear_all ------- %s" % str(globals()[var])) - - del globals()[var] - - -def set_gpu_fraction(gpu_fraction=0.3): - """Set the GPU memory fraction for the application. - - Parameters - ---------- - gpu_fraction : float - Fraction of GPU memory, (0 ~ 1] - - References - ---------- - - `TensorFlow using GPU `__ - - """ - logging.info("[TL]: GPU MEM Fraction %f" % gpu_fraction) - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction) - sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) - return sess diff --git a/tensorlayer/visualize.py b/tensorlayer/visualize.py deleted file mode 100644 index daba94a..0000000 --- a/tensorlayer/visualize.py +++ /dev/null @@ -1,516 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import numpy as np -import scipy.misc # save/read image(s) -from . import _logging as logging -from . import prepro - -# Uncomment the following line if you got: _tkinter.TclError: no display name and no $DISPLAY environment variable -# import matplotlib -# matplotlib.use('Agg') - -__all__ = [ - 'read_image', - 'read_images', - 'save_image', - 'save_images', - 'draw_boxes_and_labels_to_image', - 'frame', - 'CNN2d', - 'images2d', - 'tsne_embedding', - 'draw_weights', - 'W', -] - - -def read_image(image, path=''): - """Read one image. - - Parameters - ----------- - image : str - The image file name. - path : str - The image folder path. - - Returns - ------- - numpy.array - The image. 
- - """ - return scipy.misc.imread(os.path.join(path, image)) - - -def read_images(img_list, path='', n_threads=10, printable=True): - """Returns all images in list by given path and name of each image file. - - Parameters - ------------- - img_list : list of str - The image file names. - path : str - The image folder path. - n_threads : int - The number of threads to read image. - printable : boolean - Whether to print information when reading images. - - Returns - ------- - list of numpy.array - The images. - - """ - imgs = [] - for idx in range(0, len(img_list), n_threads): - b_imgs_list = img_list[idx:idx + n_threads] - b_imgs = prepro.threading_data(b_imgs_list, fn=read_image, path=path) - # logging.info(b_imgs.shape) - imgs.extend(b_imgs) - if printable: - logging.info('read %d from %s' % (len(imgs), path)) - return imgs - - -def save_image(image, image_path=''): - """Save a image. - - Parameters - ----------- - image : numpy array - [w, h, c] - image_path : str - path - - """ - try: # RGB - scipy.misc.imsave(image_path, image) - except Exception: # Greyscale - scipy.misc.imsave(image_path, image[:, :, 0]) - - -def save_images(images, size, image_path=''): - """Save multiple images into one single image. - - Parameters - ----------- - images : numpy array - (batch, w, h, c) - size : list of 2 ints - row and column number. - number of images should be equal or less than size[0] * size[1] - image_path : str - save path - - Returns - ------- - numpy.array - The image. 
- - Examples - --------- - >>> images = np.random.rand(64, 100, 100, 3) - >>> tl.visualize.save_images(images, [8, 8], 'temp.png') - - """ - if len(images.shape) == 3: # Greyscale [batch, h, w] --> [batch, h, w, 1] - images = images[:, :, :, np.newaxis] - - def merge(images, size): - h, w = images.shape[1], images.shape[2] - img = np.zeros((h * size[0], w * size[1], 3)) - for idx, image in enumerate(images): - i = idx % size[1] - j = idx // size[1] - img[j * h:j * h + h, i * w:i * w + w, :] = image - return img - - def imsave(images, size, path): - return scipy.misc.imsave(path, merge(images, size)) - - assert len(images) <= size[0] * size[1], "number of images should be equal or less than size[0] * size[1] {}".format(len(images)) - return imsave(images, size, image_path) - - -def draw_boxes_and_labels_to_image(image, classes, coords, scores, classes_list, is_center=True, is_rescale=True, save_name=None): - """Draw bboxes and class labels on image. Return or save the image with bboxes, example in the docs of ``tl.prepro``. - - Parameters - ----------- - image : numpy.array - The RGB image [height, width, channel]. - classes : list of int - A list of class ID (int). - coords : list of int - A list of list for coordinates. - - Should be [x, y, x2, y2] (up-left and botton-right format) - - If [x_center, y_center, w, h] (set is_center to True). - scores : list of float - A list of score (float). (Optional) - classes_list : list of str - for converting ID to string on image. - is_center : boolean - Whether the coordinates is [x_center, y_center, w, h] - - If coordinates are [x_center, y_center, w, h], set it to True for converting it to [x, y, x2, y2] (up-left and botton-right) internally. - - If coordinates are [x1, x2, y1, y2], set it to False. - is_rescale : boolean - Whether to rescale the coordinates from pixel-unit format to ratio format. 
- - If True, the input coordinates are the portion of width and high, this API will scale the coordinates to pixel unit internally. - - If False, feed the coordinates with pixel unit format. - save_name : None or str - The name of image file (i.e. image.png), if None, not to save image. - - Returns - ------- - numpy.array - The saved image. - - References - ----------- - - OpenCV rectangle and putText. - - `scikit-image `__. - - """ - assert len(coords) == len(classes), "number of coordinates and classes are equal" - if len(scores) > 0: - assert len(scores) == len(classes), "number of scores and classes are equal" - - import cv2 - - # don't change the original image, and avoid error https://stackoverflow.com/questions/30249053/python-opencv-drawing-errors-after-manipulating-array-with-numpy - image = image.copy() - - imh, imw = image.shape[0:2] - thick = int((imh + imw) // 430) - - for i, _v in enumerate(coords): - if is_center: - x, y, x2, y2 = prepro.obj_box_coord_centroid_to_upleft_butright(coords[i]) - else: - x, y, x2, y2 = coords[i] - - if is_rescale: # scale back to pixel unit if the coords are the portion of width and high - x, y, x2, y2 = prepro.obj_box_coord_scale_to_pixelunit([x, y, x2, y2], (imh, imw)) - - cv2.rectangle( - image, - (int(x), int(y)), - (int(x2), int(y2)), # up-left and botton-right - [0, 255, 0], - thick) - - cv2.putText( - image, - classes_list[classes[i]] + ((" %.2f" % (scores[i])) if (len(scores) != 0) else " "), - (int(x), int(y)), # button left - 0, - 1.5e-3 * imh, # bigger = larger font - [0, 0, 256], # self.meta['colors'][max_indx], - int(thick / 2) + 1) # bold - - if save_name is not None: - # cv2.imwrite('_my.png', image) - save_image(image, save_name) - # if len(coords) == 0: - # logging.info("draw_boxes_and_labels_to_image: no bboxes exist, cannot draw !") - return image - - -def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=12836): - """Display a frame(image). 
Make sure OpenAI Gym render() is disable before using it. - - Parameters - ---------- - I : numpy.array - The image. - second : int - The display second(s) for the image(s), if saveable is False. - saveable : boolean - Save or plot the figure. - name : str - A name to save the image, if saveable is True. - cmap : None or str - 'gray' for greyscale, None for default, etc. - fig_idx : int - matplotlib figure index. - - Examples - -------- - >>> env = gym.make("Pong-v0") - >>> observation = env.reset() - >>> tl.visualize.frame(observation) - - """ - import matplotlib.pyplot as plt - if saveable is False: - plt.ion() - plt.figure(fig_idx) # show all feature images - - if len(I.shape) and I.shape[-1] == 1: # (10,10,1) --> (10,10) - I = I[:, :, 0] - - plt.imshow(I, cmap) - plt.title(name) - # plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick - # plt.gca().yaxis.set_major_locator(plt.NullLocator()) - - if saveable: - plt.savefig(name + '.pdf', format='pdf') - else: - plt.draw() - plt.pause(second) - - -def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362): - """Display a group of RGB or Greyscale CNN masks. - - Parameters - ---------- - CNN : numpy.array - The image. e.g: 64 5x5 RGB images can be (5, 5, 3, 64). - second : int - The display second(s) for the image(s), if saveable is False. - saveable : boolean - Save or plot the figure. - name : str - A name to save the image, if saveable is True. - fig_idx : int - The matplotlib figure index. 
- - Examples - -------- - >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012) - - """ - import matplotlib.pyplot as plt - # logging.info(CNN.shape) # (5, 5, 3, 64) - # exit() - n_mask = CNN.shape[3] - n_row = CNN.shape[0] - n_col = CNN.shape[1] - n_color = CNN.shape[2] - row = int(np.sqrt(n_mask)) - col = int(np.ceil(n_mask / row)) - plt.ion() # active mode - fig = plt.figure(fig_idx) - count = 1 - for _ir in range(1, row + 1): - for _ic in range(1, col + 1): - if count > n_mask: - break - fig.add_subplot(col, row, count) - # logging.info(CNN[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 - # exit() - # plt.imshow( - # np.reshape(CNN[count-1,:,:,:], (n_row, n_col)), - # cmap='gray', interpolation="nearest") # theano - if n_color == 1: - plt.imshow(np.reshape(CNN[:, :, :, count - 1], (n_row, n_col)), cmap='gray', interpolation="nearest") - elif n_color == 3: - plt.imshow(np.reshape(CNN[:, :, :, count - 1], (n_row, n_col, n_color)), cmap='gray', interpolation="nearest") - else: - raise Exception("Unknown n_color") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick - plt.gca().yaxis.set_major_locator(plt.NullLocator()) - count = count + 1 - if saveable: - plt.savefig(name + '.pdf', format='pdf') - else: - plt.draw() - plt.pause(second) - - -def images2d(images=None, second=10, saveable=True, name='images', dtype=None, fig_idx=3119362): - """Display a group of RGB or Greyscale images. - - Parameters - ---------- - images : numpy.array - The images. - second : int - The display second(s) for the image(s), if saveable is False. - saveable : boolean - Save or plot the figure. - name : str - A name to save the image, if saveable is True. - dtype : None or numpy data type - The data type for displaying the images. - fig_idx : int - matplotlib figure index. 
- - Examples - -------- - >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False) - >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212) - - """ - import matplotlib.pyplot as plt - # logging.info(images.shape) # (50000, 32, 32, 3) - # exit() - if dtype: - images = np.asarray(images, dtype=dtype) - n_mask = images.shape[0] - n_row = images.shape[1] - n_col = images.shape[2] - n_color = images.shape[3] - row = int(np.sqrt(n_mask)) - col = int(np.ceil(n_mask / row)) - plt.ion() # active mode - fig = plt.figure(fig_idx) - count = 1 - for _ir in range(1, row + 1): - for _ic in range(1, col + 1): - if count > n_mask: - break - fig.add_subplot(col, row, count) - # logging.info(images[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 - # plt.imshow( - # np.reshape(images[count-1,:,:,:], (n_row, n_col)), - # cmap='gray', interpolation="nearest") # theano - if n_color == 1: - plt.imshow(np.reshape(images[count - 1, :, :], (n_row, n_col)), cmap='gray', interpolation="nearest") - # plt.title(name) - elif n_color == 3: - plt.imshow(images[count - 1, :, :], cmap='gray', interpolation="nearest") - # plt.title(name) - else: - raise Exception("Unknown n_color") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick - plt.gca().yaxis.set_major_locator(plt.NullLocator()) - count = count + 1 - if saveable: - plt.savefig(name + '.pdf', format='pdf') - else: - plt.draw() - plt.pause(second) - - -def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, second=5, saveable=False, name='tsne', fig_idx=9862): - """Visualize the embeddings by using t-SNE. - - Parameters - ---------- - embeddings : numpy.array - The embedding matrix. - reverse_dictionary : dictionary - id_to_word, mapping id to unique word. - plot_only : int - The number of examples to plot, choice the most common words. 
- second : int - The display second(s) for the image(s), if saveable is False. - saveable : boolean - Save or plot the figure. - name : str - A name to save the image, if saveable is True. - fig_idx : int - matplotlib figure index. - - Examples - -------- - >>> see 'tutorial_word2vec_basic.py' - >>> final_embeddings = normalized_embeddings.eval() - >>> tl.visualize.tsne_embedding(final_embeddings, labels, reverse_dictionary, - ... plot_only=500, second=5, saveable=False, name='tsne') - - """ - import matplotlib.pyplot as plt - - def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, saveable=True, name='tsne', fig_idx=9862): - assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" - if saveable is False: - plt.ion() - plt.figure(fig_idx) - plt.figure(figsize=figsize) #in inches - for i, label in enumerate(labels): - x, y = low_dim_embs[i, :] - plt.scatter(x, y) - plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom') - if saveable: - plt.savefig(name + '.pdf', format='pdf') - else: - plt.draw() - plt.pause(second) - - try: - from sklearn.manifold import TSNE - from six.moves import xrange - - tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) - # plot_only = 500 - low_dim_embs = tsne.fit_transform(embeddings[:plot_only, :]) - labels = [reverse_dictionary[i] for i in xrange(plot_only)] - plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable, \ - name=name, fig_idx=fig_idx) - except ImportError: - logging.info("Please install sklearn and matplotlib to visualize embeddings.") - - -def draw_weights(W=None, second=10, saveable=True, shape=None, name='mnist', fig_idx=2396512): - """Visualize every columns of the weight matrix to a group of Greyscale img. - - Parameters - ---------- - W : numpy.array - The weight matrix - second : int - The display second(s) for the image(s), if saveable is False. - saveable : boolean - Save or plot the figure. 
- shape : a list with 2 int or None - The shape of feature image, MNIST is [28, 80]. - name : a string - A name to save the image, if saveable is True. - fig_idx : int - matplotlib figure index. - - Examples - -------- - >>> tl.visualize.draw_weights(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012) - - """ - if shape is None: - shape = [28, 28] - - import matplotlib.pyplot as plt - if saveable is False: - plt.ion() - fig = plt.figure(fig_idx) # show all feature images - n_units = W.shape[1] - - num_r = int(np.sqrt(n_units)) # 每行显示的个数 若25个hidden unit -> 每行显示5个 - num_c = int(np.ceil(n_units / num_r)) - count = int(1) - for _row in range(1, num_r + 1): - for _col in range(1, num_c + 1): - if count > n_units: - break - fig.add_subplot(num_r, num_c, count) - # ------------------------------------------------------------ - # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray') - # ------------------------------------------------------------ - feature = W[:, count - 1] / np.sqrt((W[:, count - 1]**2).sum()) - # feature[feature<0.0001] = 0 # value threshold - # if count == 1 or count == 2: - # print(np.mean(feature)) - # if np.std(feature) < 0.03: # condition threshold - # feature = np.zeros_like(feature) - # if np.mean(feature) < -0.015: # condition threshold - # feature = np.zeros_like(feature) - plt.imshow(np.reshape(feature, (shape[0], shape[1])), cmap='gray', interpolation="nearest") #, vmin=np.min(feature), vmax=np.max(feature)) - # plt.title(name) - # ------------------------------------------------------------ - # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick - plt.gca().yaxis.set_major_locator(plt.NullLocator()) - count = count + 1 - if saveable: - plt.savefig(name + '.pdf', format='pdf') - else: - plt.draw() - plt.pause(second) - - -W = draw_weights diff --git a/utils.py b/utils.py 
index f26d34e..db13e9d 100755 --- a/utils.py +++ b/utils.py @@ -1,4 +1,5 @@ from random import shuffle + import scipy.misc import numpy as np @@ -11,14 +12,14 @@ def center_crop(x, crop_h, crop_w=None, resize_w=64): return scipy.misc.imresize(x[j:j+crop_h, i:i+crop_w], [resize_w, resize_w]) -# def merge(images, size): -# h, w = images.shape[1], images.shape[2] -# img = np.zeros((h * size[0], w * size[1], 3)) -# for idx, image in enumerate(images): -# i = idx % size[1] -# j = idx // size[1] -# img[j*h:j*h+h, i*w:i*w+w, :] = image -# return img +def merge(images, size): + h, w = images.shape[1], images.shape[2] + img = np.zeros((h * size[0], w * size[1], 3)) + for idx, image in enumerate(images): + i = idx % size[1] + j = idx // size[1] + img[j*h:j*h+h, i*w:i*w+w, :] = image + return img def transform(image, npx=64, is_crop=True, resize_w=64): if is_crop: @@ -27,8 +28,8 @@ def transform(image, npx=64, is_crop=True, resize_w=64): cropped_image = image return np.array(cropped_image)/127.5 - 1. -# def inverse_transform(images): -# return (images+1.)/2. +def inverse_transform(images): + return (images+1.)/2. def imread(path, is_grayscale = False): if (is_grayscale): @@ -36,11 +37,11 @@ def imread(path, is_grayscale = False): else: return scipy.misc.imread(path).astype(np.float) -# def imsave(images, size, path): -# return scipy.misc.imsave(path, merge(images, size)) +def imsave(images, size, path): + return scipy.misc.imsave(path, merge(images, size)) def get_image(image_path, image_size, is_crop=True, resize_w=64, is_grayscale = False): return transform(imread(image_path, is_grayscale), image_size, is_crop, resize_w) -# def save_images(images, size, image_path): -# return imsave(inverse_transform(images), size, image_path) +def save_images(images, size, image_path): + return imsave(inverse_transform(images), size, image_path)