From e9b7c8424fd3cc1eae9f2e1a76284d4858da747d Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 1 Jun 2018 11:59:23 -0700 Subject: [PATCH] Initial commit --- .gitignore | 12 + .travis.yml | 96 +++ CONTRIBUTING.md | 0 ISSUE_TEMPLATE.md | 0 LICENSE | 34 + README.md | 20 + keras_applications/__init__.py | 54 ++ keras_applications/densenet.py | 346 +++++++++++ keras_applications/imagenet_utils.py | 330 ++++++++++ keras_applications/inception_resnet_v2.py | 366 +++++++++++ keras_applications/inception_v3.py | 405 ++++++++++++ keras_applications/mobilenet.py | 472 ++++++++++++++ keras_applications/mobilenet_v2.py | 512 +++++++++++++++ keras_applications/nasnet.py | 725 ++++++++++++++++++++++ keras_applications/resnet50.py | 280 +++++++++ keras_applications/vgg16.py | 219 +++++++ keras_applications/vgg19.py | 231 +++++++ keras_applications/xception.py | 338 ++++++++++ pytest.ini | 18 + setup.py | 55 ++ tests/applications_test.py | 187 ++++++ tests/imagenet_utils_test.py | 252 ++++++++ tests/integration_tests.py | 1 + 23 files changed, 4953 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 CONTRIBUTING.md create mode 100644 ISSUE_TEMPLATE.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 keras_applications/__init__.py create mode 100644 keras_applications/densenet.py create mode 100644 keras_applications/imagenet_utils.py create mode 100644 keras_applications/inception_resnet_v2.py create mode 100644 keras_applications/inception_v3.py create mode 100644 keras_applications/mobilenet.py create mode 100644 keras_applications/mobilenet_v2.py create mode 100644 keras_applications/nasnet.py create mode 100644 keras_applications/resnet50.py create mode 100644 keras_applications/vgg16.py create mode 100644 keras_applications/vgg19.py create mode 100644 keras_applications/xception.py create mode 100644 pytest.ini create mode 100644 setup.py create mode 100644 tests/applications_test.py create mode 100644 tests/imagenet_utils_test.py create mode 100644 tests/integration_tests.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0ed3812 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.DS_Store +*.pyc +temp/* +dist/* +build/* +tags +Keras_Applications.egg-info + +# test-related +.coverage +.cache +.pytest_cache diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..84124de --- /dev/null +++ b/.travis.yml @@ -0,0 +1,96 @@ +sudo: required +dist: trusty +language: python +matrix: + include: + - python: 2.7 + env: KERAS_BACKEND=tensorflow TEST_MODE=PEP8 + - python: 2.7 + env: KERAS_BACKEND=tensorflow TEST_MODE=INTEGRATION_TESTS + - python: 3.6 + env: KERAS_BACKEND=tensorflow TEST_MODE=DOC + - python: 2.7 + env: KERAS_BACKEND=tensorflow + - python: 3.6 + env: KERAS_BACKEND=tensorflow + - python: 2.7 + env: KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile + - python: 3.6 + env: KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile + - python: 2.7 + env: KERAS_BACKEND=cntk PYTHONWARNINGS=ignore + - python: 3.6 + env: KERAS_BACKEND=cntk PYTHONWARNINGS=ignore +install: + # code below is taken from http://conda.pydata.org/docs/travis.html + # We do this conditionally because it saves us some downloading if the + # version is the same. 
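+  # (Python 2.7 jobs bootstrap Miniconda 2; Python 3.6 jobs bootstrap Miniconda 3.)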
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+      wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
+    else
+      wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+    fi
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - hash -r
+  - conda config --set always_yes yes --set changeps1 no
+  - conda update -q conda
+  # Useful for debugging any issues with conda
+  - conda info -a
+
+  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytest pandas
+  - source activate test-environment
+  - pip install --only-binary=numpy,scipy numpy nose scipy matplotlib h5py theano
+  - conda install mkl mkl-service
+
+  # set library path
+  - export LD_LIBRARY_PATH=$HOME/miniconda/envs/test-environment/lib/:$LD_LIBRARY_PATH
+
+  # install PIL
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+      conda install pil;
+    elif [[ "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then
+      conda install Pillow;
+    fi
+
+  - pip install -e .[tests]
+
+  # install TensorFlow (CPU version).
+  - pip install tensorflow==1.7
+
+  # install cntk
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+      pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.3.1-cp27-cp27mu-linux_x86_64.whl;
+    elif [[ "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then
+      pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.3.1-cp36-cp36m-linux_x86_64.whl;
+    fi
+
+  # install pydot for visualization tests
+  - conda install pydot graphviz
+
+  # install open mpi
+  - rm -rf ~/mpi
+  - mkdir ~/mpi
+  - pushd ~/mpi
+  - wget http://cntk.ai/PythonWheel/ForKeras/depends/openmpi_1.10-3.zip
+  - unzip ./openmpi_1.10-3.zip
+  - sudo dpkg -i openmpi_1.10-3.deb
+  - popd
+
+# command to run tests
+script:
+  - export MKL_THREADING_LAYER="GNU"
+  # run keras backend init to initialize backend config
+  - python -c "import keras.backend"
+  # create models directory to avoid concurrent directory creation at runtime
+  - mkdir ~/.keras/models
+  # set up keras backend
+  - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
+  - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)"
+  - if [[ "$TEST_MODE" == "INTEGRATION_TESTS" ]]; then
+      PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests;
+    elif [[ "$TEST_MODE" == "PEP8" ]]; then
+      PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0;
+    else
+      PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests --cov-config .coveragerc --cov=keras_applications tests/;
+    fi
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..e69de29
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..e69de29
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3229a59
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,34 @@
+COPYRIGHT
+
+Copyright (c) 2016 - 2018, the respective contributors.
+All rights reserved.
+
+Each contributor holds copyright over their respective contributions.
+The project versioning (Git) records all such contribution source information.
+The initial code of this repository came from https://github.com/keras-team/keras
+(the Keras repository), hence, for author information regarding commits
+that occurred earlier than the first commit in the present repository,
+please see the original Keras repository.
+
+LICENSE
+
+The MIT License (MIT)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..708ba34
--- /dev/null
+++ b/README.md
@@ -0,0 +1,20 @@
+# Keras Applications
+
+[![Build Status](https://travis-ci.org/keras-team/keras-applications.svg?branch=master)](https://travis-ci.org/keras-team/keras-applications)
+
+Keras Applications is the `applications` module of
+the Keras deep learning library.
+It provides model definitions and pre-trained weights for a number
+of popular architectures, such as VGG16, ResNet50, Xception, MobileNet, and more.
+
+Read the documentation at: https://keras.io/applications/
+
+Keras Applications may be imported directly
+from an up-to-date installation of Keras:
+
+```
+from keras import applications
+```
+
+Keras Applications is compatible with Python 2.7-3.6
+and is distributed under the MIT license.
diff --git a/keras_applications/__init__.py b/keras_applications/__init__.py
new file mode 100644
index 0000000..1241b40
--- /dev/null
+++ b/keras_applications/__init__.py
@@ -0,0 +1,54 @@
+"""Enables dynamic setting of underlying Keras module.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+_KERAS_BACKEND = None
+_KERAS_ENGINE = None
+_KERAS_LAYERS = None
+_KERAS_MODELS = None
+_KERAS_UTILS = None
+
+
+def set_keras_submodules(backend, engine, layers, models, utils):
+    global _KERAS_BACKEND
+    global _KERAS_ENGINE
+    global _KERAS_LAYERS
+    global _KERAS_MODELS
+    global _KERAS_UTILS
+    _KERAS_BACKEND = backend
+    _KERAS_ENGINE = engine
+    _KERAS_LAYERS = layers
+    _KERAS_MODELS = models
+    _KERAS_UTILS = utils
+
+
+def get_keras_submodule(name):
+    if name not in {'backend', 'engine', 'layers', 'models', 'utils'}:
+        raise ImportError(
+            'Can only retrieve one of "backend", '
+            '"engine", "layers", "models", or "utils". '
+            'Requested: %s' % name)
+    if _KERAS_BACKEND is None:
+        raise ImportError('You need to first `import keras` '
+                          'in order to use `keras_applications`. '
+                          'For instance, you can do:\n\n'
+                          '```\n'
+                          'import keras\n'
+                          'from keras_applications import vgg16\n'
+                          '```\n\n'
+                          'Or, preferably, this equivalent formulation:\n\n'
+                          '```\n'
+                          'from keras import applications\n'
+                          '```\n')
+    if name == 'backend':
+        return _KERAS_BACKEND
+    elif name == 'engine':
+        return _KERAS_ENGINE
+    elif name == 'layers':
+        return _KERAS_LAYERS
+    elif name == 'models':
+        return _KERAS_MODELS
+    elif name == 'utils':
+        return _KERAS_UTILS
diff --git a/keras_applications/densenet.py b/keras_applications/densenet.py
new file mode 100644
index 0000000..4a81f4e
--- /dev/null
+++ b/keras_applications/densenet.py
@@ -0,0 +1,346 @@
+"""DenseNet models for Keras.
+
+# Reference paper
+
+- [Densely Connected Convolutional Networks]
+  (https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award)
+
+# Reference implementation
+
+- [Torch DenseNets]
+  (https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua)
+- [TensorNets]
+  (https://github.com/taehoonlee/tensornets/blob/master/tensornets/densenets.py)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from . import get_keras_submodule
+
+backend = get_keras_submodule('backend')
+engine = get_keras_submodule('engine')
+layers = get_keras_submodule('layers')
+models = get_keras_submodule('models')
+keras_utils = get_keras_submodule('utils')
+
+from . import imagenet_utils
+from .imagenet_utils import decode_predictions
+from .imagenet_utils import _obtain_input_shape
+
+
+BASE_WEIGHTS_PATH = (
+    'https://github.com/fchollet/deep-learning-models/'
+    'releases/download/v0.8/')
+DENSENET121_WEIGHT_PATH = (
+    BASE_WEIGHTS_PATH +
+    'densenet121_weights_tf_dim_ordering_tf_kernels.h5')
+DENSENET121_WEIGHT_PATH_NO_TOP = (
+    BASE_WEIGHTS_PATH +
+    'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5')
+DENSENET169_WEIGHT_PATH = (
+    BASE_WEIGHTS_PATH +
+    'densenet169_weights_tf_dim_ordering_tf_kernels.h5')
+DENSENET169_WEIGHT_PATH_NO_TOP = (
+    BASE_WEIGHTS_PATH +
+    'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5')
+DENSENET201_WEIGHT_PATH = (
+    BASE_WEIGHTS_PATH +
+    'densenet201_weights_tf_dim_ordering_tf_kernels.h5')
+DENSENET201_WEIGHT_PATH_NO_TOP = (
+    BASE_WEIGHTS_PATH +
+    'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5')
+
+
+def dense_block(x, blocks, name):
+    """A dense block.
+
+    # Arguments
+        x: input tensor.
+        blocks: integer, the number of building blocks.
+        name: string, block label.
+
+    # Returns
+        output tensor for the block.
+    """
+    for i in range(blocks):
+        x = conv_block(x, 32, name=name + '_block' + str(i + 1))
+    return x
+
+
+def transition_block(x, reduction, name):
+    """A transition block.
+
+    # Arguments
+        x: input tensor.
+        reduction: float, compression rate at transition layers.
+        name: string, block label.
+
+    # Returns
+        output tensor for the block.
+    """
+    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
+    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
+                                  name=name + '_bn')(x)
+    x = layers.Activation('relu', name=name + '_relu')(x)
+    x = layers.Conv2D(int(backend.int_shape(x)[bn_axis] * reduction), 1,
+                      use_bias=False,
+                      name=name + '_conv')(x)
+    x = layers.AveragePooling2D(2, strides=2, name=name + '_pool')(x)
+    return x
+
+
+def conv_block(x, growth_rate, name):
+    """A building block for a dense block.
+
+    # Arguments
+        x: input tensor.
+        growth_rate: float, growth rate at dense layers.
+        name: string, block label.
+ + # Returns + Output tensor for the block. + """ + bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 + x1 = layers.BatchNormalization(axis=bn_axis, + epsilon=1.001e-5, + name=name + '_0_bn')(x) + x1 = layers.Activation('relu', name=name + '_0_relu')(x1) + x1 = layers.Conv2D(4 * growth_rate, 1, + use_bias=False, + name=name + '_1_conv')(x1) + x1 = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, + name=name + '_1_bn')(x1) + x1 = layers.Activation('relu', name=name + '_1_relu')(x1) + x1 = layers.Conv2D(growth_rate, 3, + padding='same', + use_bias=False, + name=name + '_2_conv')(x1) + x = layers.Concatenate(axis=bn_axis, name=name + '_concat')([x, x1]) + return x + + +def DenseNet(blocks, + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000): + """Instantiates the DenseNet architecture. + + Optionally loads weights pre-trained on ImageNet. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + + # Arguments + blocks: numbers of building blocks for the four dense layers. + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `channels_last` data format) + or `(3, 224, 224)` (with `channels_first` data format). + It should have exactly 3 inputs channels. + pooling: optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + + # Returns + A Keras model instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. 
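+
+    # Example
+        A minimal usage sketch (an assumption for illustration: an
+        up-to-date Keras install that re-exports this module as
+        `keras.applications` and can download the ImageNet weights):
+
+        ```python
+        from keras.applications.densenet import DenseNet121
+
+        # Build the 121-layer variant with the ImageNet classification head.
+        model = DenseNet121(weights='imagenet', include_top=True)
+        model.summary()
+        ```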
+ """ + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + + # Determine proper input shape + input_shape = _obtain_input_shape(input_shape, + default_size=224, + min_size=221, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 + + x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input) + x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')(x) + x = layers.Activation('relu', name='conv1/relu')(x) + x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x) + x = layers.MaxPooling2D(3, strides=2, name='pool1')(x) + + x = dense_block(x, blocks[0], name='conv2') + x = transition_block(x, 0.5, name='pool2') + x = dense_block(x, blocks[1], name='conv3') + x = transition_block(x, 0.5, name='pool3') + x = dense_block(x, blocks[2], name='conv4') + x = transition_block(x, 0.5, name='pool4') + x = dense_block(x, blocks[3], name='conv5') + + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name='bn')(x) + + if include_top: + x = layers.GlobalAveragePooling2D(name='avg_pool')(x) + x = layers.Dense(classes, activation='softmax', name='fc1000')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D(name='avg_pool')(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D(name='max_pool')(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = engine.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + if blocks == [6, 12, 24, 16]: + model = models.Model(inputs, x, name='densenet121') + elif blocks == [6, 12, 32, 32]: + model = models.Model(inputs, x, name='densenet169') + elif blocks == [6, 12, 48, 32]: + model = models.Model(inputs, x, name='densenet201') + else: + model = models.Model(inputs, x, name='densenet') + + # Load weights. 
+ if weights == 'imagenet': + if include_top: + if blocks == [6, 12, 24, 16]: + weights_path = keras_utils.get_file( + 'densenet121_weights_tf_dim_ordering_tf_kernels.h5', + DENSENET121_WEIGHT_PATH, + cache_subdir='models', + file_hash='0962ca643bae20f9b6771cb844dca3b0') + elif blocks == [6, 12, 32, 32]: + weights_path = keras_utils.get_file( + 'densenet169_weights_tf_dim_ordering_tf_kernels.h5', + DENSENET169_WEIGHT_PATH, + cache_subdir='models', + file_hash='bcf9965cf5064a5f9eb6d7dc69386f43') + elif blocks == [6, 12, 48, 32]: + weights_path = keras_utils.get_file( + 'densenet201_weights_tf_dim_ordering_tf_kernels.h5', + DENSENET201_WEIGHT_PATH, + cache_subdir='models', + file_hash='7bb75edd58cb43163be7e0005fbe95ef') + else: + if blocks == [6, 12, 24, 16]: + weights_path = keras_utils.get_file( + 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5', + DENSENET121_WEIGHT_PATH_NO_TOP, + cache_subdir='models', + file_hash='4912a53fbd2a69346e7f2c0b5ec8c6d3') + elif blocks == [6, 12, 32, 32]: + weights_path = keras_utils.get_file( + 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5', + DENSENET169_WEIGHT_PATH_NO_TOP, + cache_subdir='models', + file_hash='50662582284e4cf834ce40ab4dfa58c6') + elif blocks == [6, 12, 48, 32]: + weights_path = keras_utils.get_file( + 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5', + DENSENET201_WEIGHT_PATH_NO_TOP, + cache_subdir='models', + file_hash='1c2de60ee40562448dbac34a0737e798') + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +def DenseNet121(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000): + return DenseNet([6, 12, 24, 16], + include_top, weights, + input_tensor, input_shape, + pooling, classes) + + +def DenseNet169(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000): + return DenseNet([6, 12, 32, 32], + include_top, weights, + input_tensor, input_shape, + pooling, classes) + + +def DenseNet201(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000): + return DenseNet([6, 12, 48, 32], + include_top, weights, + input_tensor, input_shape, + pooling, classes) + + +def preprocess_input(x, data_format=None): + """Preprocesses a numpy array encoding a batch of images. + + # Arguments + x: a 3D or 4D numpy array consists of RGB values within [0, 255]. + data_format: data format of the image tensor. + + # Returns + Preprocessed array. + """ + return imagenet_utils.preprocess_input(x, data_format, mode='torch') + + +setattr(DenseNet121, '__doc__', DenseNet.__doc__) +setattr(DenseNet169, '__doc__', DenseNet.__doc__) +setattr(DenseNet201, '__doc__', DenseNet.__doc__) diff --git a/keras_applications/imagenet_utils.py b/keras_applications/imagenet_utils.py new file mode 100644 index 0000000..c7d4d13 --- /dev/null +++ b/keras_applications/imagenet_utils.py @@ -0,0 +1,330 @@ +"""Utilities for ImageNet data preprocessing & prediction decoding. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import warnings +import numpy as np + +from . 
import get_keras_submodule + +backend = get_keras_submodule('backend') +keras_utils = get_keras_submodule('utils') + +CLASS_INDEX = None +CLASS_INDEX_PATH = ('https://s3.amazonaws.com/deep-learning-models/' + 'image-models/imagenet_class_index.json') + +# Global tensor of imagenet mean for preprocessing symbolic inputs +_IMAGENET_MEAN = None + + +def _preprocess_numpy_input(x, data_format, mode): + """Preprocesses a Numpy array encoding a batch of images. + + # Arguments + x: Input array, 3D or 4D. + data_format: Data format of the image array. + mode: One of "caffe", "tf" or "torch". + - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + + # Returns + Preprocessed Numpy array. + """ + if not issubclass(x.dtype.type, np.floating): + x = x.astype(backend.floatx(), copy=False) + + if mode == 'tf': + x /= 127.5 + x -= 1. + return x + + if mode == 'torch': + x /= 255. + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + else: + if data_format == 'channels_first': + # 'RGB'->'BGR' + if x.ndim == 3: + x = x[::-1, ...] + else: + x = x[:, ::-1, ...] + else: + # 'RGB'->'BGR' + x = x[..., ::-1] + mean = [103.939, 116.779, 123.68] + std = None + + # Zero-center by mean pixel + if data_format == 'channels_first': + if x.ndim == 3: + x[0, :, :] -= mean[0] + x[1, :, :] -= mean[1] + x[2, :, :] -= mean[2] + if std is not None: + x[0, :, :] /= std[0] + x[1, :, :] /= std[1] + x[2, :, :] /= std[2] + else: + x[:, 0, :, :] -= mean[0] + x[:, 1, :, :] -= mean[1] + x[:, 2, :, :] -= mean[2] + if std is not None: + x[:, 0, :, :] /= std[0] + x[:, 1, :, :] /= std[1] + x[:, 2, :, :] /= std[2] + else: + x[..., 0] -= mean[0] + x[..., 1] -= mean[1] + x[..., 2] -= mean[2] + if std is not None: + x[..., 0] /= std[0] + x[..., 1] /= std[1] + x[..., 2] /= std[2] + return x + + +def _preprocess_symbolic_input(x, data_format, mode): + """Preprocesses a tensor encoding a batch of images. + + # Arguments + x: Input tensor, 3D or 4D. + data_format: Data format of the image tensor. + mode: One of "caffe", "tf" or "torch". + - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + + # Returns + Preprocessed tensor. + """ + global _IMAGENET_MEAN + + if mode == 'tf': + x /= 127.5 + x -= 1. + return x + + if mode == 'torch': + x /= 255. + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + else: + if data_format == 'channels_first': + # 'RGB'->'BGR' + if backend.ndim(x) == 3: + x = x[::-1, ...] + else: + x = x[:, ::-1, ...] 
+ else: + # 'RGB'->'BGR' + x = x[..., ::-1] + mean = [103.939, 116.779, 123.68] + std = None + + if _IMAGENET_MEAN is None: + _IMAGENET_MEAN = backend.constant(-np.array(mean)) + + # Zero-center by mean pixel + if backend.dtype(x) != backend.dtype(_IMAGENET_MEAN): + x = backend.bias_add( + x, backend.cast(_IMAGENET_MEAN, backend.dtype(x)), + data_format=data_format) + else: + x = backend.bias_add(x, _IMAGENET_MEAN, data_format) + if std is not None: + x /= std + return x + + +def preprocess_input(x, data_format=None, mode='caffe'): + """Preprocesses a tensor or Numpy array encoding a batch of images. + + # Arguments + x: Input Numpy or symbolic tensor, 3D or 4D. + The preprocessed data is written over the input data + if the data types are compatible. To avoid this + behaviour, `numpy.copy(x)` can be used. + data_format: Data format of the image tensor/array. + mode: One of "caffe", "tf" or "torch". + - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + + # Returns + Preprocessed tensor or Numpy array. + + # Raises + ValueError: In case of unknown `data_format` argument. + """ + if data_format is None: + data_format = backend.image_data_format() + if data_format not in {'channels_first', 'channels_last'}: + raise ValueError('Unknown data_format ' + str(data_format)) + + if isinstance(x, np.ndarray): + return _preprocess_numpy_input(x, data_format=data_format, mode=mode) + else: + return _preprocess_symbolic_input(x, data_format=data_format, + mode=mode) + + +def decode_predictions(preds, top=5): + """Decodes the prediction of an ImageNet model. + + # Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + # Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + # Raises + ValueError: In case of invalid shape of the `pred` array + (must be 2D). + """ + global CLASS_INDEX + if len(preds.shape) != 2 or preds.shape[1] != 1000: + raise ValueError('`decode_predictions` expects ' + 'a batch of predictions ' + '(i.e. a 2D array of shape (samples, 1000)). ' + 'Found array with shape: ' + str(preds.shape)) + if CLASS_INDEX is None: + fpath = keras_utils.get_file( + 'imagenet_class_index.json', + CLASS_INDEX_PATH, + cache_subdir='models', + file_hash='c2c37ea517e94d9795004a39431a14cb') + with open(fpath) as f: + CLASS_INDEX = json.load(f) + results = [] + for pred in preds: + top_indices = pred.argsort()[-top:][::-1] + result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] + result.sort(key=lambda x: x[2], reverse=True) + results.append(result) + return results + + +def _obtain_input_shape(input_shape, + default_size, + min_size, + data_format, + require_flatten, + weights=None): + """Internal utility to compute/validate a model's input shape. + + # Arguments + input_shape: Either None (will return the default network input shape), + or a user-provided shape to be validated. + default_size: Default input width/height for the model. + min_size: Minimum input width/height accepted by the model. + data_format: Image data format to use. + require_flatten: Whether the model is expected to + be linked to a classifier via a Flatten layer. 
+ weights: One of `None` (random initialization) + or 'imagenet' (pre-training on ImageNet). + If weights='imagenet' input channels must be equal to 3. + + # Returns + An integer shape tuple (may include None entries). + + # Raises + ValueError: In case of invalid argument values. + """ + if weights != 'imagenet' and input_shape and len(input_shape) == 3: + if data_format == 'channels_first': + if input_shape[0] not in {1, 3}: + warnings.warn( + 'This model usually expects 1 or 3 input channels. ' + 'However, it was passed an input_shape with ' + + str(input_shape[0]) + ' input channels.') + default_shape = (input_shape[0], default_size, default_size) + else: + if input_shape[-1] not in {1, 3}: + warnings.warn( + 'This model usually expects 1 or 3 input channels. ' + 'However, it was passed an input_shape with ' + + str(input_shape[-1]) + ' input channels.') + default_shape = (default_size, default_size, input_shape[-1]) + else: + if data_format == 'channels_first': + default_shape = (3, default_size, default_size) + else: + default_shape = (default_size, default_size, 3) + if weights == 'imagenet' and require_flatten: + if input_shape is not None: + if input_shape != default_shape: + raise ValueError('When setting`include_top=True` ' + 'and loading `imagenet` weights, ' + '`input_shape` should be ' + + str(default_shape) + '.') + return default_shape + if input_shape: + if data_format == 'channels_first': + if input_shape is not None: + if len(input_shape) != 3: + raise ValueError( + '`input_shape` must be a tuple of three integers.') + if input_shape[0] != 3 and weights == 'imagenet': + raise ValueError('The input must have 3 channels; got ' + '`input_shape=' + str(input_shape) + '`') + if ((input_shape[1] is not None and input_shape[1] < min_size) or + (input_shape[2] is not None and input_shape[2] < min_size)): + raise ValueError('Input size must be at least ' + + str(min_size) + 'x' + str(min_size) + + '; got `input_shape=' + + str(input_shape) + '`') + else: + if input_shape is not None: + if len(input_shape) != 3: + raise ValueError( + '`input_shape` must be a tuple of three integers.') + if input_shape[-1] != 3 and weights == 'imagenet': + raise ValueError('The input must have 3 channels; got ' + '`input_shape=' + str(input_shape) + '`') + if ((input_shape[0] is not None and input_shape[0] < min_size) or + (input_shape[1] is not None and input_shape[1] < min_size)): + raise ValueError('Input size must be at least ' + + str(min_size) + 'x' + str(min_size) + + '; got `input_shape=' + + str(input_shape) + '`') + else: + if require_flatten: + input_shape = default_shape + else: + if data_format == 'channels_first': + input_shape = (3, None, None) + else: + input_shape = (None, None, 3) + if require_flatten: + if None in input_shape: + raise ValueError('If `include_top` is True, ' + 'you should specify a static `input_shape`. ' + 'Got `input_shape=' + str(input_shape) + '`') + return input_shape diff --git a/keras_applications/inception_resnet_v2.py b/keras_applications/inception_resnet_v2.py new file mode 100644 index 0000000..96e1239 --- /dev/null +++ b/keras_applications/inception_resnet_v2.py @@ -0,0 +1,366 @@ +"""Inception-ResNet V2 model for Keras. 
+ +Model naming and structure follows TF-slim implementation +(which has some additional layers and different number of +filters from the original arXiv paper): +https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py + +Pre-trained ImageNet weights are also converted from TF-slim, +which can be found in: +https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models + +# Reference +- [Inception-v4, Inception-ResNet and the Impact of + Residual Connections on Learning](https://arxiv.org/abs/1602.07261) + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from . import get_keras_submodule + +backend = get_keras_submodule('backend') +engine = get_keras_submodule('engine') +layers = get_keras_submodule('layers') +models = get_keras_submodule('models') +keras_utils = get_keras_submodule('utils') + +from . import imagenet_utils +from .imagenet_utils import decode_predictions +from .imagenet_utils import _obtain_input_shape + + +BASE_WEIGHT_URL = ('https://github.com/fchollet/deep-learning-models/' + 'releases/download/v0.7/') + + +def preprocess_input(x): + """Preprocesses a numpy array encoding a batch of images. + + # Arguments + x: a 4D numpy array consists of RGB values within [0, 255]. + + # Returns + Preprocessed array. + """ + return imagenet_utils.preprocess_input(x, mode='tf') + + +def conv2d_bn(x, + filters, + kernel_size, + strides=1, + padding='same', + activation='relu', + use_bias=False, + name=None): + """Utility function to apply conv + BN. + + # Arguments + x: input tensor. + filters: filters in `Conv2D`. + kernel_size: kernel size as in `Conv2D`. + strides: strides in `Conv2D`. + padding: padding mode in `Conv2D`. + activation: activation in `Conv2D`. + use_bias: whether to use a bias in `Conv2D`. + name: name of the ops; will become `name + '_ac'` for the activation + and `name + '_bn'` for the batch norm layer. + + # Returns + Output tensor after applying `Conv2D` and `BatchNormalization`. + """ + x = layers.Conv2D(filters, + kernel_size, + strides=strides, + padding=padding, + use_bias=use_bias, + name=name)(x) + if not use_bias: + bn_axis = 1 if backend.image_data_format() == 'channels_first' else 3 + bn_name = None if name is None else name + '_bn' + x = layers.BatchNormalization(axis=bn_axis, + scale=False, + name=bn_name)(x) + if activation is not None: + ac_name = None if name is None else name + '_ac' + x = layers.Activation(activation, name=ac_name)(x) + return x + + +def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): + """Adds a Inception-ResNet block. + + This function builds 3 types of Inception-ResNet blocks mentioned + in the paper, controlled by the `block_type` argument (which is the + block name used in the official TF-slim implementation): + - Inception-ResNet-A: `block_type='block35'` + - Inception-ResNet-B: `block_type='block17'` + - Inception-ResNet-C: `block_type='block8'` + + # Arguments + x: input tensor. + scale: scaling factor to scale the residuals (i.e., the output of + passing `x` through an inception module) before adding them + to the shortcut branch. + Let `r` be the output from the residual branch, + the output of this block will be `x + scale * r`. + block_type: `'block35'`, `'block17'` or `'block8'`, determines + the network structure in the residual branch. + block_idx: an `int` used for generating layer names. 
+ The Inception-ResNet blocks + are repeated many times in this network. + We use `block_idx` to identify + each of the repetitions. For example, + the first Inception-ResNet-A block + will have `block_type='block35', block_idx=0`, + and the layer names will have + a common prefix `'block35_0'`. + activation: activation function to use at the end of the block + (see [activations](../activations.md)). + When `activation=None`, no activation is applied + (i.e., "linear" activation: `a(x) = x`). + + # Returns + Output tensor for the block. + + # Raises + ValueError: if `block_type` is not one of `'block35'`, + `'block17'` or `'block8'`. + """ + if block_type == 'block35': + branch_0 = conv2d_bn(x, 32, 1) + branch_1 = conv2d_bn(x, 32, 1) + branch_1 = conv2d_bn(branch_1, 32, 3) + branch_2 = conv2d_bn(x, 32, 1) + branch_2 = conv2d_bn(branch_2, 48, 3) + branch_2 = conv2d_bn(branch_2, 64, 3) + branches = [branch_0, branch_1, branch_2] + elif block_type == 'block17': + branch_0 = conv2d_bn(x, 192, 1) + branch_1 = conv2d_bn(x, 128, 1) + branch_1 = conv2d_bn(branch_1, 160, [1, 7]) + branch_1 = conv2d_bn(branch_1, 192, [7, 1]) + branches = [branch_0, branch_1] + elif block_type == 'block8': + branch_0 = conv2d_bn(x, 192, 1) + branch_1 = conv2d_bn(x, 192, 1) + branch_1 = conv2d_bn(branch_1, 224, [1, 3]) + branch_1 = conv2d_bn(branch_1, 256, [3, 1]) + branches = [branch_0, branch_1] + else: + raise ValueError('Unknown Inception-ResNet block type. ' + 'Expects "block35", "block17" or "block8", ' + 'but got: ' + str(block_type)) + + block_name = block_type + '_' + str(block_idx) + channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 + mixed = layers.Concatenate( + axis=channel_axis, name=block_name + '_mixed')(branches) + up = conv2d_bn(mixed, + backend.int_shape(x)[channel_axis], + 1, + activation=None, + use_bias=True, + name=block_name + '_conv') + + x = layers.Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, + output_shape=backend.int_shape(x)[1:], + arguments={'scale': scale}, + name=block_name)([x, up]) + if activation is not None: + x = layers.Activation(activation, name=block_name + '_ac')(x) + return x + + +def InceptionResNetV2(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000): + """Instantiates the Inception-ResNet v2 architecture. + + Optionally loads weights pre-trained on ImageNet. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + + # Arguments + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is `False` (otherwise the input shape + has to be `(299, 299, 3)` (with `'channels_last'` data format) + or `(3, 299, 299)` (with `'channels_first'` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 139. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional layer. 
+ - `'avg'` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `'max'` means that global max pooling will be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + + # Returns + A Keras `Model` instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + """ + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + + # Determine proper input shape + input_shape = _obtain_input_shape( + input_shape, + default_size=299, + min_size=139, + data_format=backend.image_data_format(), + require_flatten=False, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + # Stem block: 35 x 35 x 192 + x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') + x = conv2d_bn(x, 32, 3, padding='valid') + x = conv2d_bn(x, 64, 3) + x = layers.MaxPooling2D(3, strides=2)(x) + x = conv2d_bn(x, 80, 1, padding='valid') + x = conv2d_bn(x, 192, 3, padding='valid') + x = layers.MaxPooling2D(3, strides=2)(x) + + # Mixed 5b (Inception-A block): 35 x 35 x 320 + branch_0 = conv2d_bn(x, 96, 1) + branch_1 = conv2d_bn(x, 48, 1) + branch_1 = conv2d_bn(branch_1, 64, 5) + branch_2 = conv2d_bn(x, 64, 1) + branch_2 = conv2d_bn(branch_2, 96, 3) + branch_2 = conv2d_bn(branch_2, 96, 3) + branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 64, 1) + branches = [branch_0, branch_1, branch_2, branch_pool] + channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 + x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches) + + # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 + for block_idx in range(1, 11): + x = inception_resnet_block(x, + scale=0.17, + block_type='block35', + block_idx=block_idx) + + # Mixed 6a (Reduction-A block): 17 x 17 x 1088 + branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') + branch_1 = conv2d_bn(x, 256, 1) + branch_1 = conv2d_bn(branch_1, 256, 3) + branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') + branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) + branches = [branch_0, branch_1, branch_pool] + x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches) + + # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 + for block_idx in range(1, 21): + x = inception_resnet_block(x, + scale=0.1, + block_type='block17', + block_idx=block_idx) + + # Mixed 7a (Reduction-B block): 8 x 8 x 2080 + branch_0 = conv2d_bn(x, 256, 1) + branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') + branch_1 = conv2d_bn(x, 256, 1) + branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') + branch_2 = conv2d_bn(x, 256, 1) + branch_2 = conv2d_bn(branch_2, 288, 3) + branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, 
padding='valid') + branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) + branches = [branch_0, branch_1, branch_2, branch_pool] + x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches) + + # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 + for block_idx in range(1, 10): + x = inception_resnet_block(x, + scale=0.2, + block_type='block8', + block_idx=block_idx) + x = inception_resnet_block(x, + scale=1., + activation=None, + block_type='block8', + block_idx=10) + + # Final convolution block: 8 x 8 x 1536 + x = conv2d_bn(x, 1536, 1, name='conv_7b') + + if include_top: + # Classification block + x = layers.GlobalAveragePooling2D(name='avg_pool')(x) + x = layers.Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor` + if input_tensor is not None: + inputs = engine.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model + model = models.Model(inputs, x, name='inception_resnet_v2') + + # Load weights + if weights == 'imagenet': + if include_top: + fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' + weights_path = keras_utils.get_file( + fname, + BASE_WEIGHT_URL + fname, + cache_subdir='models', + file_hash='e693bd0210a403b3192acc6073ad2e96') + else: + fname = ('inception_resnet_v2_weights_' + 'tf_dim_ordering_tf_kernels_notop.h5') + weights_path = keras_utils.get_file( + fname, + BASE_WEIGHT_URL + fname, + cache_subdir='models', + file_hash='d19885ff4a710c122648d3b5c3b684e4') + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model diff --git a/keras_applications/inception_v3.py b/keras_applications/inception_v3.py new file mode 100644 index 0000000..97036fe --- /dev/null +++ b/keras_applications/inception_v3.py @@ -0,0 +1,405 @@ +"""Inception V3 model for Keras. + +Note that the input image format for this model is different than for +the VGG16 and ResNet models (299x299 instead of 224x224), +and that the input preprocessing function is also different (same as Xception). + +# Reference + +- [Rethinking the Inception Architecture for Computer Vision]( + http://arxiv.org/abs/1512.00567) + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from . import get_keras_submodule + +backend = get_keras_submodule('backend') +engine = get_keras_submodule('engine') +layers = get_keras_submodule('layers') +models = get_keras_submodule('models') +keras_utils = get_keras_submodule('utils') + +from . import imagenet_utils +from .imagenet_utils import decode_predictions +from .imagenet_utils import _obtain_input_shape + + +WEIGHTS_PATH = ( + 'https://github.com/fchollet/deep-learning-models/' + 'releases/download/v0.5/' + 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5') +WEIGHTS_PATH_NO_TOP = ( + 'https://github.com/fchollet/deep-learning-models/' + 'releases/download/v0.5/' + 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5') + + +def conv2d_bn(x, + filters, + num_row, + num_col, + padding='same', + strides=(1, 1), + name=None): + """Utility function to apply conv + BN. + + # Arguments + x: input tensor. + filters: filters in `Conv2D`. + num_row: height of the convolution kernel. + num_col: width of the convolution kernel. + padding: padding mode in `Conv2D`. 
+ strides: strides in `Conv2D`. + name: name of the ops; will become `name + '_conv'` + for the convolution and `name + '_bn'` for the + batch norm layer. + + # Returns + Output tensor after applying `Conv2D` and `BatchNormalization`. + """ + if name is not None: + bn_name = name + '_bn' + conv_name = name + '_conv' + else: + bn_name = None + conv_name = None + if backend.image_data_format() == 'channels_first': + bn_axis = 1 + else: + bn_axis = 3 + x = layers.Conv2D( + filters, (num_row, num_col), + strides=strides, + padding=padding, + use_bias=False, + name=conv_name)(x) + x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) + x = layers.Activation('relu', name=name)(x) + return x + + +def InceptionV3(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000): + """Instantiates the Inception v3 architecture. + + Optionally loads weights pre-trained on ImageNet. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + + # Arguments + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(299, 299, 3)` (with `channels_last` data format) + or `(3, 299, 299)` (with `channels_first` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 139. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + + # Returns + A Keras model instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. 
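+
+    # Example
+        A minimal feature-extraction sketch (an assumption for
+        illustration: an up-to-date Keras install that re-exports this
+        module as `keras.applications`):
+
+        ```python
+        import numpy as np
+        from keras.applications.inception_v3 import InceptionV3, preprocess_input
+
+        # Headless model: global average pooling turns each image into
+        # a single 2048-d feature vector.
+        model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
+        images = preprocess_input(np.random.uniform(0, 255, (1, 299, 299, 3)))
+        features = model.predict(images)  # shape: (1, 2048)
+        ```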
+ """ + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + + # Determine proper input shape + input_shape = _obtain_input_shape( + input_shape, + default_size=299, + min_size=139, + data_format=backend.image_data_format(), + require_flatten=False, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + if backend.image_data_format() == 'channels_first': + channel_axis = 1 + else: + channel_axis = 3 + + x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') + x = conv2d_bn(x, 32, 3, 3, padding='valid') + x = conv2d_bn(x, 64, 3, 3) + x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + + x = conv2d_bn(x, 80, 1, 1, padding='valid') + x = conv2d_bn(x, 192, 3, 3, padding='valid') + x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + + # mixed 0, 1, 2: 35 x 35 x 256 + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = layers.AveragePooling2D((3, 3), + strides=(1, 1), + padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 32, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed0') + + # mixed 1: 35 x 35 x 256 + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = layers.AveragePooling2D((3, 3), + strides=(1, 1), + padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 64, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed1') + + # mixed 2: 35 x 35 x 256 + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = layers.AveragePooling2D((3, 3), + strides=(1, 1), + padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 64, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed2') + + # mixed 3: 17 x 17 x 768 + branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn( + branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') + + branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + x = layers.concatenate( + [branch3x3, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed3') + + # mixed 4: 17 x 17 x 768 + branch1x1 = conv2d_bn(x, 192, 1, 1) + + branch7x7 = conv2d_bn(x, 128, 1, 1) + 
branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) + branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) + + branch7x7dbl = conv2d_bn(x, 128, 1, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) + branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) + + branch_pool = layers.AveragePooling2D((3, 3), + strides=(1, 1), + padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 192, 1, 1) + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name='mixed4') + + # mixed 5, 6: 17 x 17 x 768 + for i in range(2): + branch1x1 = conv2d_bn(x, 192, 1, 1) + + branch7x7 = conv2d_bn(x, 160, 1, 1) + branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) + branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) + + branch7x7dbl = conv2d_bn(x, 160, 1, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) + branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) + + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 192, 1, 1) + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name='mixed' + str(5 + i)) + + # mixed 7: 17 x 17 x 768 + branch1x1 = conv2d_bn(x, 192, 1, 1) + + branch7x7 = conv2d_bn(x, 192, 1, 1) + branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) + branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) + + branch7x7dbl = conv2d_bn(x, 192, 1, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) + branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) + + branch_pool = layers.AveragePooling2D((3, 3), + strides=(1, 1), + padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 192, 1, 1) + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name='mixed7') + + # mixed 8: 8 x 8 x 1280 + branch3x3 = conv2d_bn(x, 192, 1, 1) + branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, + strides=(2, 2), padding='valid') + + branch7x7x3 = conv2d_bn(x, 192, 1, 1) + branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) + branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) + branch7x7x3 = conv2d_bn( + branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') + + branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + x = layers.concatenate( + [branch3x3, branch7x7x3, branch_pool], + axis=channel_axis, + name='mixed8') + + # mixed 9: 8 x 8 x 2048 + for i in range(2): + branch1x1 = conv2d_bn(x, 320, 1, 1) + + branch3x3 = conv2d_bn(x, 384, 1, 1) + branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) + branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) + branch3x3 = layers.concatenate( + [branch3x3_1, branch3x3_2], + axis=channel_axis, + name='mixed9_' + str(i)) + + branch3x3dbl = conv2d_bn(x, 448, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) + branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) + branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) + branch3x3dbl = layers.concatenate( + [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) + + branch_pool = layers.AveragePooling2D( + (3, 3), strides=(1, 1), padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 192, 1, 1) + x = layers.concatenate( + [branch1x1, branch3x3, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed' + str(9 + i)) + if include_top: + # Classification block + x = 
layers.GlobalAveragePooling2D(name='avg_pool')(x) + x = layers.Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = engine.get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. + model = models.Model(inputs, x, name='inception_v3') + + # load weights + if weights == 'imagenet': + if include_top: + weights_path = keras_utils.get_file( + 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', + WEIGHTS_PATH, + cache_subdir='models', + file_hash='9a0d58056eeedaa3f26cb7ebd46da564') + else: + weights_path = keras_utils.get_file( + 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', + WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + file_hash='bcbd6486424b2319ff4ef7d526e38f63') + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +def preprocess_input(x): + """Preprocesses a numpy array encoding a batch of images. + + # Arguments + x: a 4D numpy array consists of RGB values within [0, 255]. + + # Returns + Preprocessed array. + """ + return imagenet_utils.preprocess_input(x, mode='tf') diff --git a/keras_applications/mobilenet.py b/keras_applications/mobilenet.py new file mode 100644 index 0000000..77a6cff --- /dev/null +++ b/keras_applications/mobilenet.py @@ -0,0 +1,472 @@ +"""MobileNet v1 models for Keras. + +MobileNet is a general architecture and can be used for multiple use cases. +Depending on the use case, it can use different input layer size and +different width factors. This allows different width models to reduce +the number of multiply-adds and thereby +reduce inference cost on mobile devices. + +MobileNets support any input size greater than 32 x 32, with larger image sizes +offering better performance. +The number of parameters and number of multiply-adds +can be modified by using the `alpha` parameter, +which increases/decreases the number of filters in each layer. +By altering the image size and `alpha` parameter, +all 16 models from the paper can be built, with ImageNet weights provided. + +The paper demonstrates the performance of MobileNets using `alpha` values of +1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. +For each of these `alpha` values, weights for 4 different input image sizes +are provided (224, 192, 160, 128). 
+
+The following table describes the size and accuracy of the 100% MobileNet
+on size 224 x 224:
+----------------------------------------------------------------------------
+Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M)
+----------------------------------------------------------------------------
+|   1.0 MobileNet-224    |    70.6 %    |        529        |     4.2     |
+|   0.75 MobileNet-224   |    68.4 %    |        325        |     2.6     |
+|   0.50 MobileNet-224   |    63.7 %    |        149        |     1.3     |
+|   0.25 MobileNet-224   |    50.6 %    |        41         |     0.5     |
+----------------------------------------------------------------------------
+
+The following table describes the performance of
+the 100% MobileNet on various input sizes:
+------------------------------------------------------------------------
+      Resolution      | ImageNet Acc | Multiply-Adds (M) | Params (M)
+------------------------------------------------------------------------
+|  1.0 MobileNet-224  |    70.6 %    |        529        |     4.2     |
+|  1.0 MobileNet-192  |    69.1 %    |        529        |     4.2     |
+|  1.0 MobileNet-160  |    67.2 %    |        529        |     4.2     |
+|  1.0 MobileNet-128  |    64.4 %    |        529        |     4.2     |
+------------------------------------------------------------------------
+
+The weights for all 16 models are obtained and translated
+from TensorFlow checkpoints found at
+https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md
+
+# Reference
+
+- [MobileNets: Efficient Convolutional Neural Networks for
+   Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)
+"""
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import division
+
+import os
+import warnings
+
+from . import get_keras_submodule
+
+backend = get_keras_submodule('backend')
+engine = get_keras_submodule('engine')
+layers = get_keras_submodule('layers')
+models = get_keras_submodule('models')
+keras_utils = get_keras_submodule('utils')
+
+from . import imagenet_utils
+from .imagenet_utils import decode_predictions
+from .imagenet_utils import _obtain_input_shape
+
+
+BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/'
+                    'releases/download/v0.6/')
+
+
+def relu6(x):
+    return backend.relu(x, max_value=6)
+
+
+def preprocess_input(x):
+    """Preprocesses a numpy array encoding a batch of images.
+
+    # Arguments
+        x: a 4D numpy array consisting of RGB values within [0, 255].
+
+    # Returns
+        Preprocessed array.
+    """
+    return imagenet_utils.preprocess_input(x, mode='tf')
+
+
+def MobileNet(input_shape=None,
+              alpha=1.0,
+              depth_multiplier=1,
+              dropout=1e-3,
+              include_top=True,
+              weights='imagenet',
+              input_tensor=None,
+              pooling=None,
+              classes=1000):
+    """Instantiates the MobileNet architecture.
+
+    To load a MobileNet model via `load_model`, import the custom
+    object `relu6` and pass it to the `custom_objects` parameter.
+    E.g.
+    model = load_model('mobilenet.h5', custom_objects={
+                       'relu6': mobilenet.relu6})
+
+    # Arguments
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)` (with `channels_last` data format)
+            or `(3, 224, 224)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 32.
+            E.g. `(200, 200, 3)` would be one valid value.
+        alpha: controls the width of the network.
+            - If `alpha` < 1.0, proportionally decreases the number
+                of filters in each layer.
+            - If `alpha` > 1.0, proportionally increases the number
+                of filters in each layer.
+            - If `alpha` = 1, the default number of filters from the paper
+                is used at each layer.
+        depth_multiplier: depth multiplier for depthwise convolution
+            (the number of depthwise output channels per input channel)
+        dropout: dropout rate
+        include_top: whether to include the fully-connected
+            layer at the top of the network.
+        weights: one of `None` (random initialization),
+            'imagenet' (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional Keras tensor (i.e. output of
+            `layers.Input()`)
+            to use as image input for the model.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model
+                will be the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a
+                2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+
+    # Returns
+        A Keras model instance.
+
+    # Raises
+        ValueError: in case of invalid argument for `weights`,
+            or invalid input shape.
+        RuntimeError: If attempting to run this model with a
+            backend that does not support separable convolutions.
+    """
+
+    if not (weights in {'imagenet', None} or os.path.exists(weights)):
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization), `imagenet` '
+                         '(pre-training on ImageNet), '
+                         'or the path to the weights file to be loaded.')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as ImageNet with `include_top` '
+                         'as true, `classes` should be 1000')
+
+    # Determine proper input shape and default size.
+    if input_shape is None:
+        default_size = 224
+    else:
+        if backend.image_data_format() == 'channels_first':
+            rows = input_shape[1]
+            cols = input_shape[2]
+        else:
+            rows = input_shape[0]
+            cols = input_shape[1]
+
+        if rows == cols and rows in [128, 160, 192, 224]:
+            default_size = rows
+        else:
+            default_size = 224
+
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=default_size,
+                                      min_size=32,
+                                      data_format=backend.image_data_format(),
+                                      require_flatten=include_top,
+                                      weights=weights)
+
+    if backend.image_data_format() == 'channels_last':
+        row_axis, col_axis = (0, 1)
+    else:
+        row_axis, col_axis = (1, 2)
+    rows = input_shape[row_axis]
+    cols = input_shape[col_axis]
+
+    if weights == 'imagenet':
+        if depth_multiplier != 1:
+            raise ValueError('If imagenet weights are being loaded, '
+                             'depth multiplier must be 1')
+
+        if alpha not in [0.25, 0.50, 0.75, 1.0]:
+            raise ValueError('If imagenet weights are being loaded, '
+                             'alpha can be one of '
+                             '`0.25`, `0.50`, `0.75` or `1.0` only.')
+
+        if rows != cols or rows not in [128, 160, 192, 224]:
+            if rows is None:
+                rows = 224
+                warnings.warn('MobileNet shape is undefined.'
+                              ' Weights for input shape '
+                              '(224, 224) will be loaded.')
+            else:
+                raise ValueError('If imagenet weights are being loaded, '
+                                 'input must have a static square shape '
+                                 '(one of (128, 128), (160, 160), '
+                                 '(192, 192), or (224, 224)). '
+                                 'Input shape provided = %s' % (input_shape,))
+
+    if backend.image_data_format() != 'channels_last':
+        warnings.warn('The MobileNet family of models is only available '
+                      'for the input data format "channels_last" '
+                      '(width, height, channels). '
+                      'However your settings specify the default '
+                      'data format "channels_first" (channels, width, height).'
+                      ' You should set `image_data_format="channels_last"` '
+                      'in your Keras config located at ~/.keras/keras.json. '
+                      'The model being returned right now will expect inputs '
+                      'to follow the "channels_last" data format.')
+        backend.set_image_data_format('channels_last')
+        old_data_format = 'channels_first'
+    else:
+        old_data_format = None
+
+    if input_tensor is None:
+        img_input = layers.Input(shape=input_shape)
+    else:
+        if not backend.is_keras_tensor(input_tensor):
+            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
+        else:
+            img_input = input_tensor
+
+    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
+    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
+
+    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
+                              strides=(2, 2), block_id=2)
+    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)
+
+    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
+                              strides=(2, 2), block_id=4)
+    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)
+
+    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
+                              strides=(2, 2), block_id=6)
+    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
+    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
+    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
+    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
+    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)
+
+    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
+                              strides=(2, 2), block_id=12)
+    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)
+
+    if include_top:
+        if backend.image_data_format() == 'channels_first':
+            shape = (int(1024 * alpha), 1, 1)
+        else:
+            shape = (1, 1, int(1024 * alpha))
+
+        x = layers.GlobalAveragePooling2D()(x)
+        x = layers.Reshape(shape, name='reshape_1')(x)
+        x = layers.Dropout(dropout, name='dropout')(x)
+        x = layers.Conv2D(classes, (1, 1),
+                          padding='same',
+                          name='conv_preds')(x)
+        x = layers.Activation('softmax', name='act_softmax')(x)
+        x = layers.Reshape((classes,), name='reshape_2')(x)
+    else:
+        if pooling == 'avg':
+            x = layers.GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = layers.GlobalMaxPooling2D()(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = engine.get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+
+    # Create model.
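Once the model object is created below and its ImageNet weights are loaded, inference is a short script. A minimal usage sketch, assuming a Keras installation that `keras_applications` can bind to (the direct import path is an assumption of this sketch):

```python
import numpy as np
from keras_applications import mobilenet

# alpha=0.5 at 160x160 is one of the 16 published weight combinations.
model = mobilenet.MobileNet(weights='imagenet', alpha=0.5,
                            input_shape=(160, 160, 3))
img = np.random.uniform(0, 255, size=(1, 160, 160, 3))  # stand-in image batch
preds = model.predict(mobilenet.preprocess_input(img))
print(preds.shape)  # (1, 1000)
```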
+ model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) + + # load weights + if weights == 'imagenet': + if backend.image_data_format() == 'channels_first': + raise ValueError('Weights for "channels_first" format ' + 'are not available.') + if alpha == 1.0: + alpha_text = '1_0' + elif alpha == 0.75: + alpha_text = '7_5' + elif alpha == 0.50: + alpha_text = '5_0' + else: + alpha_text = '2_5' + + if include_top: + model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) + weight_path = BASE_WEIGHT_PATH + model_name + weights_path = keras_utils.get_file(model_name, + weight_path, + cache_subdir='models') + else: + model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) + weight_path = BASE_WEIGHT_PATH + model_name + weights_path = keras_utils.get_file(model_name, + weight_path, + cache_subdir='models') + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + if old_data_format: + backend.set_image_data_format(old_data_format) + return model + + +def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): + """Adds an initial convolution layer (with batch normalization and relu6). + + # Arguments + inputs: Input tensor of shape `(rows, cols, 3)` + (with `channels_last` data format) or + (3, rows, cols) (with `channels_first` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(224, 224, 3)` would be one valid value. + filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the convolution). + alpha: controls the width of the network. + - If `alpha` < 1.0, proportionally decreases the number + of filters in each layer. + - If `alpha` > 1.0, proportionally increases the number + of filters in each layer. + - If `alpha` = 1, default number of filters from the paper + are used at each layer. + kernel: An integer or tuple/list of 2 integers, specifying the + width and height of the 2D convolution window. + Can be a single integer to specify the same value for + all spatial dimensions. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the convolution + along the width and height. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + + # Input shape + 4D tensor with shape: + `(samples, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows, cols, channels)` if data_format='channels_last'. + + # Output shape + 4D tensor with shape: + `(samples, filters, new_rows, new_cols)` + if data_format='channels_first' + or 4D tensor with shape: + `(samples, new_rows, new_cols, filters)` + if data_format='channels_last'. + `rows` and `cols` values might have changed due to stride. + + # Returns + Output tensor of block. + """ + channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 + filters = int(filters * alpha) + x = layers.ZeroPadding2D(padding=(1, 1), name='conv1_pad')(inputs) + x = layers.Conv2D(filters, kernel, + padding='valid', + use_bias=False, + strides=strides, + name='conv1')(x) + x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x) + return layers.Activation(relu6, name='conv1_relu')(x) + + +def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, + depth_multiplier=1, strides=(1, 1), block_id=1): + """Adds a depthwise convolution block. 
+ + A depthwise convolution block consists of a depthwise conv, + batch normalization, relu6, pointwise convolution, + batch normalization and relu6 activation. + + # Arguments + inputs: Input tensor of shape `(rows, cols, channels)` + (with `channels_last` data format) or + (channels, rows, cols) (with `channels_first` data format). + pointwise_conv_filters: Integer, the dimensionality of the output space + (i.e. the number of output filters in the pointwise convolution). + alpha: controls the width of the network. + - If `alpha` < 1.0, proportionally decreases the number + of filters in each layer. + - If `alpha` > 1.0, proportionally increases the number + of filters in each layer. + - If `alpha` = 1, default number of filters from the paper + are used at each layer. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. + The total number of depthwise convolution output + channels will be equal to `filters_in * depth_multiplier`. + strides: An integer or tuple/list of 2 integers, + specifying the strides of the convolution + along the width and height. + Can be a single integer to specify the same value for + all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying + any `dilation_rate` value != 1. + block_id: Integer, a unique identification designating + the block number. + + # Input shape + 4D tensor with shape: + `(batch, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(batch, rows, cols, channels)` if data_format='channels_last'. + + # Output shape + 4D tensor with shape: + `(batch, filters, new_rows, new_cols)` + if data_format='channels_first' + or 4D tensor with shape: + `(batch, new_rows, new_cols, filters)` + if data_format='channels_last'. + `rows` and `cols` values might have changed due to stride. + + # Returns + Output tensor of block. + """ + channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 + pointwise_conv_filters = int(pointwise_conv_filters * alpha) + + x = layers.ZeroPadding2D((1, 1), name='conv_pad_%d' % block_id)(inputs) + x = layers.DepthwiseConv2D((3, 3), + padding='valid', + depth_multiplier=depth_multiplier, + strides=strides, + use_bias=False, + name='conv_dw_%d' % block_id)(x) + x = layers.BatchNormalization( + axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) + x = layers.Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) + + x = layers.Conv2D(pointwise_conv_filters, (1, 1), + padding='same', + use_bias=False, + strides=(1, 1), + name='conv_pw_%d' % block_id)(x) + x = layers.BatchNormalization(axis=channel_axis, + name='conv_pw_%d_bn' % block_id)(x) + return layers.Activation(relu6, name='conv_pw_%d_relu' % block_id)(x) diff --git a/keras_applications/mobilenet_v2.py b/keras_applications/mobilenet_v2.py new file mode 100644 index 0000000..aa0a8ed --- /dev/null +++ b/keras_applications/mobilenet_v2.py @@ -0,0 +1,512 @@ +"""MobileNet v2 models for Keras. + +MobileNetV2 is a general architecture and can be used for multiple use cases. +Depending on the use case, it can use different input layer size and +different width factors. This allows different width models to reduce +the number of multiply-adds and thereby +reduce inference cost on mobile devices. + +MobileNetV2 is very similar to the original MobileNet, +except that it uses inverted residual blocks with +bottlenecking features. It has a drastically lower +parameter count than the original MobileNet. 
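The parameter savings claimed here come from the depthwise-separable factorization implemented in `_depthwise_conv_block` above: a k x k depthwise convolution followed by a 1x1 pointwise projection replaces a full k x k convolution. A quick back-of-the-envelope sketch (pure Python; the function names are illustrative):

```python
def conv_params(k, c_in, c_out):
    # Weights in a standard k x k convolution (bias and BN ignored).
    return k * k * c_in * c_out

def separable_params(k, c_in, c_out, depth_multiplier=1):
    # k x k depthwise filters plus the 1x1 pointwise projection.
    return k * k * c_in * depth_multiplier + c_in * depth_multiplier * c_out

# For the 512 -> 512 blocks of MobileNet v1:
print(conv_params(3, 512, 512))       # 2359296
print(separable_params(3, 512, 512))  # 266752, roughly 8.8x fewer weights
```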
+MobileNets support any input size greater
+than 32 x 32, with larger image sizes
+offering better performance.
+
+The number of parameters and number of multiply-adds
+can be modified by using the `alpha` parameter,
+which increases/decreases the number of filters in each layer.
+By altering the image size and `alpha` parameter,
+all 22 models from the paper can be built, with ImageNet weights provided.
+
+The paper demonstrates the performance of MobileNets using `alpha` values of
+0.35, 0.5, 0.75, 1.0 (also called 100% MobileNet), 1.3, and 1.4.
+For each of these `alpha` values, weights for 5 different input image sizes
+are provided (224, 192, 160, 128, and 96).
+
+The following table describes the performance of
+MobileNetV2 on various input sizes (MACs stands for Multiply-Adds):
+
+| Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy |
+|---------------------------|----------|----------------|----------------|----------------|
+| [mobilenet_v2_1.4_224]    | 582      | 6.06           | 75.0           | 92.5           |
+| [mobilenet_v2_1.3_224]    | 509      | 5.34           | 74.4           | 92.1           |
+| [mobilenet_v2_1.0_224]    | 300      | 3.47           | 71.8           | 91.0           |
+| [mobilenet_v2_1.0_192]    | 221      | 3.47           | 70.7           | 90.1           |
+| [mobilenet_v2_1.0_160]    | 154      | 3.47           | 68.8           | 89.0           |
+| [mobilenet_v2_1.0_128]    | 99       | 3.47           | 65.3           | 86.9           |
+| [mobilenet_v2_1.0_96]     | 56       | 3.47           | 60.3           | 83.2           |
+| [mobilenet_v2_0.75_224]   | 209      | 2.61           | 69.8           | 89.6           |
+| [mobilenet_v2_0.75_192]   | 153      | 2.61           | 68.7           | 88.9           |
+| [mobilenet_v2_0.75_160]   | 107      | 2.61           | 66.4           | 87.3           |
+| [mobilenet_v2_0.75_128]   | 69       | 2.61           | 63.2           | 85.3           |
+| [mobilenet_v2_0.75_96]    | 39       | 2.61           | 58.8           | 81.6           |
+| [mobilenet_v2_0.5_224]    | 97       | 1.95           | 65.4           | 86.4           |
+| [mobilenet_v2_0.5_192]    | 71       | 1.95           | 63.9           | 85.4           |
+| [mobilenet_v2_0.5_160]    | 50       | 1.95           | 61.0           | 83.2           |
+| [mobilenet_v2_0.5_128]    | 32       | 1.95           | 57.7           | 80.8           |
+| [mobilenet_v2_0.5_96]     | 18       | 1.95           | 51.2           | 75.8           |
+| [mobilenet_v2_0.35_224]   | 59       | 1.66           | 60.3           | 82.9           |
+| [mobilenet_v2_0.35_192]   | 43       | 1.66           | 58.2           | 81.2           |
+| [mobilenet_v2_0.35_160]   | 30       | 1.66           | 55.7           | 79.1           |
+| [mobilenet_v2_0.35_128]   | 20       | 1.66           | 50.8           | 75.0           |
+| [mobilenet_v2_0.35_96]    | 11       | 1.66           | 45.5           | 70.4           |
+
+The weights for all 22 models are obtained and translated
+from TensorFlow checkpoints found [here]
+(https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md).
+
+# Reference
+
+This file contains building code for MobileNetV2, based on
+[MobileNetV2: Inverted Residuals and Linear Bottlenecks]
+(https://arxiv.org/abs/1801.04381)
+
+Tests comparing this model to the existing TensorFlow model can be
+found at [mobilenet_v2_keras]
+(https://github.com/JonathanCMitchell/mobilenet_v2_keras)
+"""
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import division
+
+import os
+import warnings
+import numpy as np
+
+from . import get_keras_submodule
+
+backend = get_keras_submodule('backend')
+engine = get_keras_submodule('engine')
+layers = get_keras_submodule('layers')
+models = get_keras_submodule('models')
+keras_utils = get_keras_submodule('utils')
+
+from . import imagenet_utils
+from .imagenet_utils import decode_predictions
+from .imagenet_utils import _obtain_input_shape
+
+# TODO Change path to v1.1
+BASE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/'
+                    'releases/download/v1.1/')
+
+
+def relu6(x):
+    return backend.relu(x, max_value=6)
+
+
+def preprocess_input(x):
+    """Preprocesses a numpy array encoding a batch of images.
+
+    This function applies the "Inception" preprocessing which converts
+    the RGB values from [0, 255] to [-1, 1]. Note that this preprocessing
+    function is different from `imagenet_utils.preprocess_input()`.
+
+    # Arguments
+        x: a 4D numpy array consisting of RGB values within [0, 255].
+
+    # Returns
+        Preprocessed array.
+    """
+    x = x.astype(np.float32)
+    x /= 127.5
+    x -= 1.
+    return x
+
+
+# This function is taken from the original tf repo.
+# It ensures that all layers have a channel number that is divisible by 8.
+# It can be seen here:
+# https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+
+
+def _make_divisible(v, divisor, min_value=None):
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+def MobileNetV2(input_shape=None,
+                alpha=1.0,
+                depth_multiplier=1,
+                include_top=True,
+                weights='imagenet',
+                input_tensor=None,
+                pooling=None,
+                classes=1000):
+    """Instantiates the MobileNetV2 architecture.
+
+    To load a MobileNetV2 model via `load_model`, import the custom
+    object `relu6` and pass it to the `custom_objects` parameter.
+    E.g.
+    model = load_model('mobilenet_v2.h5', custom_objects={
+                       'relu6': mobilenet_v2.relu6})
+
+    # Arguments
+        input_shape: optional shape tuple, to be specified if you would
+            like to use a model with an input image resolution that is not
+            (224, 224, 3).
+            It should have exactly 3 input channels.
+            You can also omit this option if you would like
+            to infer input_shape from an input_tensor.
+            If you choose to include both input_tensor and input_shape, then
+            input_shape will be used if they match; if the shapes
+            do not match, an error will be raised.
+            E.g. `(160, 160, 3)` would be one valid value.
+        alpha: controls the width of the network. This is known as the
+            width multiplier in the MobileNetV2 paper.
+            - If `alpha` < 1.0, proportionally decreases the number
+                of filters in each layer.
+            - If `alpha` > 1.0, proportionally increases the number
+                of filters in each layer.
+            - If `alpha` = 1, the default number of filters from the paper
+                is used at each layer.
+        depth_multiplier: depth multiplier for depthwise convolution
+            (the number of depthwise output channels per input channel)
+        include_top: whether to include the fully-connected
+            layer at the top of the network.
+        weights: one of `None` (random initialization),
+            'imagenet' (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional Keras tensor (i.e. output of
+            `layers.Input()`)
+            to use as image input for the model.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model
+                will be the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a
+                2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+
+    # Returns
+        A Keras model instance.
+
+    # Raises
+        ValueError: in case of invalid argument for `weights`,
+            invalid input shape, or an invalid `depth_multiplier`,
+            `alpha` or input size when weights='imagenet'
+    """
+
+    if not (weights in {'imagenet', None} or os.path.exists(weights)):
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization), `imagenet` '
+                         '(pre-training on ImageNet), '
+                         'or the path to the weights file to be loaded.')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as ImageNet with `include_top` '
+                         'as true, `classes` should be 1000')
+
+    # Determine proper input shape and default size.
+    # If both input_shape and input_tensor are used, they should match
+    if input_shape is not None and input_tensor is not None:
+        try:
+            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
+        except ValueError:
+            try:
+                is_input_t_tensor = backend.is_keras_tensor(
+                    engine.get_source_inputs(input_tensor))
+            except ValueError:
+                raise ValueError('input_tensor: ', input_tensor,
+                                 'is not a valid input tensor')
+        if is_input_t_tensor:
+            if backend.image_data_format() == 'channels_first':
+                # shape is (batch, channels, rows, cols); compare rows
+                if input_tensor._keras_shape[2] != input_shape[1]:
+                    raise ValueError('input_shape: ', input_shape,
+                                     'and input_tensor: ', input_tensor,
+                                     'do not meet the same shape requirements')
+            else:
+                # shape is (batch, rows, cols, channels); compare rows
+                if input_tensor._keras_shape[1] != input_shape[0]:
+                    raise ValueError('input_shape: ', input_shape,
+                                     'and input_tensor: ', input_tensor,
+                                     'do not meet the same shape requirements')
+        else:
+            raise ValueError('input_tensor specified: ', input_tensor,
+                             'is not a keras tensor')
+
+    # If input_shape is None, infer shape from input_tensor
+    if input_shape is None and input_tensor is not None:
+
+        try:
+            backend.is_keras_tensor(input_tensor)
+        except ValueError:
+            raise ValueError('input_tensor: ', input_tensor,
+                             'is type: ', type(input_tensor),
+                             'which is not a valid type')
+
+        if input_shape is None and not backend.is_keras_tensor(input_tensor):
+            default_size = 224
+        elif input_shape is None and backend.is_keras_tensor(input_tensor):
+            if backend.image_data_format() == 'channels_first':
+                rows = input_tensor._keras_shape[2]
+                cols = input_tensor._keras_shape[3]
+            else:
+                rows = input_tensor._keras_shape[1]
+                cols = input_tensor._keras_shape[2]
+
+            if rows == cols and rows in [96, 128, 160, 192, 224]:
+                default_size = rows
+            else:
+                default_size = 224
+
+    # If input_shape is None and no input_tensor
+    elif input_shape is None:
+        default_size = 224
+
+    # If input_shape is not None, assume default size
+    else:
+        if backend.image_data_format() == 'channels_first':
+            rows = input_shape[1]
+            cols = input_shape[2]
+        else:
+            rows = input_shape[0]
+            cols = input_shape[1]
+
+        if rows == cols and rows in [96, 128, 160, 192, 224]:
+            default_size = rows
+        else:
+            default_size = 224
+
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=default_size,
+                                      min_size=32,
+                                      data_format=backend.image_data_format(),
+                                      require_flatten=include_top,
+                                      weights=weights)
+
+    if backend.image_data_format() == 'channels_last':
+        row_axis, col_axis = (0, 1)
+    else:
+        row_axis, col_axis = (1, 2)
+    rows = input_shape[row_axis]
+    cols = input_shape[col_axis]
+
+    if weights == 'imagenet':
+        if depth_multiplier != 1:
+            raise ValueError('If imagenet weights are being loaded, '
+                             'depth multiplier must be 1')
+
+        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
+            raise ValueError('If imagenet weights are being loaded, '
+                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
+                             '`1.0`, `1.3` or `1.4` only.')
+
+        if rows != cols or rows not in [96, 128, 160, 192, 224]:
+            if rows is None:
+                rows = 224
+                warnings.warn('MobileNetV2 shape is undefined.'
+                              ' Weights for input shape '
+                              '(224, 224) will be loaded.')
+            else:
+                raise ValueError('If imagenet weights are being loaded, '
+                                 'input must have a static square shape '
+                                 '(one of (96, 96), (128, 128), (160, 160), '
+                                 '(192, 192), or (224, 224)). '
+                                 'Input shape provided = %s' % (input_shape,))
+
+    if backend.image_data_format() != 'channels_last':
+        warnings.warn('The MobileNet family of models is only available '
+                      'for the input data format "channels_last" '
+                      '(width, height, channels). '
+                      'However your settings specify the default '
+                      'data format "channels_first" (channels, width, height).'
+                      ' You should set `image_data_format="channels_last"` '
+                      'in your Keras config located at ~/.keras/keras.json. '
+                      'The model being returned right now will expect inputs '
+                      'to follow the "channels_last" data format.')
+        backend.set_image_data_format('channels_last')
+        old_data_format = 'channels_first'
+    else:
+        old_data_format = None
+
+    if input_tensor is None:
+        img_input = layers.Input(shape=input_shape)
+    else:
+        if not backend.is_keras_tensor(input_tensor):
+            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
+        else:
+            img_input = input_tensor
+
+    first_block_filters = _make_divisible(32 * alpha, 8)
+    x = layers.Conv2D(first_block_filters,
+                      kernel_size=3,
+                      strides=(2, 2),
+                      padding='same',
+                      use_bias=False,
+                      name='Conv1')(img_input)
+    x = layers.BatchNormalization(
+        epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x)
+    x = layers.Activation(relu6, name='Conv1_relu')(x)
+
+    x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
+                            expansion=1, block_id=0)
+
+    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
+                            expansion=6, block_id=1)
+    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
+                            expansion=6, block_id=2)
+
+    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
+                            expansion=6, block_id=3)
+    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
+                            expansion=6, block_id=4)
+    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
+                            expansion=6, block_id=5)
+
+    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2,
+                            expansion=6, block_id=6)
+    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
+                            expansion=6, block_id=7)
+    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
+                            expansion=6, block_id=8)
+    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
+                            expansion=6, block_id=9)
+
+    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
+                            expansion=6, block_id=10)
+    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
+                            expansion=6, block_id=11)
+    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
+                            expansion=6, block_id=12)
+
+    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2,
+                            expansion=6, block_id=13)
+    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
+                            expansion=6, block_id=14)
+    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
+                            expansion=6, block_id=15)
+
+    x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1,
+                            expansion=6, block_id=16)
+
+    # no alpha applied to last conv as stated in the paper:
+    # if the width multiplier is greater than 1 we
+    # increase the number
of output channels + if alpha > 1.0: + last_block_filters = _make_divisible(1280 * alpha, 8) + else: + last_block_filters = 1280 + + x = layers.Conv2D(last_block_filters, + kernel_size=1, + use_bias=False, + name='Conv_1')(x) + x = layers.BatchNormalization(epsilon=1e-3, + momentum=0.999, + name='Conv_1_bn')(x) + x = layers.Activation(relu6, name='out_relu')(x) + + if include_top: + x = layers.GlobalAveragePooling2D()(x) + x = layers.Dense(classes, activation='softmax', + use_bias=True, name='Logits')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = engine.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = models.Model(inputs, x, + name='mobilenetv2_%0.2f_%s' % (alpha, rows)) + + # load weights + if weights == 'imagenet': + if backend.image_data_format() == 'channels_first': + raise ValueError('Weights for "channels_first" format ' + 'are not available.') + + if include_top: + model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + + str(alpha) + '_' + str(rows) + '.h5') + weigh_path = BASE_WEIGHT_PATH + model_name + weights_path = keras_utils.get_file( + model_name, weigh_path, cache_subdir='models') + else: + model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + + str(alpha) + '_' + str(rows) + '_no_top' + '.h5') + weigh_path = BASE_WEIGHT_PATH + model_name + weights_path = keras_utils.get_file( + model_name, weigh_path, cache_subdir='models') + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + if old_data_format: + backend.set_image_data_format(old_data_format) + return model + + +def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): + in_channels = inputs._keras_shape[-1] + pointwise_conv_filters = int(filters * alpha) + pointwise_filters = _make_divisible(pointwise_conv_filters, 8) + x = inputs + prefix = 'block_{}_'.format(block_id) + + if block_id: + # Expand + x = layers.Conv2D(expansion * in_channels, + kernel_size=1, + padding='same', + use_bias=False, + activation=None, + name=prefix + 'expand')(x) + x = layers.BatchNormalization(epsilon=1e-3, + momentum=0.999, + name=prefix + 'expand_BN')(x) + x = layers.Activation(relu6, name=prefix + 'expand_relu')(x) + else: + prefix = 'expanded_conv_' + + # Depthwise + x = layers.DepthwiseConv2D(kernel_size=3, + strides=stride, + activation=None, + use_bias=False, + padding='same', + name=prefix + 'depthwise')(x) + x = layers.BatchNormalization(epsilon=1e-3, + momentum=0.999, + name=prefix + 'depthwise_BN')(x) + + x = layers.Activation(relu6, name=prefix + 'depthwise_relu')(x) + + # Project + x = layers.Conv2D(pointwise_filters, + kernel_size=1, + padding='same', + use_bias=False, + activation=None, + name=prefix + 'project')(x) + x = layers.BatchNormalization( + epsilon=1e-3, momentum=0.999, name=prefix + 'project_BN')(x) + + if in_channels == pointwise_filters and stride == 1: + return layers.Add(name=prefix + 'add')([inputs, x]) + return x diff --git a/keras_applications/nasnet.py b/keras_applications/nasnet.py new file mode 100644 index 0000000..8a82a78 --- /dev/null +++ b/keras_applications/nasnet.py @@ -0,0 +1,725 @@ +"""NASNet-A models for Keras. 
+ +NASNet refers to Neural Architecture Search Network, a family of models +that were designed automatically by learning the model architectures +directly on the dataset of interest. + +Here we consider NASNet-A, the highest performance model that was found +for the CIFAR-10 dataset, and then extended to ImageNet 2012 dataset, +obtaining state of the art performance on CIFAR-10 and ImageNet 2012. +Only the NASNet-A models, and their respective weights, which are suited +for ImageNet 2012 are provided. + +The below table describes the performance on ImageNet 2012: +-------------------------------------------------------------------------------- + Architecture | Top-1 Acc | Top-5 Acc | Multiply-Adds | Params (M) +-------------------------------------------------------------------------------- +| NASNet-A (4 @ 1056) | 74.0 % | 91.6 % | 564 M | 5.3 | +| NASNet-A (6 @ 4032) | 82.7 % | 96.2 % | 23.8 B | 88.9 | +-------------------------------------------------------------------------------- + +Weights obtained from the official TensorFlow repository found at +https://github.com/tensorflow/models/tree/master/research/slim/nets/nasnet + +# References + + - [Learning Transferable Architectures for Scalable Image Recognition] + (https://arxiv.org/abs/1707.07012) + +This model is based on the following implementations: + + - [TF Slim Implementation] + (https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py) + - [TensorNets implementation] + (https://github.com/taehoonlee/tensornets/blob/master/tensornets/nasnets.py) +""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import division + +import os +import warnings + +from . import get_keras_submodule + +backend = get_keras_submodule('backend') +engine = get_keras_submodule('engine') +layers = get_keras_submodule('layers') +models = get_keras_submodule('models') +keras_utils = get_keras_submodule('utils') + +from . import imagenet_utils +from .imagenet_utils import decode_predictions +from .imagenet_utils import _obtain_input_shape + +BASE_WEIGHTS_PATH = ('https://github.com/titu1994/Keras-NASNet/' + 'releases/download/v1.2/') +NASNET_MOBILE_WEIGHT_PATH = BASE_WEIGHTS_PATH + 'NASNet-mobile.h5' +NASNET_MOBILE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + 'NASNet-mobile-no-top.h5' +NASNET_LARGE_WEIGHT_PATH = BASE_WEIGHTS_PATH + 'NASNet-large.h5' +NASNET_LARGE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + 'NASNet-large-no-top.h5' + + +def NASNet(input_shape=None, + penultimate_filters=4032, + num_blocks=6, + stem_block_filters=96, + skip_reduction=True, + filter_multiplier=2, + include_top=True, + weights=None, + input_tensor=None, + pooling=None, + classes=1000, + default_size=None): + '''Instantiates a NASNet model. + + Optionally loads weights pre-trained on ImageNet. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + + # Arguments + input_shape: Optional shape tuple, the input shape + is by default `(331, 331, 3)` for NASNetLarge and + `(224, 224, 3)` for NASNetMobile. + It should have exactly 3 input channels, + and width and height should be no smaller than 32. + E.g. `(224, 224, 3)` would be one valid value. + penultimate_filters: Number of filters in the penultimate layer. + NASNet models use the notation `NASNet (N @ P)`, where: + - N is the number of blocks + - P is the number of penultimate filters + num_blocks: Number of repeated blocks of the NASNet model. 
+ NASNet models use the notation `NASNet (N @ P)`, where: + - N is the number of blocks + - P is the number of penultimate filters + stem_block_filters: Number of filters in the initial stem block + skip_reduction: Whether to skip the reduction step at the tail + end of the network. + filter_multiplier: Controls the width of the network. + - If `filter_multiplier` < 1.0, proportionally decreases the number + of filters in each layer. + - If `filter_multiplier` > 1.0, proportionally increases the number + of filters in each layer. + - If `filter_multiplier` = 1, default number of filters from the + paper are used at each layer. + include_top: Whether to include the fully-connected + layer at the top of the network. + weights: `None` (random initialization) or + `imagenet` (ImageNet weights) + input_tensor: Optional Keras tensor (i.e. output of + `layers.Input()`) + to use as image input for the model. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + default_size: Specifies the default image size of the model + + # Returns + A Keras model instance. + + # Raises + ValueError: In case of invalid argument for `weights`, + invalid input shape or invalid `penultimate_filters` value. + ''' + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as ImageNet with `include_top` ' + 'as true, `classes` should be 1000') + + if (isinstance(input_shape, tuple) and + None in input_shape and + weights == 'imagenet'): + raise ValueError('When specifying the input shape of a NASNet' + ' and loading `ImageNet` weights, ' + 'the input_shape argument must be static ' + '(no None entries). Got: `input_shape=' + + str(input_shape) + '`.') + + if default_size is None: + default_size = 331 + + # Determine proper input shape and default size. + input_shape = _obtain_input_shape(input_shape, + default_size=default_size, + min_size=32, + data_format=backend.image_data_format(), + require_flatten=False, + weights=weights) + + if backend.image_data_format() != 'channels_last': + warnings.warn('The NASNet family of models is only available ' + 'for the input data format "channels_last" ' + '(width, height, channels). ' + 'However your settings specify the default ' + 'data format "channels_first" (channels, width, height).' + ' You should set `image_data_format="channels_last"` ' + 'in your Keras config located at ~/.keras/keras.json. 
' + 'The model being returned right now will expect inputs ' + 'to follow the "channels_last" data format.') + backend.set_image_data_format('channels_last') + old_data_format = 'channels_first' + else: + old_data_format = None + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + if penultimate_filters % 24 != 0: + raise ValueError( + 'For NASNet-A models, the value of `penultimate_filters` ' + 'needs to be divisible by 24. Current value: %d' % + penultimate_filters) + + channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 + filters = penultimate_filters // 24 + + x = layers.Conv2D(stem_block_filters, (3, 3), + strides=(2, 2), + padding='valid', + use_bias=False, + name='stem_conv1', + kernel_initializer='he_normal')(img_input) + + x = layers.BatchNormalization( + axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='stem_bn1')(x) + + p = None + x, p = _reduction_a_cell(x, p, filters // (filter_multiplier ** 2), + block_id='stem_1') + x, p = _reduction_a_cell(x, p, filters // filter_multiplier, + block_id='stem_2') + + for i in range(num_blocks): + x, p = _normal_a_cell(x, p, filters, block_id='%d' % (i)) + + x, p0 = _reduction_a_cell(x, p, filters * filter_multiplier, + block_id='reduce_%d' % (num_blocks)) + + p = p0 if not skip_reduction else p + + for i in range(num_blocks): + x, p = _normal_a_cell(x, p, filters * filter_multiplier, + block_id='%d' % (num_blocks + i + 1)) + + x, p0 = _reduction_a_cell(x, p, filters * filter_multiplier ** 2, + block_id='reduce_%d' % (2 * num_blocks)) + + p = p0 if not skip_reduction else p + + for i in range(num_blocks): + x, p = _normal_a_cell(x, p, filters * filter_multiplier ** 2, + block_id='%d' % (2 * num_blocks + i + 1)) + + x = layers.Activation('relu')(x) + + if include_top: + x = layers.GlobalAveragePooling2D()(x) + x = layers.Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. 
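Before the inputs are wired up below, it may help to see the filter arithmetic implied by `penultimate_filters` in the code above: the last normal cell concatenates six branches, and the width doubles at each of the two reduction stages, which is where the divisibility-by-24 check comes from. A small sketch (the function name and dict keys are illustrative):

```python
def nasnet_filter_schedule(penultimate_filters=4032, filter_multiplier=2):
    # 6 concatenated branches x (filter_multiplier ** 2) growth = 24.
    assert penultimate_filters % 24 == 0
    filters = penultimate_filters // 24
    return {
        'stem_1': filters // filter_multiplier ** 2,
        'stem_2': filters // filter_multiplier,
        'stage_1': filters,
        'stage_2': filters * filter_multiplier,
        'stage_3': filters * filter_multiplier ** 2,
        'penultimate': 6 * filters * filter_multiplier ** 2,
    }

print(nasnet_filter_schedule(4032))  # NASNet-A (6 @ 4032): stage_3 = 672
print(nasnet_filter_schedule(1056))  # NASNet-A (4 @ 1056): stage_3 = 176
```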
+ if input_tensor is not None: + inputs = engine.get_source_inputs(input_tensor) + else: + inputs = img_input + + model = models.Model(inputs, x, name='NASNet') + + # load weights + if weights == 'imagenet': + if default_size == 224: # mobile version + if include_top: + weights_path = keras_utils.get_file( + 'nasnet_mobile.h5', + NASNET_MOBILE_WEIGHT_PATH, + cache_subdir='models', + file_hash='020fb642bf7360b370c678b08e0adf61') + else: + weights_path = keras_utils.get_file( + 'nasnet_mobile_no_top.h5', + NASNET_MOBILE_WEIGHT_PATH_NO_TOP, + cache_subdir='models', + file_hash='1ed92395b5b598bdda52abe5c0dbfd63') + model.load_weights(weights_path) + elif default_size == 331: # large version + if include_top: + weights_path = keras_utils.get_file( + 'nasnet_large.h5', + NASNET_LARGE_WEIGHT_PATH, + cache_subdir='models', + file_hash='11577c9a518f0070763c2b964a382f17') + else: + weights_path = keras_utils.get_file( + 'nasnet_large_no_top.h5', + NASNET_LARGE_WEIGHT_PATH_NO_TOP, + cache_subdir='models', + file_hash='d81d89dc07e6e56530c4e77faddd61b5') + model.load_weights(weights_path) + else: + raise ValueError( + 'ImageNet weights can only be loaded with NASNetLarge' + ' or NASNetMobile') + elif weights is not None: + model.load_weights(weights) + + if old_data_format: + backend.set_image_data_format(old_data_format) + + return model + + +def NASNetLarge(input_shape=None, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000): + '''Instantiates a NASNet model in ImageNet mode. + + Optionally loads weights pre-trained on ImageNet. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + + # Arguments + input_shape: Optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(331, 331, 3)` for NASNetLarge. + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(224, 224, 3)` would be one valid value. + include_top: Whether to include the fully-connected + layer at the top of the network. + weights: `None` (random initialization) or + `imagenet` (ImageNet weights) + input_tensor: Optional Keras tensor (i.e. output of + `layers.Input()`) + to use as image input for the model. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + + # Returns + A Keras model instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + RuntimeError: If attempting to run this model with a + backend that does not support separable convolutions. 
+ ''' + return NASNet(input_shape, + penultimate_filters=4032, + num_blocks=6, + stem_block_filters=96, + skip_reduction=True, + filter_multiplier=2, + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + pooling=pooling, + classes=classes, + default_size=331) + + +def NASNetMobile(input_shape=None, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000): + '''Instantiates a Mobile NASNet model in ImageNet mode. + + Optionally loads weights pre-trained on ImageNet. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + + # Arguments + input_shape: Optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` for NASNetMobile + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(224, 224, 3)` would be one valid value. + include_top: Whether to include the fully-connected + layer at the top of the network. + weights: `None` (random initialization) or + `imagenet` (ImageNet weights) + input_tensor: Optional Keras tensor (i.e. output of + `layers.Input()`) + to use as image input for the model. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + + # Returns + A Keras model instance. + + # Raises + ValueError: In case of invalid argument for `weights`, + or invalid input shape. + RuntimeError: If attempting to run this model with a + backend that does not support separable convolutions. + ''' + return NASNet(input_shape, + penultimate_filters=1056, + num_blocks=4, + stem_block_filters=32, + skip_reduction=False, + filter_multiplier=2, + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + pooling=pooling, + classes=classes, + default_size=224) + + +def _separable_conv_block(ip, filters, + kernel_size=(3, 3), + strides=(1, 1), + block_id=None): + '''Adds 2 blocks of [relu-separable conv-batchnorm]. 
+ + # Arguments + ip: Input tensor + filters: Number of output filters per layer + kernel_size: Kernel size of separable convolutions + strides: Strided convolution for downsampling + block_id: String block_id + + # Returns + A Keras tensor + ''' + channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 + + with backend.name_scope('separable_conv_block_%s' % block_id): + x = layers.Activation('relu')(ip) + x = layers.SeparableConv2D(filters, kernel_size, + strides=strides, + name='separable_conv_1_%s' % block_id, + padding='same', use_bias=False, + kernel_initializer='he_normal')(x) + x = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name='separable_conv_1_bn_%s' % (block_id))(x) + x = layers.Activation('relu')(x) + x = layers.SeparableConv2D(filters, kernel_size, + name='separable_conv_2_%s' % block_id, + padding='same', + use_bias=False, + kernel_initializer='he_normal')(x) + x = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name='separable_conv_2_bn_%s' % (block_id))(x) + return x + + +def _adjust_block(p, ip, filters, block_id=None): + '''Adjusts the input `previous path` to match the shape of the `input`. + + Used in situations where the output number of filters needs to be changed. + + # Arguments + p: Input tensor which needs to be modified + ip: Input tensor whose shape needs to be matched + filters: Number of output filters to be matched + block_id: String block_id + + # Returns + Adjusted Keras tensor + ''' + channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 + img_dim = 2 if backend.image_data_format() == 'channels_first' else -2 + + ip_shape = backend.int_shape(ip) + + if p is not None: + p_shape = backend.int_shape(p) + + with backend.name_scope('adjust_block'): + if p is None: + p = ip + + elif p_shape[img_dim] != ip_shape[img_dim]: + with backend.name_scope('adjust_reduction_block_%s' % block_id): + p = layers.Activation('relu', + name='adjust_relu_1_%s' % block_id)(p) + p1 = layers.AveragePooling2D( + (1, 1), + strides=(2, 2), + padding='valid', + name='adjust_avg_pool_1_%s' % block_id)(p) + p1 = layers.Conv2D( + filters // 2, (1, 1), + padding='same', + use_bias=False, name='adjust_conv_1_%s' % block_id, + kernel_initializer='he_normal')(p1) + + p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p) + p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2) + p2 = layers.AveragePooling2D( + (1, 1), + strides=(2, 2), + padding='valid', + name='adjust_avg_pool_2_%s' % block_id)(p2) + p2 = layers.Conv2D( + filters // 2, (1, 1), + padding='same', + use_bias=False, + name='adjust_conv_2_%s' % block_id, + kernel_initializer='he_normal')(p2) + + p = layers.concatenate([p1, p2], axis=channel_dim) + p = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name='adjust_bn_%s' % block_id)(p) + + elif p_shape[channel_dim] != filters: + with backend.name_scope('adjust_projection_block_%s' % block_id): + p = layers.Activation('relu')(p) + p = layers.Conv2D( + filters, + (1, 1), + strides=(1, 1), + padding='same', + name='adjust_conv_projection_%s' % block_id, + use_bias=False, + kernel_initializer='he_normal')(p) + p = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name='adjust_bn_%s' % block_id)(p) + return p + + +def _normal_a_cell(ip, p, filters, block_id=None): + '''Adds a Normal cell for NASNet-A (Fig. 4 in the paper). 
+ + # Arguments + ip: Input tensor `x` + p: Input tensor `p` + filters: Number of output filters + block_id: String block_id + + # Returns + A Keras tensor + ''' + channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 + + with backend.name_scope('normal_A_block_%s' % block_id): + p = _adjust_block(p, ip, filters, block_id) + + h = layers.Activation('relu')(ip) + h = layers.Conv2D( + filters, (1, 1), + strides=(1, 1), + padding='same', + name='normal_conv_1_%s' % block_id, + use_bias=False, + kernel_initializer='he_normal')(h) + h = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name='normal_bn_1_%s' % block_id)(h) + + with backend.name_scope('block_1'): + x1_1 = _separable_conv_block( + h, filters, + kernel_size=(5, 5), + block_id='normal_left1_%s' % block_id) + x1_2 = _separable_conv_block( + p, filters, + block_id='normal_right1_%s' % block_id) + x1 = layers.add([x1_1, x1_2], name='normal_add_1_%s' % block_id) + + with backend.name_scope('block_2'): + x2_1 = _separable_conv_block( + p, filters, (5, 5), + block_id='normal_left2_%s' % block_id) + x2_2 = _separable_conv_block( + p, filters, (3, 3), + block_id='normal_right2_%s' % block_id) + x2 = layers.add([x2_1, x2_2], name='normal_add_2_%s' % block_id) + + with backend.name_scope('block_3'): + x3 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding='same', + name='normal_left3_%s' % (block_id))(h) + x3 = layers.add([x3, p], name='normal_add_3_%s' % block_id) + + with backend.name_scope('block_4'): + x4_1 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding='same', + name='normal_left4_%s' % (block_id))(p) + x4_2 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding='same', + name='normal_right4_%s' % (block_id))(p) + x4 = layers.add([x4_1, x4_2], name='normal_add_4_%s' % block_id) + + with backend.name_scope('block_5'): + x5 = _separable_conv_block(h, filters, + block_id='normal_left5_%s' % block_id) + x5 = layers.add([x5, h], name='normal_add_5_%s' % block_id) + + x = layers.concatenate([p, x1, x2, x3, x4, x5], + axis=channel_dim, + name='normal_concat_%s' % block_id) + return x, ip + + +def _reduction_a_cell(ip, p, filters, block_id=None): + '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper). 
+ + # Arguments + ip: Input tensor `x` + p: Input tensor `p` + filters: Number of output filters + block_id: String block_id + + # Returns + A Keras tensor + ''' + channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 + + with backend.name_scope('reduction_A_block_%s' % block_id): + p = _adjust_block(p, ip, filters, block_id) + + h = layers.Activation('relu')(ip) + h = layers.Conv2D( + filters, (1, 1), + strides=(1, 1), + padding='same', + name='reduction_conv_1_%s' % block_id, + use_bias=False, + kernel_initializer='he_normal')(h) + h = layers.BatchNormalization( + axis=channel_dim, + momentum=0.9997, + epsilon=1e-3, + name='reduction_bn_1_%s' % block_id)(h) + + with backend.name_scope('block_1'): + x1_1 = _separable_conv_block( + h, filters, (5, 5), + strides=(2, 2), + block_id='reduction_left1_%s' % block_id) + x1_2 = _separable_conv_block( + p, filters, (7, 7), + strides=(2, 2), + block_id='reduction_right1_%s' % block_id) + x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id) + + with backend.name_scope('block_2'): + x2_1 = layers.MaxPooling2D( + (3, 3), + strides=(2, 2), + padding='same', + name='reduction_left2_%s' % block_id)(h) + x2_2 = _separable_conv_block( + p, filters, (7, 7), + strides=(2, 2), + block_id='reduction_right2_%s' % block_id) + x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id) + + with backend.name_scope('block_3'): + x3_1 = layers.AveragePooling2D( + (3, 3), + strides=(2, 2), + padding='same', + name='reduction_left3_%s' % block_id)(h) + x3_2 = _separable_conv_block( + p, filters, (5, 5), + strides=(2, 2), + block_id='reduction_right3_%s' % block_id) + x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id) + + with backend.name_scope('block_4'): + x4 = layers.AveragePooling2D( + (3, 3), + strides=(1, 1), + padding='same', + name='reduction_left4_%s' % block_id)(x1) + x4 = layers.add([x2, x4]) + + with backend.name_scope('block_5'): + x5_1 = _separable_conv_block( + x1, filters, (3, 3), + block_id='reduction_left4_%s' % block_id) + x5_2 = layers.MaxPooling2D( + (3, 3), + strides=(2, 2), + padding='same', + name='reduction_right5_%s' % block_id)(h) + x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id) + + x = layers.concatenate( + [x2, x3, x4, x5], + axis=channel_dim, + name='reduction_concat_%s' % block_id) + return x, ip diff --git a/keras_applications/resnet50.py b/keras_applications/resnet50.py new file mode 100644 index 0000000..ebc4860 --- /dev/null +++ b/keras_applications/resnet50.py @@ -0,0 +1,280 @@ +"""ResNet50 model for Keras. + +# Reference: + +- [Deep Residual Learning for Image Recognition]( + https://arxiv.org/abs/1512.03385) + +Adapted from code contributed by BigMoyan. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from . import get_keras_submodule + +backend = get_keras_submodule('backend') +engine = get_keras_submodule('engine') +layers = get_keras_submodule('layers') +models = get_keras_submodule('models') +keras_utils = get_keras_submodule('utils') + +from . 
import imagenet_utils
+from .imagenet_utils import decode_predictions
+from .imagenet_utils import _obtain_input_shape
+
+
+WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
+                'releases/download/v0.2/'
+                'resnet50_weights_tf_dim_ordering_tf_kernels.h5')
+WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
+                       'releases/download/v0.2/'
+                       'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')
+
+
+def identity_block(input_tensor, kernel_size, filters, stage, block):
+    """The identity block is the block that has no conv layer at shortcut.
+
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: default 3, the kernel size of
+            the middle conv layer in the main path
+        filters: list of integers, the filters of
+            the 3 conv layers in the main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+
+    # Returns
+        Output tensor for the block.
+    """
+    filters1, filters2, filters3 = filters
+    if backend.image_data_format() == 'channels_last':
+        bn_axis = 3
+    else:
+        bn_axis = 1
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+    x = layers.Conv2D(filters1, (1, 1),
+                      name=conv_name_base + '2a')(input_tensor)
+    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
+    x = layers.Activation('relu')(x)
+
+    x = layers.Conv2D(filters2, kernel_size,
+                      padding='same', name=conv_name_base + '2b')(x)
+    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
+    x = layers.Activation('relu')(x)
+
+    x = layers.Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
+    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
+
+    x = layers.add([x, input_tensor])
+    x = layers.Activation('relu')(x)
+    return x
+
+
+def conv_block(input_tensor,
+               kernel_size,
+               filters,
+               stage,
+               block,
+               strides=(2, 2)):
+    """A block that has a conv layer at shortcut.
+
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: default 3, the kernel size of
+            the middle conv layer in the main path
+        filters: list of integers, the filters of
+            the 3 conv layers in the main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+        strides: Strides for the first conv layer in the block.
+
+    # Returns
+        Output tensor for the block.
+
+    Note that from stage 3,
+    the first conv layer in the main path has strides=(2, 2),
+    and the shortcut has strides=(2, 2) as well.
+    """
+    filters1, filters2, filters3 = filters
+    if backend.image_data_format() == 'channels_last':
+        bn_axis = 3
+    else:
+        bn_axis = 1
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+    x = layers.Conv2D(filters1, (1, 1), strides=strides,
+                      name=conv_name_base + '2a')(input_tensor)
+    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
+    x = layers.Activation('relu')(x)
+
+    x = layers.Conv2D(filters2, kernel_size, padding='same',
+                      name=conv_name_base + '2b')(x)
+    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
+    x = layers.Activation('relu')(x)
+
+    x = layers.Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
+    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
+
+    shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
+                             name=conv_name_base + '1')(input_tensor)
+    shortcut = layers.BatchNormalization(
+        axis=bn_axis, name=bn_name_base + '1')(shortcut)
+
+    x = layers.add([x, shortcut])
+    x = layers.Activation('relu')(x)
+    return x
+
+
+def ResNet50(include_top=True,
+             weights='imagenet',
+             input_tensor=None,
+             input_shape=None,
+             pooling=None,
+             classes=1000):
+    """Instantiates the ResNet50 architecture.
+
+    Optionally loads weights pre-trained on ImageNet.
+    Note that the data format convention used by the model is
+    the one specified in your Keras config at `~/.keras/keras.json`.
+
+    # Arguments
+        include_top: whether to include the fully-connected
+            layer at the top of the network.
+        weights: one of `None` (random initialization),
+            'imagenet' (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+            to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)` (with `channels_last` data format)
+            or `(3, 224, 224)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 197.
+            E.g. `(200, 200, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+
+    # Returns
+        A Keras model instance.
+
+    # Raises
+        ValueError: in case of invalid argument for `weights`,
+            or invalid input shape.
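+
+    # Example
+        A minimal usage sketch (it assumes a local image file
+        `elephant.jpg` and a configured Keras backend; `preprocess_input`
+        here is `imagenet_utils.preprocess_input`, whose default 'caffe'
+        mode matches the preprocessing these weights were trained with):
+
+        ```python
+        import numpy as np
+        from keras.preprocessing import image
+
+        model = ResNet50(weights='imagenet')
+        img = image.load_img('elephant.jpg', target_size=(224, 224))
+        x = np.expand_dims(image.img_to_array(img), axis=0)
+        preds = model.predict(imagenet_utils.preprocess_input(x))
+        print(decode_predictions(preds, top=3)[0])
+        ```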
+ """ + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + + # Determine proper input shape + input_shape = _obtain_input_shape(input_shape, + default_size=224, + min_size=197, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + if backend.image_data_format() == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input) + x = layers.Conv2D(64, (7, 7), + strides=(2, 2), + padding='valid', + name='conv1')(x) + x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x) + x = layers.Activation('relu')(x) + x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) + + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') + x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') + + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') + + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') + + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') + + x = layers.AveragePooling2D((7, 7), name='avg_pool')(x) + + if include_top: + x = layers.Flatten()(x) + x = layers.Dense(classes, activation='softmax', name='fc1000')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = engine.get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. 
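+    # (When 'imagenet' weights are requested, the call below fetches
+    # them with `keras_utils.get_file`, verifies the md5 checksum, and
+    # caches the file under `~/.keras/models`.)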
+    model = models.Model(inputs, x, name='resnet50')
+
+    # load weights
+    if weights == 'imagenet':
+        if include_top:
+            weights_path = keras_utils.get_file(
+                'resnet50_weights_tf_dim_ordering_tf_kernels.h5',
+                WEIGHTS_PATH,
+                cache_subdir='models',
+                md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
+        else:
+            weights_path = keras_utils.get_file(
+                'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                WEIGHTS_PATH_NO_TOP,
+                cache_subdir='models',
+                md5_hash='a268eb855778b3df3c7506639542a6af')
+        model.load_weights(weights_path)
+        if backend.backend() == 'theano':
+            keras_utils.convert_all_kernels_in_model(model)
+    elif weights is not None:
+        model.load_weights(weights)
+
+    return model
diff --git a/keras_applications/vgg16.py b/keras_applications/vgg16.py
new file mode 100644
index 0000000..5177025
--- /dev/null
+++ b/keras_applications/vgg16.py
@@ -0,0 +1,219 @@
+"""VGG16 model for Keras.
+
+# Reference
+
+- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
+    https://arxiv.org/abs/1409.1556)
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from . import get_keras_submodule
+
+backend = get_keras_submodule('backend')
+engine = get_keras_submodule('engine')
+layers = get_keras_submodule('layers')
+models = get_keras_submodule('models')
+keras_utils = get_keras_submodule('utils')
+
+from . import imagenet_utils
+from .imagenet_utils import decode_predictions
+from .imagenet_utils import _obtain_input_shape
+
+
+WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
+                'releases/download/v0.1/'
+                'vgg16_weights_tf_dim_ordering_tf_kernels.h5')
+WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
+                       'releases/download/v0.1/'
+                       'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')
+
+
+def VGG16(include_top=True,
+          weights='imagenet',
+          input_tensor=None,
+          input_shape=None,
+          pooling=None,
+          classes=1000):
+    """Instantiates the VGG16 architecture.
+
+    Optionally loads weights pre-trained on ImageNet.
+    Note that the data format convention used by the model is
+    the one specified in your Keras config at `~/.keras/keras.json`.
+
+    # Arguments
+        include_top: whether to include the 3 fully-connected
+            layers at the top of the network.
+        weights: one of `None` (random initialization),
+            'imagenet' (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional Keras tensor
+            (i.e. output of `layers.Input()`)
+            to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)`
+            (with `channels_last` data format)
+            or `(3, 224, 224)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 48.
+            E.g. `(200, 200, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+ + # Returns + A Keras model instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + """ + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + # Determine proper input shape + input_shape = _obtain_input_shape(input_shape, + default_size=224, + min_size=48, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + # Block 1 + x = layers.Conv2D(64, (3, 3), + activation='relu', + padding='same', + name='block1_conv1')(img_input) + x = layers.Conv2D(64, (3, 3), + activation='relu', + padding='same', + name='block1_conv2')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) + + # Block 2 + x = layers.Conv2D(128, (3, 3), + activation='relu', + padding='same', + name='block2_conv1')(x) + x = layers.Conv2D(128, (3, 3), + activation='relu', + padding='same', + name='block2_conv2')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) + + # Block 3 + x = layers.Conv2D(256, (3, 3), + activation='relu', + padding='same', + name='block3_conv1')(x) + x = layers.Conv2D(256, (3, 3), + activation='relu', + padding='same', + name='block3_conv2')(x) + x = layers.Conv2D(256, (3, 3), + activation='relu', + padding='same', + name='block3_conv3')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) + + # Block 4 + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block4_conv1')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block4_conv2')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block4_conv3')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) + + # Block 5 + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block5_conv1')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block5_conv2')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block5_conv3')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) + + if include_top: + # Classification block + x = layers.Flatten(name='flatten')(x) + x = layers.Dense(4096, activation='relu', name='fc1')(x) + x = layers.Dense(4096, activation='relu', name='fc2')(x) + x = layers.Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = engine.get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. 
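+    # (Unlike resnet50.py, which passes the legacy `md5_hash` argument,
+    # the downloads below are verified via `file_hash`; `get_file`
+    # accepts either and re-downloads on a checksum mismatch.)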
+    model = models.Model(inputs, x, name='vgg16')
+
+    # load weights
+    if weights == 'imagenet':
+        if include_top:
+            weights_path = keras_utils.get_file(
+                'vgg16_weights_tf_dim_ordering_tf_kernels.h5',
+                WEIGHTS_PATH,
+                cache_subdir='models',
+                file_hash='64373286793e3c8b2b4e3219cbf3544b')
+        else:
+            weights_path = keras_utils.get_file(
+                'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                WEIGHTS_PATH_NO_TOP,
+                cache_subdir='models',
+                file_hash='6d6bbae143d832006294945121d1f1fc')
+        model.load_weights(weights_path)
+        if backend.backend() == 'theano':
+            keras_utils.convert_all_kernels_in_model(model)
+    elif weights is not None:
+        model.load_weights(weights)
+
+    return model
diff --git a/keras_applications/vgg19.py b/keras_applications/vgg19.py
new file mode 100644
index 0000000..2fda3bf
--- /dev/null
+++ b/keras_applications/vgg19.py
@@ -0,0 +1,231 @@
+"""VGG19 model for Keras.
+
+# Reference
+
+- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
+    https://arxiv.org/abs/1409.1556)
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from . import get_keras_submodule
+
+backend = get_keras_submodule('backend')
+engine = get_keras_submodule('engine')
+layers = get_keras_submodule('layers')
+models = get_keras_submodule('models')
+keras_utils = get_keras_submodule('utils')
+
+from . import imagenet_utils
+from .imagenet_utils import decode_predictions
+from .imagenet_utils import _obtain_input_shape
+
+
+WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
+                'releases/download/v0.1/'
+                'vgg19_weights_tf_dim_ordering_tf_kernels.h5')
+WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
+                       'releases/download/v0.1/'
+                       'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5')
+
+
+def VGG19(include_top=True,
+          weights='imagenet',
+          input_tensor=None,
+          input_shape=None,
+          pooling=None,
+          classes=1000):
+    """Instantiates the VGG19 architecture.
+
+    Optionally loads weights pre-trained on ImageNet.
+    Note that the data format convention used by the model is
+    the one specified in your Keras config at `~/.keras/keras.json`.
+
+    # Arguments
+        include_top: whether to include the 3 fully-connected
+            layers at the top of the network.
+        weights: one of `None` (random initialization),
+            'imagenet' (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional Keras tensor
+            (i.e. output of `layers.Input()`)
+            to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)`
+            (with `channels_last` data format)
+            or `(3, 224, 224)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 48.
+            E.g. `(200, 200, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+
+    # Returns
+        A Keras model instance.
+ + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + """ + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + # Determine proper input shape + input_shape = _obtain_input_shape(input_shape, + default_size=224, + min_size=48, + data_format=backend.image_data_format(), + require_flatten=include_top, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + # Block 1 + x = layers.Conv2D(64, (3, 3), + activation='relu', + padding='same', + name='block1_conv1')(img_input) + x = layers.Conv2D(64, (3, 3), + activation='relu', + padding='same', + name='block1_conv2')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) + + # Block 2 + x = layers.Conv2D(128, (3, 3), + activation='relu', + padding='same', + name='block2_conv1')(x) + x = layers.Conv2D(128, (3, 3), + activation='relu', + padding='same', + name='block2_conv2')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) + + # Block 3 + x = layers.Conv2D(256, (3, 3), + activation='relu', + padding='same', + name='block3_conv1')(x) + x = layers.Conv2D(256, (3, 3), + activation='relu', + padding='same', + name='block3_conv2')(x) + x = layers.Conv2D(256, (3, 3), + activation='relu', + padding='same', + name='block3_conv3')(x) + x = layers.Conv2D(256, (3, 3), + activation='relu', + padding='same', + name='block3_conv4')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) + + # Block 4 + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block4_conv1')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block4_conv2')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block4_conv3')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block4_conv4')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) + + # Block 5 + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block5_conv1')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block5_conv2')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block5_conv3')(x) + x = layers.Conv2D(512, (3, 3), + activation='relu', + padding='same', + name='block5_conv4')(x) + x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) + + if include_top: + # Classification block + x = layers.Flatten(name='flatten')(x) + x = layers.Dense(4096, activation='relu', name='fc1')(x) + x = layers.Dense(4096, activation='relu', name='fc2')(x) + x = layers.Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. 
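+    # (`get_source_inputs` walks back from `input_tensor` to the
+    # `Input` layers it depends on, so the returned model is usable
+    # even when an intermediate tensor was passed in.)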
+    if input_tensor is not None:
+        inputs = engine.get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = models.Model(inputs, x, name='vgg19')
+
+    # load weights
+    if weights == 'imagenet':
+        if include_top:
+            weights_path = keras_utils.get_file(
+                'vgg19_weights_tf_dim_ordering_tf_kernels.h5',
+                WEIGHTS_PATH,
+                cache_subdir='models',
+                file_hash='cbe5617147190e668d6c5d5026f83318')
+        else:
+            weights_path = keras_utils.get_file(
+                'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                WEIGHTS_PATH_NO_TOP,
+                cache_subdir='models',
+                file_hash='253f8cb515780f3b799900260a226db6')
+        model.load_weights(weights_path)
+        if backend.backend() == 'theano':
+            keras_utils.convert_all_kernels_in_model(model)
+    elif weights is not None:
+        model.load_weights(weights)
+
+    return model
diff --git a/keras_applications/xception.py b/keras_applications/xception.py
new file mode 100644
index 0000000..e83c590
--- /dev/null
+++ b/keras_applications/xception.py
@@ -0,0 +1,338 @@
+"""Xception V1 model for Keras.
+
+On ImageNet, this model gets to a top-1 validation accuracy of 0.790
+and a top-5 validation accuracy of 0.945.
+
+Do note that the input image format for this model is different from
+that of the VGG16 and ResNet models (299x299 instead of 224x224),
+and that the input preprocessing function
+is also different (same as Inception V3).
+
+# Reference
+
+- [Xception: Deep Learning with Depthwise Separable Convolutions](
+    https://arxiv.org/abs/1610.02357)
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import warnings
+
+from . import get_keras_submodule
+
+backend = get_keras_submodule('backend')
+engine = get_keras_submodule('engine')
+layers = get_keras_submodule('layers')
+models = get_keras_submodule('models')
+keras_utils = get_keras_submodule('utils')
+
+from . import imagenet_utils
+from .imagenet_utils import decode_predictions
+from .imagenet_utils import _obtain_input_shape
+
+
+TF_WEIGHTS_PATH = (
+    'https://github.com/fchollet/deep-learning-models/'
+    'releases/download/v0.4/'
+    'xception_weights_tf_dim_ordering_tf_kernels.h5')
+TF_WEIGHTS_PATH_NO_TOP = (
+    'https://github.com/fchollet/deep-learning-models/'
+    'releases/download/v0.4/'
+    'xception_weights_tf_dim_ordering_tf_kernels_notop.h5')
+
+
+def Xception(include_top=True,
+             weights='imagenet',
+             input_tensor=None,
+             input_shape=None,
+             pooling=None,
+             classes=1000):
+    """Instantiates the Xception architecture.
+
+    Optionally loads weights pre-trained on ImageNet. This model can
+    only be used with the data format `(width, height, channels)`.
+    You should set `image_data_format='channels_last'` in your Keras config
+    located at ~/.keras/keras.json.
+
+    Note that the default input image size for this model is 299x299.
+
+    # Arguments
+        include_top: whether to include the fully-connected
+            layer at the top of the network.
+        weights: one of `None` (random initialization),
+            'imagenet' (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional Keras tensor
+            (i.e. output of `layers.Input()`)
+            to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(299, 299, 3)`).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 71.
+            E.g. `(150, 150, 3)` would be one valid value.
+ pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, + and if no `weights` argument is specified. + + # Returns + A Keras model instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + RuntimeError: If attempting to run this model with a + backend that does not support separable convolutions. + """ + if not (weights in {'imagenet', None} or os.path.exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + + if backend.image_data_format() != 'channels_last': + warnings.warn('The Xception model is only available for the ' + 'input data format "channels_last" ' + '(width, height, channels). ' + 'However your settings specify the default ' + 'data format "channels_first" ' + '(channels, width, height). ' + 'You should set `image_data_format="channels_last"` ' + 'in your Keras ' + 'config located at ~/.keras/keras.json. ' + 'The model being returned right now will expect inputs ' + 'to follow the "channels_last" data format.') + backend.set_image_data_format('channels_last') + old_data_format = 'channels_first' + else: + old_data_format = None + + # Determine proper input shape + input_shape = _obtain_input_shape(input_shape, + default_size=299, + min_size=71, + data_format=backend.image_data_format(), + require_flatten=False, + weights=weights) + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + x = layers.Conv2D(32, (3, 3), + strides=(2, 2), + use_bias=False, + name='block1_conv1')(img_input) + x = layers.BatchNormalization(name='block1_conv1_bn')(x) + x = layers.Activation('relu', name='block1_conv1_act')(x) + x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) + x = layers.BatchNormalization(name='block1_conv2_bn')(x) + x = layers.Activation('relu', name='block1_conv2_act')(x) + + residual = layers.Conv2D(128, (1, 1), + strides=(2, 2), + padding='same', + use_bias=False)(x) + residual = layers.BatchNormalization()(residual) + + x = layers.SeparableConv2D(128, (3, 3), + padding='same', + use_bias=False, + name='block2_sepconv1')(x) + x = layers.BatchNormalization(name='block2_sepconv1_bn')(x) + x = layers.Activation('relu', name='block2_sepconv2_act')(x) + x = layers.SeparableConv2D(128, (3, 3), + padding='same', + use_bias=False, + name='block2_sepconv2')(x) + x = layers.BatchNormalization(name='block2_sepconv2_bn')(x) + + x = layers.MaxPooling2D((3, 3), + strides=(2, 2), + padding='same', + name='block2_pool')(x) + x = layers.add([x, residual]) + + residual = layers.Conv2D(256, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) 
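+    # (Same entry-flow pattern as above: a strided 1x1 projection forms
+    # the shortcut branch while two separable convs plus max pooling
+    # form the main path; the branches are merged with `layers.add`.)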
+ residual = layers.BatchNormalization()(residual) + + x = layers.Activation('relu', name='block3_sepconv1_act')(x) + x = layers.SeparableConv2D(256, (3, 3), + padding='same', + use_bias=False, + name='block3_sepconv1')(x) + x = layers.BatchNormalization(name='block3_sepconv1_bn')(x) + x = layers.Activation('relu', name='block3_sepconv2_act')(x) + x = layers.SeparableConv2D(256, (3, 3), + padding='same', + use_bias=False, + name='block3_sepconv2')(x) + x = layers.BatchNormalization(name='block3_sepconv2_bn')(x) + + x = layers.MaxPooling2D((3, 3), strides=(2, 2), + padding='same', + name='block3_pool')(x) + x = layers.add([x, residual]) + + residual = layers.Conv2D(728, (1, 1), + strides=(2, 2), + padding='same', + use_bias=False)(x) + residual = layers.BatchNormalization()(residual) + + x = layers.Activation('relu', name='block4_sepconv1_act')(x) + x = layers.SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name='block4_sepconv1')(x) + x = layers.BatchNormalization(name='block4_sepconv1_bn')(x) + x = layers.Activation('relu', name='block4_sepconv2_act')(x) + x = layers.SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name='block4_sepconv2')(x) + x = layers.BatchNormalization(name='block4_sepconv2_bn')(x) + + x = layers.MaxPooling2D((3, 3), strides=(2, 2), + padding='same', + name='block4_pool')(x) + x = layers.add([x, residual]) + + for i in range(8): + residual = x + prefix = 'block' + str(i + 5) + + x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x) + x = layers.SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name=prefix + '_sepconv1')(x) + x = layers.BatchNormalization(name=prefix + '_sepconv1_bn')(x) + x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x) + x = layers.SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name=prefix + '_sepconv2')(x) + x = layers.BatchNormalization(name=prefix + '_sepconv2_bn')(x) + x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x) + x = layers.SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name=prefix + '_sepconv3')(x) + x = layers.BatchNormalization(name=prefix + '_sepconv3_bn')(x) + + x = layers.add([x, residual]) + + residual = layers.Conv2D(1024, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) + residual = layers.BatchNormalization()(residual) + + x = layers.Activation('relu', name='block13_sepconv1_act')(x) + x = layers.SeparableConv2D(728, (3, 3), + padding='same', + use_bias=False, + name='block13_sepconv1')(x) + x = layers.BatchNormalization(name='block13_sepconv1_bn')(x) + x = layers.Activation('relu', name='block13_sepconv2_act')(x) + x = layers.SeparableConv2D(1024, (3, 3), + padding='same', + use_bias=False, + name='block13_sepconv2')(x) + x = layers.BatchNormalization(name='block13_sepconv2_bn')(x) + + x = layers.MaxPooling2D((3, 3), + strides=(2, 2), + padding='same', + name='block13_pool')(x) + x = layers.add([x, residual]) + + x = layers.SeparableConv2D(1536, (3, 3), + padding='same', + use_bias=False, + name='block14_sepconv1')(x) + x = layers.BatchNormalization(name='block14_sepconv1_bn')(x) + x = layers.Activation('relu', name='block14_sepconv1_act')(x) + + x = layers.SeparableConv2D(2048, (3, 3), + padding='same', + use_bias=False, + name='block14_sepconv2')(x) + x = layers.BatchNormalization(name='block14_sepconv2_bn')(x) + x = layers.Activation('relu', name='block14_sepconv2_act')(x) + + if include_top: + x = layers.GlobalAveragePooling2D(name='avg_pool')(x) + x = layers.Dense(classes, 
+                     activation='softmax', name='predictions')(x)
+    else:
+        if pooling == 'avg':
+            x = layers.GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = layers.GlobalMaxPooling2D()(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = engine.get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = models.Model(inputs, x, name='xception')
+
+    # load weights
+    if weights == 'imagenet':
+        if include_top:
+            weights_path = keras_utils.get_file(
+                'xception_weights_tf_dim_ordering_tf_kernels.h5',
+                TF_WEIGHTS_PATH,
+                cache_subdir='models',
+                file_hash='0a58e3b7378bc2990ea3b43d5981f1f6')
+        else:
+            weights_path = keras_utils.get_file(
+                'xception_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                TF_WEIGHTS_PATH_NO_TOP,
+                cache_subdir='models',
+                file_hash='b0042744bf5b25fce3cb969f33bebb97')
+        model.load_weights(weights_path)
+        if backend.backend() == 'theano':
+            keras_utils.convert_all_kernels_in_model(model)
+    elif weights is not None:
+        model.load_weights(weights)
+
+    if old_data_format:
+        backend.set_image_data_format(old_data_format)
+    return model
+
+
+def preprocess_input(x):
+    """Preprocesses a numpy array encoding a batch of images.
+
+    # Arguments
+        x: a 4D numpy array consisting of RGB values within [0, 255].
+
+    # Returns
+        Preprocessed array.
+    """
+    return imagenet_utils.preprocess_input(x, mode='tf')
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..41c0574
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,18 @@
+# Configuration of py.test
+[pytest]
+addopts=-v
+        -n 2
+        --durations=20
+
+# Do not run tests in the build folder
+norecursedirs= build
+
+# Use 85 as max line length in PEP8 test.
+pep8maxlinelength=85
+
+# PEP-8 The following are ignored:
+# E731 do not assign a lambda expression, use a def
+# E402 module level import not at top of file
+
+pep8ignore=* E731 \
+           * E402
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..61acf71
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,55 @@
+from setuptools import setup
+from setuptools import find_packages
+
+long_description = '''
+Keras Applications is the `applications` module of
+the Keras deep learning library.
+It provides model definitions and pre-trained weights for a number
+of popular architectures, such as VGG16, ResNet50, Xception, MobileNet,
+and more.
+
+Read the documentation at: https://keras.io/applications/
+
+Keras Applications may be imported directly
+from an up-to-date installation of Keras:
+
+```
+from keras import applications
+```
+
+Keras Applications is compatible with Python 2.7-3.6
+and is distributed under the MIT license.
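+
+For example, a pre-trained convolutional base for feature extraction
+can be instantiated in one line (a minimal sketch; the ImageNet weights
+are downloaded on first use, which requires network access):
+
+```
+from keras import applications
+
+model = applications.VGG16(weights='imagenet', include_top=False)
+```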
+''' + +setup(name='Keras_Applications', + version='1.0.0', + description='Easy-to-use pre-trained deep learning models', + long_description=long_description, + author='Keras Team', + url='https://github.com/keras-team/keras-applications', + download_url='https://github.com/keras-team/' + 'keras-applications/tarball/1.0.0', + license='MIT', + install_requires=['keras>=2.1.6', + 'numpy>=1.9.1', + 'scipy>=0.14', + 'six>=1.9.0'], + extras_require={ + 'tests': ['pytest', + 'pytest-pep8', + 'pytest-xdist', + 'pytest-cov'], + }, + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules' + ], + packages=find_packages()) diff --git a/tests/applications_test.py b/tests/applications_test.py new file mode 100644 index 0000000..82cef7d --- /dev/null +++ b/tests/applications_test.py @@ -0,0 +1,187 @@ +import pytest +import random +import os +from multiprocessing import Process, Queue + +import keras +# TODO: remove the few lines below once the Keras release +# is configured to use keras_applications +import keras_applications +keras_applications.set_keras_submodules( + backend=keras.backend, + engine=keras.engine, + layers=keras.layers, + models=keras.models, + utils=keras.utils) + +from keras_applications import densenet +from keras_applications import inception_resnet_v2 +from keras_applications import inception_v3 +from keras_applications import mobilenet +from keras_applications import mobilenet_v2 +from keras_applications import nasnet +from keras_applications import resnet50 +from keras_applications import vgg16 +from keras_applications import vgg19 +from keras_applications import xception + +from keras.utils.test_utils import keras_test +from keras import backend + + +pytestmark = pytest.mark.skipif( + os.environ.get('CORE_CHANGED', 'True') == 'False' and + os.environ.get('APP_CHANGED', 'True') == 'False', + reason='Runs only when the relevant files have been modified.') + + +MOBILENET_LIST = [(mobilenet.MobileNet, 1024), + (mobilenet_v2.MobileNetV2, 1280)] +DENSENET_LIST = [(densenet.DenseNet121, 1024), + (densenet.DenseNet169, 1664), + (densenet.DenseNet201, 1920)] +NASNET_LIST = [(nasnet.NASNetMobile, 1056), + (nasnet.NASNetLarge, 4032)] + + +def _get_output_shape(model_fn): + if backend.backend() == 'cntk': + # Create model in a subprocess so that + # the memory consumed by InceptionResNetV2 will be + # released back to the system after this test + # (to deal with OOM error on CNTK backend). + # TODO: remove the use of multiprocessing from these tests + # once a memory clearing mechanism + # is implemented in the CNTK backend. + def target(queue): + model = model_fn() + queue.put(model.output_shape) + queue = Queue() + p = Process(target=target, args=(queue,)) + p.start() + p.join() + # The error in a subprocess won't propagate + # to the main process, so we check if the model + # is successfully created by checking if the output shape + # has been put into the queue + assert not queue.empty(), 'Model creation failed.' 
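+        # The assert above guarantees the queue has an item, so
+        # `get_nowait` cannot raise `Empty` here.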
+ return queue.get_nowait() + else: + model = model_fn() + return model.output_shape + + +@keras_test +def _test_application_basic(app, last_dim=1000): + output_shape = _get_output_shape(lambda: app(weights=None)) + assert output_shape == (None, last_dim) + + +@keras_test +def _test_application_notop(app, last_dim): + output_shape = _get_output_shape( + lambda: app(weights=None, include_top=False)) + assert output_shape == (None, None, None, last_dim) + + +@keras_test +def _test_application_variable_input_channels(app, last_dim): + if backend.image_data_format() == 'channels_first': + input_shape = (1, None, None) + else: + input_shape = (None, None, 1) + output_shape = _get_output_shape( + lambda: app(weights=None, include_top=False, input_shape=input_shape)) + assert output_shape == (None, None, None, last_dim) + + if backend.image_data_format() == 'channels_first': + input_shape = (4, None, None) + else: + input_shape = (None, None, 4) + output_shape = _get_output_shape( + lambda: app(weights=None, include_top=False, input_shape=input_shape)) + assert output_shape == (None, None, None, last_dim) + + +@keras_test +def _test_app_pooling(app, last_dim): + output_shape = _get_output_shape( + lambda: app(weights=None, + include_top=False, + pooling=random.choice(['avg', 'max']))) + assert output_shape == (None, last_dim) + + +def test_resnet50(): + app = resnet50.ResNet50 + last_dim = 2048 + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +def test_vgg(): + app = random.choice([vgg16.VGG16, vgg19.VGG19]) + last_dim = 512 + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +def test_xception(): + app = xception.Xception + last_dim = 2048 + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +def test_inceptionv3(): + app = inception_v3.InceptionV3 + last_dim = 2048 + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +def test_inceptionresnetv2(): + app = inception_resnet_v2.InceptionResNetV2 + last_dim = 1536 + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +def test_mobilenet(): + app, last_dim = random.choice(MOBILENET_LIST) + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +def test_densenet(): + app, last_dim = random.choice(DENSENET_LIST) + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +@pytest.mark.skipif((backend.backend() != 'tensorflow'), + reason='NASNets are supported only on TensorFlow') +def test_nasnet(): + app, last_dim = random.choice(NASNET_LIST) + _test_application_basic(app) + _test_application_notop(app, last_dim) + _test_application_variable_input_channels(app, last_dim) + _test_app_pooling(app, last_dim) + + +if __name__ == '__main__': + pytest.main([__file__]) diff --git a/tests/imagenet_utils_test.py b/tests/imagenet_utils_test.py new file mode 
100644 index 0000000..a03d9c6 --- /dev/null +++ b/tests/imagenet_utils_test.py @@ -0,0 +1,252 @@ +import pytest +import numpy as np +from numpy.testing import assert_allclose + +import keras +# TODO: remove the few lines below once the Keras release +# is configured to use keras_applications +import keras_applications +keras_applications.set_keras_submodules( + backend=keras.backend, + engine=keras.engine, + layers=keras.layers, + models=keras.models, + utils=keras.utils) + +from keras_applications import imagenet_utils as utils +from keras import models +from keras import layers + + +def test_preprocess_input(): + # Test image batch with float and int image input + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + xint = x.astype('int32') + assert utils.preprocess_input(x).shape == x.shape + assert utils.preprocess_input(xint).shape == xint.shape + + out1 = utils.preprocess_input(x, 'channels_last') + out1int = utils.preprocess_input(xint, 'channels_last') + out2 = utils.preprocess_input(np.transpose(x, (0, 3, 1, 2)), + 'channels_first') + out2int = utils.preprocess_input(np.transpose(xint, (0, 3, 1, 2)), + 'channels_first') + assert_allclose(out1, out2.transpose(0, 2, 3, 1)) + assert_allclose(out1int, out2int.transpose(0, 2, 3, 1)) + + # Test single image + x = np.random.uniform(0, 255, (10, 10, 3)) + xint = x.astype('int32') + assert utils.preprocess_input(x).shape == x.shape + assert utils.preprocess_input(xint).shape == xint.shape + + out1 = utils.preprocess_input(x, 'channels_last') + out1int = utils.preprocess_input(xint, 'channels_last') + out2 = utils.preprocess_input(np.transpose(x, (2, 0, 1)), + 'channels_first') + out2int = utils.preprocess_input(np.transpose(xint, (2, 0, 1)), + 'channels_first') + assert_allclose(out1, out2.transpose(1, 2, 0)) + assert_allclose(out1int, out2int.transpose(1, 2, 0)) + + # Test that writing over the input data works predictably + for mode in ['torch', 'tf']: + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + xint = x.astype('int') + x2 = utils.preprocess_input(x, mode=mode) + xint2 = utils.preprocess_input(xint) + assert_allclose(x, x2) + assert xint.astype('float').max() != xint2.max() + # Caffe mode works differently from the others + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + xint = x.astype('int') + x2 = utils.preprocess_input(x, data_format='channels_last', mode='caffe') + xint2 = utils.preprocess_input(xint) + assert_allclose(x, x2[..., ::-1]) + assert xint.astype('float').max() != xint2.max() + + +def test_preprocess_input_symbolic(): + # Test image batch + x = np.random.uniform(0, 255, (2, 10, 10, 3)) + inputs = layers.Input(shape=x.shape[1:]) + outputs = layers.Lambda( + utils.preprocess_input, output_shape=x.shape[1:])(inputs) + model = models.Model(inputs, outputs) + assert model.predict(x).shape == x.shape + + outputs1 = layers.Lambda( + lambda x: utils.preprocess_input(x, 'channels_last'), + output_shape=x.shape[1:])(inputs) + model1 = models.Model(inputs, outputs1) + out1 = model1.predict(x) + x2 = np.transpose(x, (0, 3, 1, 2)) + inputs2 = layers.Input(shape=x2.shape[1:]) + outputs2 = layers.Lambda( + lambda x: utils.preprocess_input(x, 'channels_first'), + output_shape=x2.shape[1:])(inputs2) + model2 = models.Model(inputs2, outputs2) + out2 = model2.predict(x2) + assert_allclose(out1, out2.transpose(0, 2, 3, 1)) + + # Test single image + x = np.random.uniform(0, 255, (10, 10, 3)) + inputs = layers.Input(shape=x.shape) + outputs = layers.Lambda( + utils.preprocess_input, output_shape=x.shape)(inputs) + model = models.Model(inputs, 
outputs) + assert model.predict(x[np.newaxis])[0].shape == x.shape + + outputs1 = layers.Lambda( + lambda x: utils.preprocess_input(x, 'channels_last'), + output_shape=x.shape)(inputs) + model1 = models.Model(inputs, outputs1) + out1 = model1.predict(x[np.newaxis])[0] + x2 = np.transpose(x, (2, 0, 1)) + inputs2 = layers.Input(shape=x2.shape) + outputs2 = layers.Lambda( + lambda x: utils.preprocess_input(x, 'channels_first'), + output_shape=x2.shape)(inputs2) + model2 = models.Model(inputs2, outputs2) + out2 = model2.predict(x2[np.newaxis])[0] + assert_allclose(out1, out2.transpose(1, 2, 0)) + + +def test_decode_predictions(): + x = np.zeros((2, 1000)) + x[0, 372] = 1.0 + x[1, 549] = 1.0 + outs = utils.decode_predictions(x, top=1) + scores = [out[0][2] for out in outs] + assert scores[0] == scores[1] + + # the numbers of columns and ImageNet classes are not identical. + with pytest.raises(ValueError): + utils.decode_predictions(np.ones((2, 100))) + + +def test_obtain_input_shape(): + # input_shape and default_size are not identical. + with pytest.raises(ValueError): + utils._obtain_input_shape( + input_shape=(224, 224, 3), + default_size=299, + min_size=139, + data_format='channels_last', + require_flatten=True, + weights='imagenet') + + # Test invalid use cases + for data_format in ['channels_last', 'channels_first']: + # test warning + shape = (139, 139) + if data_format == 'channels_last': + input_shape = shape + (99,) + else: + input_shape = (99,) + shape + with pytest.warns(UserWarning): + utils._obtain_input_shape( + input_shape=input_shape, + default_size=None, + min_size=139, + data_format=data_format, + require_flatten=False, + weights='fake_weights') + + # input_shape is smaller than min_size. + shape = (100, 100) + if data_format == 'channels_last': + input_shape = shape + (3,) + else: + input_shape = (3,) + shape + with pytest.raises(ValueError): + utils._obtain_input_shape( + input_shape=input_shape, + default_size=None, + min_size=139, + data_format=data_format, + require_flatten=False) + + # shape is 1D. + shape = (100,) + if data_format == 'channels_last': + input_shape = shape + (3,) + else: + input_shape = (3,) + shape + with pytest.raises(ValueError): + utils._obtain_input_shape( + input_shape=input_shape, + default_size=None, + min_size=139, + data_format=data_format, + require_flatten=False) + + # the number of channels is 5 not 3. + shape = (100, 100) + if data_format == 'channels_last': + input_shape = shape + (5,) + else: + input_shape = (5,) + shape + with pytest.raises(ValueError): + utils._obtain_input_shape( + input_shape=input_shape, + default_size=None, + min_size=139, + data_format=data_format, + require_flatten=False) + + # require_flatten=True with dynamic input shape. 
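+    # (with no static spatial dimensions, the size of the flattened
+    # Dense input cannot be inferred, so a ValueError is expected)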
+ with pytest.raises(ValueError): + utils._obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format='channels_first', + require_flatten=True) + + # test include top + assert utils._obtain_input_shape( + input_shape=(3, 200, 200), + default_size=None, + min_size=139, + data_format='channels_first', + require_flatten=True) == (3, 200, 200) + + assert utils._obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format='channels_last', + require_flatten=False) == (None, None, 3) + + assert utils._obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format='channels_first', + require_flatten=False) == (3, None, None) + + assert utils._obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format='channels_last', + require_flatten=False) == (None, None, 3) + + assert utils._obtain_input_shape( + input_shape=(150, 150, 3), + default_size=None, + min_size=139, + data_format='channels_last', + require_flatten=False) == (150, 150, 3) + + assert utils._obtain_input_shape( + input_shape=(3, None, None), + default_size=None, + min_size=139, + data_format='channels_first', + require_flatten=False) == (3, None, None) + + +if __name__ == '__main__': + pytest.main([__file__]) diff --git a/tests/integration_tests.py b/tests/integration_tests.py new file mode 100644 index 0000000..4640904 --- /dev/null +++ b/tests/integration_tests.py @@ -0,0 +1 @@ +# TODO
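+
+# One possible shape for this suite (a sketch only; it assumes network
+# access for the ImageNet weights and the same `set_keras_submodules`
+# setup used in applications_test.py):
+#
+#     import numpy as np
+#     from keras_applications import imagenet_utils, resnet50
+#
+#     def test_resnet50_imagenet_predictions():
+#         model = resnet50.ResNet50(weights='imagenet')
+#         # random pixels are enough to check the plumbing end to end
+#         x = imagenet_utils.preprocess_input(
+#             np.random.uniform(0, 255, (1, 224, 224, 3)))
+#         preds = model.predict(x)
+#         top = imagenet_utils.decode_predictions(preds, top=1)[0][0]
+#         assert len(top) == 3  # (class_name, description, score)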