Skip to content

Commit

Permalink
Reduce memory usage and increase performance for convolution on iOS (#3778)
Browse files Browse the repository at this point in the history

* Reduce memory usage and increase performance for convolution on iOS

* Switched to persistent memory allocation, added documentation, and made all the implementations functors

* Updated conv kernel based on review comments
  • Loading branch information
petewarden authored and rmlarsen committed Aug 23, 2016
1 parent 6d04d60 commit 459c2fe
Show file tree
Hide file tree
Showing 8 changed files with 653 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tensorflow/contrib/ios_examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ rundown:
inside the library are not stripped out. To the linker, they can appear
unused because no other code references the variables, but in fact their
constructors have the important side effect of registering the class.

- You'll need to include the Accelerate framework in the "Link Binary with
Libraries" build phase of your project.

- C++11 support (or later) should be enabled by setting `C++ Language Dialect` to
`GNU++11` (or `GNU++14`), and `C++ Standard Library` to `libc++`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
/* Begin PBXBuildFile section */
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
5993C7701D5D4E7F0048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */; };
59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
Expand All @@ -25,6 +26,7 @@
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
5911579B1CF4011C00C31E3A /* benchmark.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = benchmark.app; sourceTree = BUILT_PRODUCTS_DIR; };
5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
59A3CFF41CF4E68100C4259F /* cropped_panda.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = cropped_panda.jpg; sourceTree = "<group>"; };
Expand All @@ -50,6 +52,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
5993C7701D5D4E7F0048CE6A /* Accelerate.framework in Frameworks */,
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
Expand All @@ -63,6 +66,7 @@
591157921CF4011C00C31E3A = {
isa = PBXGroup;
children = (
5993C76F1D5D4E7F0048CE6A /* Accelerate.framework */,
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
590E7D871D02091F00DF5523 /* libprotobuf.a */,
59A3D0171CF4E86100C4259F /* UIKit.framework */,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
592FF90D18EDD0DA00C164F8 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 592FF90A18EDD0DA00C164F8 /* MainStoryboard_iPhone.storyboard */; };
592FF92518EE240200C164F8 /* CameraExampleAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 592FF92218EE240200C164F8 /* CameraExampleAppDelegate.m */; };
592FF92618EE240200C164F8 /* CameraExampleViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 592FF92418EE240200C164F8 /* CameraExampleViewController.mm */; };
5993C7721D5D4E980048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C7711D5D4E980048CE6A /* Accelerate.framework */; };
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
Expand Down Expand Up @@ -52,13 +53,15 @@
592FF92218EE240200C164F8 /* CameraExampleAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CameraExampleAppDelegate.m; sourceTree = SOURCE_ROOT; };
592FF92318EE240200C164F8 /* CameraExampleViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CameraExampleViewController.h; sourceTree = SOURCE_ROOT; };
592FF92418EE240200C164F8 /* CameraExampleViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CameraExampleViewController.mm; sourceTree = SOURCE_ROOT; };
5993C7711D5D4E980048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS9.3.sdk/System/Library/Frameworks/Accelerate.framework; sourceTree = DEVELOPER_DIR; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
592FF8B218ECBD7600C164F8 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
5993C7721D5D4E980048CE6A /* Accelerate.framework in Frameworks */,
591D3EDF1CFFAD230059011C /* libprotobuf-lite.a in Frameworks */,
591D3EE01CFFAD230059011C /* libprotobuf.a in Frameworks */,
591D3ECF1CFF7FCE0059011C /* ImageIO.framework in Frameworks */,
Expand Down Expand Up @@ -103,6 +106,7 @@
592FF8B718ECBD7600C164F8 /* Frameworks */ = {
isa = PBXGroup;
children = (
5993C7711D5D4E980048CE6A /* Accelerate.framework */,
591D3EDD1CFFAD230059011C /* libprotobuf-lite.a */,
591D3EDE1CFFAD230059011C /* libprotobuf.a */,
591D3ECE1CFF7FCE0059011C /* ImageIO.framework */,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
/* Begin PBXBuildFile section */
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
5993C7741D5D4EAF0048CE6A /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5993C7731D5D4EAF0048CE6A /* Accelerate.framework */; };
59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
Expand All @@ -25,6 +26,7 @@
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
5911579B1CF4011C00C31E3A /* tf_ios_makefile_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_ios_makefile_example.app; sourceTree = BUILT_PRODUCTS_DIR; };
5993C7731D5D4EAF0048CE6A /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
59A3CFF41CF4E68100C4259F /* cropped_panda.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = cropped_panda.jpg; sourceTree = "<group>"; };
Expand All @@ -50,6 +52,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
5993C7741D5D4EAF0048CE6A /* Accelerate.framework in Frameworks */,
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
Expand All @@ -63,6 +66,7 @@
591157921CF4011C00C31E3A = {
isa = PBXGroup;
children = (
5993C7731D5D4EAF0048CE6A /* Accelerate.framework */,
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
590E7D871D02091F00DF5523 /* libprotobuf.a */,
59A3D0171CF4E86100C4259F /* UIKit.framework */,
Expand Down
10 changes: 10 additions & 0 deletions tensorflow/contrib/makefile/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch armv7 \
-D__thread= \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
Expand All @@ -294,6 +295,7 @@ ifeq ($(TARGET),IOS)
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
Expand All @@ -305,6 +307,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch armv7s \
-D__thread= \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
Expand All @@ -315,6 +318,7 @@ ifeq ($(TARGET),IOS)
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7s \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
Expand All @@ -326,6 +330,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch arm64 \
-D__thread= \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-D__ANDROID_TYPES_SLIM__ \
Expand All @@ -335,6 +340,7 @@ ifeq ($(TARGET),IOS)
${IPHONEOS_SYSROOT}
LDFLAGS := -arch arm64 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
Expand All @@ -346,6 +352,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \
-arch i386 \
-D__thread= \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-D__ANDROID_TYPES_SLIM__ \
Expand All @@ -355,6 +362,7 @@ ifeq ($(TARGET),IOS)
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch i386 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
Expand All @@ -366,6 +374,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \
-arch x86_64 \
-D__thread= \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-D__ANDROID_TYPES_SLIM__ \
Expand All @@ -375,6 +384,7 @@ ifeq ($(TARGET),IOS)
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch x86_64 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-framework Accelerate \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
Expand Down
1 change: 1 addition & 0 deletions tensorflow/contrib/makefile/tf_op_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ tensorflow/core/kernels/cwise_op_equal_to.cc
tensorflow/core/kernels/cwise_op_div.cc
tensorflow/core/kernels/cwise_op_add.cc
tensorflow/core/kernels/ctc_decoder_ops.cc
tensorflow/core/kernels/conv_ops_using_gemm.cc
tensorflow/core/kernels/conv_ops.cc
tensorflow/core/kernels/conv_grad_ops.cc
tensorflow/core/kernels/control_flow_ops.cc
Expand Down
4 changes: 4 additions & 0 deletions tensorflow/core/kernels/conv_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,12 @@ class Conv2DOp : public BinaryOp<T> {
Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
Conv2DOp<CPUDevice, T>);

// If the alternative GEMM-based implementation of Conv2D is being used on the
// CPU, don't register this EigenTensor-based version.
#if !defined(USE_GEMM_FOR_CONV)
TF_CALL_half(REGISTER_CPU);
TF_CALL_float(REGISTER_CPU);
#endif // USE_GEMM_FOR_CONV

// To be used inside depthwise_conv_op.cc.
template class LaunchConv2DOp<CPUDevice, float>;
Expand Down

0 comments on commit 459c2fe

Please sign in to comment.