Skip to content

Commit

Permalink
Windows loadlibrary (#8217)
Browse files Browse the repository at this point in the history
* cmake versions > 3.6 will quote -D__VERSION__=\"MSVC\" as -D"__VERSION__=\"MSVC\""
which nvcc stumbles over. Since this is only used by version.cc we apply it only
to core/framework so nvcc will never see it.

* - support for tf.load_library() on windows
- support for linking against tensorflow.dll to access c/c++ api
- switched contrib/rnn to use .so's
- added tests in contrib/rnn

* Address review feedback. Changes:
- Some style changes
- TF_COMPILE_LIBRARY define controls exports from the tensorflow.dll.
  If TF_COMPILE_LIBRARY is defined we compile tensorflow, if undefined
  we compile a user op.
- On windows user ops have now a .dll extension. The resource loader will
  take care of finding it.
- pywrap_tensorflow_internal.lib that is needed to build a user op on windows
  comes with the wheel

* fix linux build

* fix linux build

* keep //tensorflow/tools/test:check_futures_test happy

* add copyright
  • Loading branch information
guschmue authored and drpngx committed Mar 10, 2017
1 parent 43c2d68 commit adc0929
Show file tree
Hide file tree
Showing 11 changed files with 327 additions and 23 deletions.
3 changes: 2 additions & 1 deletion tensorflow/contrib/cmake/CMakeLists.txt
Expand Up @@ -56,9 +56,10 @@ mark_as_advanced(DOWNLOAD_LOCATION)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_definitions(-DEIGEN_AVOID_STL_ARRAY)
if(WIN32)
add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC -D__VERSION__=\"MSVC\")
add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS)
add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH -D_ITERATOR_DEBUG_LEVEL=0)
add_definitions(-DTF_COMPILE_LIBRARY)
add_definitions(-DNDEBUG /O2) # Equivalent of -c opt in Bazel.
add_definitions(/bigobj /nologo /EHsc /GF /FC /MP /Gm-)
# Suppress warnings to reduce build log size.
Expand Down
40 changes: 40 additions & 0 deletions tensorflow/contrib/cmake/tf_cc_ops.cmake
Expand Up @@ -120,3 +120,43 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs})

add_library(tf_cc OBJECT ${tf_cc_srcs})
add_dependencies(tf_cc tf_cc_framework tf_cc_ops)

# Import library produced next to the pywrap dll; user-op libraries link
# against it to resolve the symbols exported from the tensorflow runtime.
set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
# Umbrella target that aggregates every user-op library built via AddUserOps().
add_custom_target(tf_extension_ops)

# AddUserOps(TARGET <name>
#            SOURCES <src>...
#            [GPUSOURCES <cuda-src>...]
#            [DEPENDS <target>...]
#            [DISTCOPY <dir>])
#
# Build a user-op shared library <name> from SOURCES (plus GPUSOURCES compiled
# by nvcc when the GPU build is enabled), link it against the pywrap import
# library, and register it on the umbrella target tf_extension_ops.
# On Windows the built .dll is optionally copied to DISTCOPY after each build.
function(AddUserOps)
  cmake_parse_arguments(_AT "" "" "TARGET;SOURCES;GPUSOURCES;DEPENDS;DISTCOPY" ${ARGN})
  if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
    # if gpu build is enabled and we have gpu specific code,
    # hint to cmake that this needs to go to nvcc
    set(gpu_source ${_AT_GPUSOURCES})
    set_source_files_properties(${gpu_source} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
    # cuda_compile() stores the list of generated object files in gpu_lib;
    # any value assigned to gpu_lib beforehand would be overwritten, so we
    # do not pre-set it. If GPU is disabled, gpu_lib stays empty below.
    cuda_compile(gpu_lib ${gpu_source})
  endif()
  # create shared library from source and cuda obj
  add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib})
  target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib})
  if(WIN32)
    if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
      # some ops call out to cuda directly; need to link libs for the cuda dlls
      target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
    endif()
    if (_AT_DISTCOPY)
      # stage the freshly built .dll where the python code expects to load it
      add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
    endif()
  endif()
  if (_AT_DEPENDS)
    add_dependencies(${_AT_TARGET} ${_AT_DEPENDS})
  endif()
  # make sure TF_COMPILE_LIBRARY is not defined for this target, so the op is
  # compiled as a consumer of tensorflow.dll (dllimport) instead of as part
  # of the dll itself.
  get_target_property(target_compile_flags ${_AT_TARGET} COMPILE_FLAGS)
  if("${target_compile_flags}" STREQUAL "target_compile_flags-NOTFOUND")
    # COMPILE_FLAGS was never set on the target.
    set(target_compile_flags "/UTF_COMPILE_LIBRARY")
  else()
    set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
  endif()
  # quote the value: once flags are appended it contains spaces
  set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS "${target_compile_flags}")
  add_dependencies(tf_extension_ops ${_AT_TARGET})
endfunction()
7 changes: 6 additions & 1 deletion tensorflow/contrib/cmake/tf_core_framework.cmake
Expand Up @@ -198,7 +198,6 @@ add_custom_command(OUTPUT
COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py
--raw_generate ${VERSION_INFO_CC}
DEPENDS __force_rebuild)

set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)

########################################################
Expand Down Expand Up @@ -237,3 +236,9 @@ add_dependencies(tf_core_framework
tf_core_lib
proto_text
)

if(WIN32)
  # CMake > 3.6 will quote this as -D"__VERSION__=\"MSVC\"" which nvcc fails on.
  # Instead of defining it globally, limit it to tf_core_framework where it's used
  # (only version.cc reads __VERSION__), so nvcc never sees the definition.
  target_compile_definitions(tf_core_framework PRIVATE __VERSION__="MSVC")
endif()
6 changes: 6 additions & 0 deletions tensorflow/contrib/cmake/tf_core_kernels.cmake
Expand Up @@ -93,6 +93,12 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
# not in tensorflow.dll - comes from .so
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/lstm_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)
Expand Down
107 changes: 99 additions & 8 deletions tensorflow/contrib/cmake/tf_python.cmake 100644 → 100755
Expand Up @@ -568,12 +568,7 @@ add_custom_command(
COMMENT "Running SWIG to generate Python wrappers"
VERBATIM )

# pywrap_tensorflow_internal is a shared library containing all of the
# TensorFlow runtime and the standard ops and kernels. These are installed into
# tf_python/tensorflow/python/.
# TODO(mrry): Refactor this to expose a framework library that
# facilitates `tf.load_op_library()`.
add_library(pywrap_tensorflow_internal SHARED
set (pywrap_tensorflow_internal_src
"${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.h"
"${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.cc"
"${tensorflow_source_dir}/tensorflow/python/framework/cpp_shape_inference.h"
Expand All @@ -597,6 +592,55 @@ add_library(pywrap_tensorflow_internal SHARED
"${tensorflow_source_dir}/tensorflow/c/tf_status_helper.cc"
"${tensorflow_source_dir}/tensorflow/c/tf_status_helper.h"
"${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.cc"
)

if(WIN32)
  # Windows: build a static library with the same objects as tensorflow.dll.
  # This can be used to build for a standalone exe and also helps us to
  # find all symbols that need to be exported from the dll which is needed
  # to provide the tensorflow c/c++ api in tensorflow.dll.
  # From the static library we create the def file with all symbols that need
  # to be exported from tensorflow.dll. Because there is a limit of 64K symbols
  # that can be exported per dll, we filter the symbols with a python script
  # (tools/create_def_file.py) down to the namespaces we need.
  add_library(pywrap_tensorflow_internal_static STATIC
    ${pywrap_tensorflow_internal_src}
    $<TARGET_OBJECTS:tf_core_lib>
    $<TARGET_OBJECTS:tf_core_cpu>
    $<TARGET_OBJECTS:tf_core_framework>
    $<TARGET_OBJECTS:tf_core_ops>
    $<TARGET_OBJECTS:tf_core_direct_session>
    $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
    $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
    $<TARGET_OBJECTS:tf_core_kernels>
    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
    $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
  )
  target_include_directories(pywrap_tensorflow_internal_static PUBLIC
    ${PYTHON_INCLUDE_DIR}
    ${NUMPY_INCLUDE_DIR}
  )
  target_link_libraries(pywrap_tensorflow_internal_static
    tf_protos_cc
    tf_python_protos_cc
  )
  # Location of the generated linker .def file; mark it GENERATED so CMake
  # does not expect it to exist at configure time.
  set(pywrap_tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow.def")
  set_source_files_properties(${pywrap_tensorflow_deffile} PROPERTIES GENERATED TRUE)

  # Regenerate the .def file from the static library's symbol table after
  # every build of the static library.
  add_custom_command(TARGET pywrap_tensorflow_internal_static POST_BUILD
    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tools/create_def_file.py
      --input $<TARGET_FILE:pywrap_tensorflow_internal_static>
      --output ${pywrap_tensorflow_deffile}
  )
endif(WIN32)


# pywrap_tensorflow_internal is a shared library containing all of the
# TensorFlow runtime and the standard ops and kernels. These are installed into
# tf_python/tensorflow/python/.
add_library(pywrap_tensorflow_internal SHARED
${pywrap_tensorflow_internal_src}
$<TARGET_OBJECTS:tf_core_lib>
$<TARGET_OBJECTS:tf_core_cpu>
$<TARGET_OBJECTS:tf_core_framework>
Expand All @@ -607,7 +651,13 @@ add_library(pywrap_tensorflow_internal SHARED
$<TARGET_OBJECTS:tf_core_kernels>
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
${pywrap_tensorflow_deffile}
)

if(WIN32)
  # The .def file consumed by the dll link is produced as a POST_BUILD step of
  # the static library, so the shared library must build after it.
  add_dependencies(pywrap_tensorflow_internal pywrap_tensorflow_internal_static)
endif(WIN32)

target_include_directories(pywrap_tensorflow_internal PUBLIC
${PYTHON_INCLUDE_DIR}
${NUMPY_INCLUDE_DIR}
Expand All @@ -620,6 +670,44 @@ target_link_libraries(pywrap_tensorflow_internal
${PYTHON_LIBRARIES}
)

if(WIN32)
  # include contrib/rnn as .so
  # (built as stand-alone user-op dlls loaded at runtime rather than being
  # compiled into tensorflow.dll; see the matching exclusions in
  # tf_core_kernels.cmake)
  # Sources for the GRU op library (_gru_ops).
  set(tf_gru_srcs
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.h"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.h"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
  )
  # CUDA sources compiled by nvcc only when the GPU build is enabled.
  set(tf_gru_gpu_srcs
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops_gpu.cu.cc"
  )

  # Sources for the LSTM op library (_lstm_ops).
  set(tf_lstm_srcs
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.h"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.h"
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/lstm_ops.cc"
  )
  set(tf_lstm_gpu_srcs
    "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc"
  )

  # DISTCOPY stages the dlls where the rnn python wrappers expect them.
  AddUserOps(TARGET _gru_ops
    SOURCES "${tf_gru_srcs}"
    GPUSOURCES ${tf_gru_gpu_srcs}
    DEPENDS pywrap_tensorflow_internal tf_python_ops
    DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)

  AddUserOps(TARGET _lstm_ops
    SOURCES "${tf_lstm_srcs}"
    GPUSOURCES ${tf_lstm_gpu_srcs}
    DEPENDS pywrap_tensorflow_internal tf_python_ops
    DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
endif(WIN32)

############################################################
# Build a PIP package containing the TensorFlow runtime.
############################################################
Expand All @@ -629,14 +717,17 @@ add_dependencies(tf_python_build_pip_package
tensorboard_copy_dependencies
tf_python_copy_scripts_to_destination
tf_python_touchup_modules
tf_python_ops)
tf_python_ops
tf_extension_ops)
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/setup.py
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
if(WIN32)
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.dll
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd)
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/)
else()
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so
Expand Down
7 changes: 7 additions & 0 deletions tensorflow/contrib/cmake/tf_tests.cmake
Expand Up @@ -115,7 +115,14 @@ if (tensorflow_BUILD_PYTHON_TESTS)
#

# include all test
if (WIN32)
file(GLOB_RECURSE tf_test_rnn_src_py
"${tensorflow_source_dir}/tensorflow/contrib/rnn/python/kernel_tests/*_test.py"
)
endif()

file(GLOB_RECURSE tf_test_src_py
${tf_test_rnn_src_py}
"${tensorflow_source_dir}/tensorflow/python/debug/cli/*_test.py"
"${tensorflow_source_dir}/tensorflow/python/debug/lib/*_test.py"
"${tensorflow_source_dir}/tensorflow/python/debug/wrappers/*_test.py"
Expand Down
134 changes: 134 additions & 0 deletions tensorflow/contrib/cmake/tools/create_def_file.py
@@ -0,0 +1,134 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""
create_def_file.py - tool to create a windows def file to export
symbols from tensorflow.dll to enable tf.load_library().
Because the linker allows only 64K symbols to be exported per dll
we filter the symbols down to the essentials. The regular expressions
we use for this are specific to tensorflow.
TODO: this works fine but there is an issue with exporting
'const char * const' and importing it from a user_ops. The problem is
on the importing end and using __declspec(dllimport) works around it.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import io
import os
import re
import sys
import tempfile
from subprocess import Popen, PIPE

# External tools we use that come with visual studio sdk and
# we assume that the caller has the correct PATH to the sdk
UNDNAME = "undname.exe"  # demangles MSVC-decorated symbol names
DUMPBIN = "dumpbin.exe"  # dumps the linkable symbols of a .lib/.obj

# Exclude a symbol if its undecorated name matches this (applied after
# INCLUDEPRE_RE, before INCLUDE_RE).
EXCLUDE_RE = re.compile(r"deleting destructor|::internal::")

# Always include a symbol if its undecorated name matches this, even when
# EXCLUDE_RE would otherwise reject it.
INCLUDEPRE_RE = re.compile(r"tensorflow::internal::LogMessage|" +
                           r"tensorflow::internal::CheckOpMessageBuilder")

# Include a symbol if matched after the exclude filter: the C API (TF_*) plus
# the tensorflow::, functor:: and perftools::gputools namespaces.
INCLUDE_RE = re.compile(r"^(TF_\w*)$|" +
                        r"tensorflow::|" +
                        r"functor::|" +
                        r"perftools::gputools")


def get_args():
  """Parse and return the command-line arguments (--input, --output)."""
  parser = argparse.ArgumentParser()
  parser.add_argument("--input", help="input library", required=True)
  parser.add_argument("--output", help="output deffile", required=True)
  return parser.parse_args()


def main():
  """Create the windows .def file.

  Dumps all linkable symbols from the library named by --input using
  dumpbin, demangles them with undname, keeps only the symbols selected by
  the regular expressions above, and writes the surviving decorated names
  to the --output def file.

  Returns:
    0 on success, otherwise the non-zero exit code of the failing tool.
  """
  args = get_args()

  # Pipe dumpbin to extract all linkable symbols from a lib.
  # Good symbols are collected in candidates and also written to
  # a temp file, which is later passed to undname as its input file.
  candidates = []
  tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False)
  proc = Popen([DUMPBIN, "/nologo", "/linkermember:1", args.input], stdout=PIPE)
  for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
    cols = line.split()
    if len(cols) < 2:
      # header/blank lines in dumpbin output carry no symbol column
      continue
    sym = cols[1]
    tmpfile.file.write(sym + "\n")
    candidates.append(sym)
  tmpfile.file.close()
  exit_code = proc.wait()
  if exit_code != 0:
    print("{} failed, exit={}".format(DUMPBIN, exit_code))
    return exit_code

  # Run the symbols through undname to get their undecorated name
  # so we can filter on something readable.
  with open(args.output, "w") as def_fp:
    # track dupes
    taken = set()

    # Header for the def file. Since the tensorflow.dll is actually called
    # _pywrap_tensorflow.pyd in the python wheel, hint that in the def file.
    def_fp.write("LIBRARY _pywrap_tensorflow_internal.pyd\n")
    def_fp.write("EXPORTS\n")
    # NOTE(review): this OpDef destructor is exported unconditionally —
    # presumably the regex filters below miss it; confirm it is still needed.
    def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n")

    # Each symbols returned by undname matches the same position in candidates.
    # We compare on undname but use the decorated name from candidates.
    dupes = 0
    proc = Popen([UNDNAME, tmpfile.name], stdout=PIPE)
    for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")):
      decorated = candidates[idx]
      if decorated in taken:
        # Symbol is already in output, done.
        dupes += 1
        continue

      # INCLUDEPRE_RE wins over EXCLUDE_RE; otherwise a symbol must pass
      # both the exclude and the include filter to be exported.
      if not INCLUDEPRE_RE.search(line):
        if EXCLUDE_RE.search(line):
          continue
        if not INCLUDE_RE.search(line):
          continue

      def_fp.write("\t" + decorated + "\n")
      taken.add(decorated)
    exit_code = proc.wait()
    if exit_code != 0:
      print("{} failed, exit={}".format(UNDNAME, exit_code))
      return exit_code

  # remove the intermediate symbol list (NamedTemporaryFile(delete=False))
  os.unlink(tmpfile.name)

  print("symbols={}, taken={}, dupes={}"
        .format(len(candidates), len(taken), dupes))
  return 0


if __name__ == "__main__":
  # Propagate main()'s return value as the process exit status.
  sys.exit(main())

0 comments on commit adc0929

Please sign in to comment.