diff --git a/.kokorun/io_cpu.sh b/.kokorun/io_cpu.sh
index 2fe4f38d3..e8b22c270 100755
--- a/.kokorun/io_cpu.sh
+++ b/.kokorun/io_cpu.sh
@@ -56,6 +56,8 @@ docker run -i --rm -v $PWD:/v -w /v --net=host \
   -e BAZEL_OPTIMIZATION="${BAZEL_OPTIMIZATION}" \
   gcr.io/tensorflow-testing/nosla-ubuntu16.04-manylinux2010@sha256:3a9b4820021801b1fa7d0592c1738483ac7abc209fc6ee8c9ef06cf2eab2d170 /v/.github/workflows/build.bazel.sh
 
+sudo chown -R $(id -nu):$(id -ng) .
+
 docker run -i --rm --user $(id -u):$(id -g) -v /etc/password:/etc/password -v $PWD:/v -w /v --net=host \
   python:${PYTHON_VERSION}-slim python setup.py --data build -q bdist_wheel
 
diff --git a/BUILD b/BUILD.bazel
similarity index 100%
rename from BUILD
rename to BUILD.bazel
diff --git a/README.md b/README.md
index 2bcf394e1..da8d84152 100644
--- a/README.md
+++ b/README.md
@@ -288,6 +288,34 @@ TFIO_DATAPATH=bazel-bin \
   'python3 -m pytest -s -v tests/test_serialization_eager.py'
 ```
 
+#### Python Wheels
+
+It is possible to build Python wheels once the bazel build is complete, with the following command:
+```
+python3 setup.py bdist_wheel --data bazel-bin
+```
+The whl file will be available in the `dist` directory. Note the bazel binary directory `bazel-bin`
+has to be passed with the `--data` argument in order for setup.py to locate the necessary shared objects,
+as `bazel-bin` is outside of the `tensorflow_io` package directory.
+
+Alternatively, a source install can be done with:
+```
+TFIO_DATAPATH=bazel-bin python3 -m pip install .
+```
+with `TFIO_DATAPATH=bazel-bin` passed for the same reason.
+
+Note that installing with `-e` is different from the above. The
+```
+TFIO_DATAPATH=bazel-bin python3 -m pip install -e .
+```
+will not install the shared objects automatically even with `TFIO_DATAPATH=bazel-bin`. Instead,
+`TFIO_DATAPATH=bazel-bin` has to be passed every time the program is run after the install:
+```
+TFIO_DATAPATH=bazel-bin python3
+# import tensorflow_io as tfio
+# ...
+```
+
 #### Docker
 
 For Python development, a reference Dockerfile [here](tools/dev/Dockerfile) can be
diff --git a/setup.py b/setup.py
index 0a53bfc07..74a891891 100644
--- a/setup.py
+++ b/setup.py
@@ -13,274 +13,135 @@
 # limitations under the License.
 # ==============================================================================
 """Setup for pip package."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 
 import os
 import sys
 import shutil
 import tempfile
 import fnmatch
+import setuptools
 
-content = """
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. 
-# ============================================================================== -\"\"\"Setup for pip package.\"\"\" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -from setuptools import Command -from setuptools import find_packages -from setuptools import setup -from setuptools.command.install import install as InstallCommandBase -from setuptools.dist import Distribution - -REQUIRED_PACKAGES = [ - '{}', -] -__version__ = '{}' -project_name = '{}' - -class BinaryDistribution(Distribution): - \"\"\"This class is needed in order to create OS specific wheels.\"\"\" - - def has_ext_modules(self): - return True - -class InstallCommand(InstallCommandBase): - \"\"\"Override the dir where the headers go.\"\"\" - - def finalize_options(self): - ret = InstallCommandBase.finalize_options(self) - self.install_headers = os.path.join(self.install_purelib, 'tensorflow_core', - 'include') - self.install_lib = self.install_platlib - return ret - - -class InstallHeaders(Command): - \"\"\"Override how headers are copied. - - The install_headers that comes with setuptools copies all files to - the same directory. But we need the files to be in a specific directory - hierarchy for -I to work correctly. - \"\"\" - description = 'install C/C++ header files' - - user_options = [('install-dir=', 'd', - 'directory to install header files to'), - ('force', 'f', - 'force installation (overwrite existing files)'), - ] - - boolean_options = ['force'] - - def initialize_options(self): - self.install_dir = None - self.force = 0 - self.outfiles = [] - - def finalize_options(self): - self.set_undefined_options('install', - ('install_headers', 'install_dir'), - ('force', 'force')) - - def mkdir_and_copy_file(self, header): - install_dir = os.path.join(self.install_dir, os.path.dirname(header)) - # Get rid of some extra intervening directories so we can have fewer - # directories for -I - install_dir = re.sub('/google/protobuf_archive/src', '', install_dir) - install_dir = re.sub('/include/tensorflow_core/', '/include/tensorflow/', - install_dir) - - # Copy external code headers into tensorflow_core/include. - # A symlink would do, but the wheel file that gets created ignores - # symlink within the directory hierarchy. - # NOTE(keveman): Figure out how to customize bdist_wheel package so - # we can do the symlink. - external_header_locations = [ - 'tensorflow_core/include/external/eigen_archive/', - 'tensorflow_core/include/external/com_google_absl/', - ] - for location in external_header_locations: - if location in install_dir: - extra_dir = install_dir.replace(location, '') - if not os.path.exists(extra_dir): - self.mkpath(extra_dir) - self.copy_file(header, extra_dir) - - if not os.path.exists(install_dir): - self.mkpath(install_dir) - return self.copy_file(header, install_dir) - - def run(self): - hdrs = self.distribution.headers - if not hdrs: - return - - self.mkpath(self.install_dir) - for header in hdrs: - (out, _) = self.mkdir_and_copy_file(header) - self.outfiles.append(out) - - def get_inputs(self): - return self.distribution.headers or [] - - def get_outputs(self): - return self.outfiles - - -setup( - name=project_name, - version=__version__, - description=('TensorFlow IO'), - url='https://github.com/tensorflow/io', - download_url='https://github.com/tensorflow/io/tags', - author='Google Inc.', - author_email='opensource@google.com', - # Contained modules and scripts. 
- packages=find_packages(), - install_requires=REQUIRED_PACKAGES, - # Add in any packaged data. - include_package_data=True, - zip_safe=False, - distclass=BinaryDistribution, - cmdclass={}, - # PyPI package information. - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3 :: Only', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], - license='Apache 2.0', - keywords='tensorflow io machine learning', -) -""" +here = os.path.abspath(os.path.dirname(__file__)) # read package and version from: # tensorflow_io/core/python/ops/version_ops.py -with open("tensorflow_io/core/python/ops/version_ops.py") as f: - entries = [e.strip() for e in f.readlines() if not e.startswith("#")] - assert sum(e.startswith("package = ") for e in entries) == 1 - assert sum(e.startswith("version = ") for e in entries) == 1 - package = list([ - e[10:] for e in entries if e.startswith("package = ")])[0].strip('"') - version = list([ - e[10:] for e in entries if e.startswith("version = ")])[0].strip('"') - assert package != "" - assert version != "" - -if '--package-version' in sys.argv: - print(package) - sys.exit(0) - -project = 'tensorflow-io' - -# Note: import setuptools later to avoid unnecessary dependency -from setuptools import sandbox # pylint: disable=wrong-import-position - -if '--nightly' in sys.argv: - nightly_idx = sys.argv.index('--nightly') - version = version + ".dev" + sys.argv[nightly_idx + 1] - project = 'tensorflow-io-nightly' - sys.argv.remove('--nightly') - sys.argv.pop(nightly_idx) - -print("setup.py - project = '{}'".format(project)) -print("setup.py - package = '{}'".format(package)) -print("setup.py - version = '{}'".format(version)) - -rootpath = tempfile.mkdtemp() -print("setup.py - create {} and copy tensorflow_io".format(rootpath)) -shutil.copytree("tensorflow_io", os.path.join(rootpath, "tensorflow_io")) - -print("setup.py - create {}/MANIFEST.in".format(rootpath)) -with open(os.path.join(rootpath, "MANIFEST.in"), "w") as f: - f.write("recursive-include tensorflow_io *.so") - -print("setup.py - create {}/setup.py with required = '{}', " - "project_name = '{}' and __version__ = {}".format( - rootpath, package, project, version)) -cmdclass = "{'install_headers':InstallHeaders,'install':InstallCommand,}" -with open(os.path.join(rootpath, "setup.py"), "w") as f: - f.write(content.format(package, version, project, cmdclass)) +with open(os.path.join(here, "tensorflow_io/core/python/ops/version_ops.py")) as f: + entries = [e.strip() for e in f.readlines() if not e.startswith("#")] + assert sum(e.startswith("package = ") for e in entries) == 1 + assert sum(e.startswith("version = ") for e in entries) == 1 + package = list([e[10:] for e in entries if e.startswith("package = ")])[0].strip( + '"' + ) + version = list([e[10:] for e in entries if e.startswith("version = ")])[0].strip( + '"' + ) + assert package != "" + assert version != 
"" + +if "--package-version" in sys.argv: + print(package) + sys.exit(0) + +project = "tensorflow-io" +if "--nightly" in sys.argv: + nightly_idx = sys.argv.index("--nightly") + version = version + ".dev" + sys.argv[nightly_idx + 1] + project = "tensorflow-io-nightly" + sys.argv.remove("--nightly") + sys.argv.pop(nightly_idx) datapath = None -if '--data' in sys.argv: - data_idx = sys.argv.index('--data') - datapath = sys.argv[data_idx + 1] - sys.argv.remove('--data') - sys.argv.pop(data_idx) +if "--data" in sys.argv: + data_idx = sys.argv.index("--data") + datapath = sys.argv[data_idx + 1] + sys.argv.remove("--data") + sys.argv.pop(data_idx) else: - datapath = os.environ.get('TFIO_DATAPATH') - -if datapath is not None: - for rootname, _, filenames in os.walk( - os.path.join(datapath, "tensorflow_io")): - if (not fnmatch.fnmatch(rootname, "*test*") and - not fnmatch.fnmatch(rootname, "*runfiles*")): - for filename in [ - f for f in filenames if fnmatch.fnmatch( - f, "*.so") or fnmatch.fnmatch(f, "*.py")]: - # NOTE: - # cc_grpc_library will generate a lib_cc_grpc.so - # proto_library will generate a lib_proto.so - # both .so files are not needed in final wheel. - # The cc_grpc_library only need to pass `linkstatic = True` - # to the underlying native.cc_library. However it is not - # exposed. proto_library is a native library in bazel which - # we could not patch easily as well. - # For that reason we skip lib_cc_grpc.so and lib_proto.so: - if filename.endswith("_cc_grpc.so") or filename.endswith("_proto.so"): - continue - src = os.path.join(rootname, filename) - dst = os.path.join( - rootpath, - os.path.relpath(os.path.join(rootname, filename), datapath)) - print("setup.py - copy {} to {}".format(src, dst)) - shutil.copyfile(src, dst) - -print("setup.py - run sandbox.run_setup {} {}".format( - os.path.join(rootpath, "setup.py"), sys.argv[1:])) -sandbox.run_setup(os.path.join(rootpath, "setup.py"), sys.argv[1:]) - -if not os.path.exists("dist"): - os.makedirs("dist") -for f in os.listdir(os.path.join(rootpath, "dist")): - print("setup.py - copy {} to {}".format( - os.path.join(rootpath, "dist", f), os.path.join("dist", f))) - shutil.copyfile(os.path.join(rootpath, "dist", f), os.path.join("dist", f)) -print("setup.py - remove {}".format(rootpath)) -shutil.rmtree(rootpath) -print("setup.py - complete") + datapath = os.environ.get("TFIO_DATAPATH") + +if (datapath is not None) and ("bdist_wheel" in sys.argv): + rootpath = tempfile.mkdtemp() + print("setup.py - create {} and copy tensorflow_io data files".format(rootpath)) + for rootname, _, filenames in os.walk(os.path.join(datapath, "tensorflow_io")): + if not fnmatch.fnmatch(rootname, "*test*") and not fnmatch.fnmatch( + rootname, "*runfiles*" + ): + for filename in [ + f + for f in filenames + if fnmatch.fnmatch(f, "*.so") or fnmatch.fnmatch(f, "*.py") + ]: + # NOTE: + # cc_grpc_library will generate a lib_cc_grpc.so + # proto_library will generate a lib_proto.so + # both .so files are not needed in final wheel. + # The cc_grpc_library only need to pass `linkstatic = True` + # to the underlying native.cc_library. However it is not + # exposed. proto_library is a native library in bazel which + # we could not patch easily as well. 
+ # For that reason we skip lib_cc_grpc.so and lib_proto.so: + if filename.endswith("_cc_grpc.so") or filename.endswith("_proto.so"): + continue + src = os.path.join(rootname, filename) + dst = os.path.join( + rootpath, + os.path.relpath(os.path.join(rootname, filename), datapath), + ) + print("setup.py - copy {} to {}".format(src, dst)) + os.makedirs(os.path.dirname(dst), exist_ok=True) + shutil.copyfile(src, dst) + sys.argv.append("--bdist-dir") + sys.argv.append(rootpath) + +# Get the long description from the README file +with open(os.path.join(here, "README.md"), encoding="utf-8") as f: + long_description = f.read() + + +class BinaryDistribution(setuptools.dist.Distribution): + def has_ext_modules(self): + return True + + +setuptools.setup( + name=project, + version=version, + description="TensorFlow IO", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/tensorflow/io", + download_url="https://github.com/tensorflow/io/tags", + author="Google Inc.", + author_email="opensource@google.com", + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + keywords="tensorflow io machine learning", + packages=setuptools.find_packages(where=".", exclude=["tests"]), + python_requires=">=3.5, <3.9", + install_requires=[package], + package_data={".": ["*.so"],}, + project_urls={ + "Source": "https://github.com/tensorflow/io", + "Bug Reports": "https://github.com/tensorflow/io/issues", + "Documentation": "https://tensorflow.org/io", + }, + zip_safe=False, + distclass=BinaryDistribution, +)