From 172e61e07408f48547812644b092dbdfd3e621f2 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Tue, 10 Mar 2020 09:32:06 +0545 Subject: [PATCH 1/5] py38: make wheels ci: set ARROW_LIBHDFS_DIR to load native library deps: pin pyarrow to 0.15.1 for py<=3.7 pin deps for python<3.8 pin deps for python==3.8 as well add windows Py38 on travis do not use speedcopy for Python3.8 and Windows --- .travis.yml | 19 ++++++++++++++++--- dvc/system.py | 3 ++- scripts/ci/before_install.sh | 4 ++-- scripts/ci/install_hadoop.sh | 7 +++++++ setup.py | 10 ++++++++-- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 514d4bd212..b22280038f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,10 +33,20 @@ jobs: install: script: ./scripts/ci/check_patch.sh # test jobs + - name: "3.8 on Windows" + stage: test + os: windows # Windows 10.0.17134 N/A Build 17134 + language: shell # 'language: python' is an error on Travis CI Windows + env: + - PYTHON_VERSION=3.8.0 + - PATH=/c/Python38:/c/Python38/Scripts:$PATH - name: "3.7 on Windows" stage: test os: windows # Windows 10.0.17134 N/A Build 17134 language: shell # 'language: python' is an error on Travis CI Windows + env: + - PYTHON_VERSION=3.7.5 + - PATH=/c/Python37:/c/Python37/Scripts:$PATH - name: "3.5 on Linux" language: python python: 3.5 @@ -46,6 +56,9 @@ jobs: - name: "3.7 on Linux" language: python python: 3.7 + - name: "3.8 on Linux" + language: python + python: 3.8 - name: "3.7 on Mac" os: osx osx_image: xcode11.3 @@ -65,17 +78,17 @@ jobs: script: ./scripts/build_windows.cmd - name: "Linux pkgs" language: python - python: 3.7 + python: 3.8 before_install: install: script: ./scripts/build_posix.sh - name: "PyPI pkgs" language: python - python: 3.7 + python: 3.8 script: ./scripts/build_package.sh - name: Snapcraft snap language: python - python: 3.7 + python: 3.8 addons: snaps: - name: snapcraft diff --git a/dvc/system.py b/dvc/system.py index 483429cc08..8867edde9e 100644 --- a/dvc/system.py +++ b/dvc/system.py @@ -3,13 +3,14 @@ import os import platform import shutil +import sys from dvc.compat import fspath from dvc.exceptions import DvcException logger = logging.getLogger(__name__) -if platform.system() == "Windows": +if platform.system() == "Windows" and sys.version_info < (3, 8): try: import speedcopy diff --git a/scripts/ci/before_install.sh b/scripts/ci/before_install.sh index 451179c9bb..c7b39dcbfa 100644 --- a/scripts/ci/before_install.sh +++ b/scripts/ci/before_install.sh @@ -44,8 +44,8 @@ if [[ "$TRAVIS_BUILD_STAGE_NAME" == "Test" ]]; then fi if [[ "$TRAVIS_OS_NAME" == "windows" ]]; then - $scriptdir/retry.sh choco install python --version 3.7.5 - echo 'PATH="/c/Python37:/c/Python37/Scripts:$PATH"' >>env.sh + $scriptdir/retry.sh choco install python --version $PYTHON_VERSION + echo "PATH='$PATH'" >>env.sh elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ln -s -f /usr/local/bin/python3 /usr/local/bin/python ln -s -f /usr/local/bin/pip3 /usr/local/bin/pip diff --git a/scripts/ci/install_hadoop.sh b/scripts/ci/install_hadoop.sh index 10a42eea5d..4799abe158 100755 --- a/scripts/ci/install_hadoop.sh +++ b/scripts/ci/install_hadoop.sh @@ -23,6 +23,13 @@ echo "export HADOOP_COMMON_HOME=/usr/local/hadoop" >>env.sh echo "export HADOOP_HDFS_HOME=/usr/local/hadoop" >>env.sh echo "export YARN_HOME=/usr/local/hadoop" >>env.sh echo "export HADOOP_COMMON_LIB_NATIVE_DIR=/usr/local/hadoop/lib/native" >>env.sh + +# regression on PyArrow==0.16.0: https://issues.apache.org/jira/browse/ARROW-7841 +# it tries to retrieve native library from $HADOOP_HOME directory, instead of +# `$HADOOP_HOME/lib/native`. +# force it to search for `libhdfs.so` inside `$HADOOP_HOME/lib/native` +echo "export ARROW_LIBHDFS_DIR=/usr/local/hadoop/lib/native" >> env.sh + echo "export JAVA_HOME=/usr/" >>env.sh echo "export PATH=\$PATH:/usr/local/hadoop/sbin:/usr/local/hadoop/bin:$JAVA_PATH/bin" >>env.sh diff --git a/setup.py b/setup.py index 5930136a7e..cb9e1d2b29 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ def run(self): "pywin32>=225; sys_platform == 'win32'", "networkx>=2.1,<2.4", "pydot>=1.2.4", - "speedcopy>=2.0.1; sys_platform == 'win32'", + "speedcopy>=2.0.1; python_version < '3.8' and sys_platform == 'win32'", "flatten_json>=0.1.6", "texttable>=0.5.2", "pygtrie==2.3.2", @@ -89,7 +89,12 @@ def run(self): azure = ["azure-storage-blob==2.1.0"] oss = ["oss2==2.6.1"] ssh = ["paramiko>=2.5.0"] -hdfs = ["pyarrow==0.15.1"] +hdfs = [ + # pyarrow-0.16.0 import fails on 3.5 and 3.7 (works on 3.6 though) + # due to: https://issues.apache.org/jira/browse/ARROW-7852 + "pyarrow==0.15.1; python_version < '3.8'", + "pyarrow==0.16.0; python_version == '3.8'", +] # gssapi should not be included in all_remotes, because it doesn't have wheels # for linux and mac, so it will fail to compile if user doesn't have all the # requirements, including kerberos itself. Once all the wheels are available, @@ -157,6 +162,7 @@ def run(self): "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], packages=find_packages(exclude=["tests"]), include_package_data=True, From 3374ecc7ff2b2bb61a66a501a75e90372605e6fc Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 16 Mar 2020 10:11:04 +0545 Subject: [PATCH 2/5] use realpath to resolve symbolic links --- tests/func/test_checkout.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 1f49ca4d84..c8176f8875 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -436,10 +436,10 @@ def test(self): self.assertEqual(ret, 0) self.assertTrue(System.is_symlink(self.FOO)) - old_foo_link = os.readlink(self.FOO) + old_foo_link = os.path.realpath(self.FOO) self.assertTrue(System.is_symlink(self.DATA)) - old_data_link = os.readlink(self.DATA) + old_data_link = os.path.realpath(self.DATA) old_cache_dir = self.dvc.cache.local.cache_dir new_cache_dir = old_cache_dir + "_new" @@ -452,10 +452,10 @@ def test(self): self.assertEqual(ret, 0) self.assertTrue(System.is_symlink(self.FOO)) - new_foo_link = os.readlink(self.FOO) + new_foo_link = os.path.realpath(self.FOO) self.assertTrue(System.is_symlink(self.DATA)) - new_data_link = os.readlink(self.DATA) + new_data_link = os.path.realpath(self.DATA) self.assertEqual( relpath(old_foo_link, old_cache_dir), From 8255c47200fd6218f1cd970070aac81091ab4fdd Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 16 Mar 2020 11:10:34 +0545 Subject: [PATCH 3/5] do not build pkgs for Windows and Linux on Python3.8 --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b22280038f..a57f453e2e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -76,9 +76,12 @@ jobs: language: shell install: script: ./scripts/build_windows.cmd + env: + - PYTHON_VERSION=3.7.5 + - PATH=/c/Python37:/c/Python37/Scripts:$PATH - name: "Linux pkgs" language: python - python: 3.8 + python: 3.7 before_install: install: script: ./scripts/build_posix.sh From 1a8efcf1d6234f2a0e97e72c7c4b8d230fa25a6f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 16 Mar 2020 19:09:25 +0000 Subject: [PATCH 4/5] reorder --- .travis.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index a57f453e2e..75d6ded841 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,13 +33,6 @@ jobs: install: script: ./scripts/ci/check_patch.sh # test jobs - - name: "3.8 on Windows" - stage: test - os: windows # Windows 10.0.17134 N/A Build 17134 - language: shell # 'language: python' is an error on Travis CI Windows - env: - - PYTHON_VERSION=3.8.0 - - PATH=/c/Python38:/c/Python38/Scripts:$PATH - name: "3.7 on Windows" stage: test os: windows # Windows 10.0.17134 N/A Build 17134 @@ -47,6 +40,12 @@ jobs: env: - PYTHON_VERSION=3.7.5 - PATH=/c/Python37:/c/Python37/Scripts:$PATH + - name: "3.8 on Windows" + os: windows # Windows 10.0.17134 N/A Build 17134 + language: shell # 'language: python' is an error on Travis CI Windows + env: + - PYTHON_VERSION=3.8.0 + - PATH=/c/Python38:/c/Python38/Scripts:$PATH - name: "3.5 on Linux" language: python python: 3.5 From 10078db03008c6b05f3566fd1400ca83dfda0558 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 16 Mar 2020 19:15:00 +0000 Subject: [PATCH 5/5] pyarrow: document regression fix --- scripts/ci/install_hadoop.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/ci/install_hadoop.sh b/scripts/ci/install_hadoop.sh index 4799abe158..48c35e0c77 100755 --- a/scripts/ci/install_hadoop.sh +++ b/scripts/ci/install_hadoop.sh @@ -24,10 +24,11 @@ echo "export HADOOP_HDFS_HOME=/usr/local/hadoop" >>env.sh echo "export YARN_HOME=/usr/local/hadoop" >>env.sh echo "export HADOOP_COMMON_LIB_NATIVE_DIR=/usr/local/hadoop/lib/native" >>env.sh -# regression on PyArrow==0.16.0: https://issues.apache.org/jira/browse/ARROW-7841 -# it tries to retrieve native library from $HADOOP_HOME directory, instead of +# PyArrow==0.16.0 regression https://issues.apache.org/jira/browse/ARROW-7841 +# retrieves native library from $HADOOP_HOME directory instead of # `$HADOOP_HOME/lib/native`. -# force it to search for `libhdfs.so` inside `$HADOOP_HOME/lib/native` +# Fix: force search for `libhdfs.so` inside `$HADOOP_HOME/lib/native`. +# Note: not needed for PyArrow==0.17.0. echo "export ARROW_LIBHDFS_DIR=/usr/local/hadoop/lib/native" >> env.sh echo "export JAVA_HOME=/usr/" >>env.sh