Skip to content

Commit

Permalink
make gensim CI great again (#3488)
Browse files Browse the repository at this point in the history
* pin numpy and scipy versions for running tests

I suspect the latest numpy and/or scipy wheels are the cause of the
recent slowdown, because the tests that ran on 2023/06/01 against the
versions that I'm pinning in this commit were much faster.

* pin cython as well

* add some more diagnostic steps to workflow

* adjust tests.yml

* git add test_wheel.py

* increase pytest verbosity

* fixup

* fixup

* fixup

* adjust cython version in pin

* output cython output as artifacts

* update tests.yml, show numpy libs for all platforms

* show Cython version during build_ext step

* add more diagnostics to setup.py and tests.yml

* force cythonization

* still more diagnostics

* increase test scope to entire suite

* clean up, add helpful comments

* update test.yml

* remove cython magic from setup.py, that gets handled by pyproject.toml

* fixup

* git mv test_wheel.py .github/workflows/

* reduce timeout to 15 minutes

* make that 20 minutes
  • Loading branch information
mpenkov committed Aug 23, 2023
1 parent de016e8 commit 73e8b6a
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 33 deletions.
76 changes: 76 additions & 0 deletions .github/workflows/test_wheel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python
"""Test a Gensim wheel stored on S3.
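Fetches the wheel (downloading it if a URL is given, or using it in place if a
local path is given), installs it into a fresh virtualenv inside a temporary
sandbox, and then runs the tests copied from a local gensim checkout.
usage:
python test_wheel.py <url-or-path> $(which python3.10)
where wheel URLs come from http://gensim-wheels.s3-website-us-east-1.amazonaws.com/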
"""

import argparse
import io
import os
import shlex
import shutil
import subprocess
import sys
import tempfile
import urllib.parse
import urllib.request

curr_dir = os.path.dirname(os.path.abspath(__file__))


def run(*command, **kwargs):
    """Echo a command to stderr between two dashed rules, then execute it.

    :param command: The program and its arguments, one per positional argument.
    :param kwargs: Passed through unchanged to ``subprocess.check_call``.
    :raises subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    # Arguments may be path-like objects rather than strings, and may contain
    # whitespace.  Stringify and quote them for display so that echoing never
    # raises and the logged line is unambiguous in a POSIX shell.  The command
    # itself is executed with the original, unmodified arguments.
    printable = " ".join(shlex.quote(str(arg)) for arg in command)
    rule = "-" * 70

    print(rule, file=sys.stderr)
    print(printable, file=sys.stderr)
    print(rule, file=sys.stderr)
    subprocess.check_call(command, **kwargs)


def main():
    """Install a gensim wheel into a throwaway virtualenv and run the tests against it.

    Steps, all performed inside a fresh temporary directory (the "sandbox"):

    1. Copy ``gensim/test`` from the local gensim checkout into the sandbox.
    2. Download the wheel if ``wheel_path`` is an http(s) URL, otherwise use it as-is.
    3. Create a virtualenv with the requested interpreter and install the wheel
       plus the test-time dependencies into it.
    4. Run pytest from the sandbox, so that the installed wheel is tested
       (presumably rather than the source checkout -- the tests are copied out
       of the repo for that reason; confirm if this matters to you).

    The sandbox is removed afterwards unless ``--keep`` is given.

    :raises subprocess.CalledProcessError: If any of the external commands fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("wheel_path", help="The location of the wheel. May be a URL or local path")
    parser.add_argument("python", help="Which python binary to use to test the wheel")
    parser.add_argument("--gensim-path", default=os.path.expanduser("~/git/gensim"), help="Where the gensim repo lives")
    parser.add_argument("--keep", action="store_true", help="Do not delete the sandbox after testing")
    parser.add_argument("--test", default="test", help="Specify which tests to run")
    args = parser.parse_args()

    # The output is expected to be two space-separated fields ("<name> <version>");
    # only the version is used, to label the sandbox directory.
    _, python_version = subprocess.check_output([args.python, "--version"]).decode().strip().split(" ", 1)

    # Create the sandbox *before* entering the try block: if mkdtemp itself
    # fails there is nothing to clean up, and the finally clause below must
    # never run with tmpdir unbound (that would mask the real error).
    tmpdir = tempfile.mkdtemp(prefix=f"test_wheel-py{python_version}-")
    try:
        tmp_test_path = os.path.join(tmpdir, "test")
        shutil.copytree(os.path.join(args.gensim_path, "gensim/test"), tmp_test_path)

        if args.wheel_path.startswith(("http://", "https://")):
            # Keep the remote file name when saving the download into the sandbox.
            parsed = urllib.parse.urlparse(args.wheel_path)
            filename = parsed.path.split('/')[-1]
            wheel_path = os.path.join(tmpdir, filename)
            urllib.request.urlretrieve(args.wheel_path, wheel_path)
        else:
            wheel_path = args.wheel_path

        env_path = os.path.join(tmpdir, "env")
        run("virtualenv", "-p", args.python, env_path)

        # NOTE(review): "env/bin/..." assumes the POSIX virtualenv layout.
        python_exe = os.path.join(tmpdir, "env/bin/python")
        run(python_exe, "-m", "pip", "install", wheel_path)
        run(python_exe, "-m", "pip", "install", "mock", "pytest", "testfixtures")

        # Run from inside the sandbox; args.test is resolved relative to it.
        pytest_exe = os.path.join(tmpdir, "env/bin/pytest")
        run(pytest_exe, "-vvv", args.test, "--durations", "0", cwd=tmpdir)
    finally:
        if args.keep:
            print(f"keeping {tmpdir}, remove it yourself when done")
        else:
            shutil.rmtree(tmpdir)



# Script entry point: only run the wheel test when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
41 changes: 30 additions & 11 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
tests:
name: test ${{ matrix.os }} python ${{ matrix.python }}
timeout-minutes: 120
timeout-minutes: 20
runs-on: ${{ matrix.os }}
defaults:
run:
Expand All @@ -63,13 +63,13 @@ jobs:
fail-fast: false
matrix:
include:
- {python: 3.8, os: ubuntu-20.04}
- {python: 3.9, os: ubuntu-20.04}
- {python: '3.8', os: ubuntu-20.04}
- {python: '3.9', os: ubuntu-20.04}
- {python: '3.10', os: ubuntu-20.04}
- {python: '3.11', os: ubuntu-20.04}

- {python: 3.8, os: windows-2019}
- {python: 3.9, os: windows-2019}
- {python: '3.8', os: windows-2019}
- {python: '3.9', os: windows-2019}
- {python: '3.10', os: windows-2019}
- {python: '3.11', os: windows-2019}

Expand Down Expand Up @@ -117,11 +117,31 @@ jobs:
if: matrix.os == 'windows'
run: pip install -e .[test-win]

- name: Build
- run: pip freeze

- name: Show numpy configuration
run: python -c 'import numpy;numpy.show_config()'

- name: Show libraries packaged by numpy
run: |
python --version
pip --version
python setup.py build_ext --inplace
python -c 'import numpy;import os;print(os.listdir(os.path.join(os.path.dirname(numpy.__file__),"..","numpy.libs")))' || echo
# The second argument to os.path.join must be relative: an absolute component
# such as "/.libs" discards the numpy directory and lists the filesystem root.
python -c 'import numpy;import os;print(os.listdir(os.path.join(os.path.dirname(numpy.__file__),".libs")))' || echo
python -c 'import numpy;import os;print(os.listdir(os.path.join(os.path.dirname(numpy.__file__),".dylibs")))' || echo
#
# Nb. pip 23.2.1 and newer will quietly run build_ext using the Cython
# version specified in pyproject.toml, so we don't need to run build_ext
# ourselves.
#
# It's helpful to know what cython version was actually used to build the
# extensions: it'll be in the file headers.
#
#
- name: Examine build output
run: |
ls gensim/models/*.so gensim/models/*.dylib gensim/models/*.dll || true
head gensim/models/*.c gensim/models/*.cpp || true
- name: Output FAST_VERSION
run: python -c 'from gensim import models;print(models.FAST_VERSION)'
Expand All @@ -131,8 +151,7 @@ jobs:
#
- name: Run tests (without coverage)
if: matrix.coverage != true
# run: pytest -v gensim/test --durations 0
run: pytest -v gensim/test/test_word2vec.py gensim/test/test_keyedvectors.py --durations 0
run: pytest -v gensim/test --durations 0

- name: Run tests (with coverage)
if: matrix.coverage == true
Expand Down
22 changes: 13 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
[build-system]
requires = [
"Cython>=0.29.32",
# oldest supported Numpy for this platform is 1.17 but the oldest supported by Gensim
# is 1.18.5, remove the line when they increase oldest supported Numpy for this platform
"numpy==1.18.5; python_version=='3.8' and platform_machine not in 'arm64|aarch64'",
"oldest-supported-numpy; python_version>'3.8' or platform_machine in 'arm64|aarch64'",
"scipy",
"setuptools",
"wheel",
]
#
# If we build our extensions with Cython 3.0.0, then they will be an
# order of magnitude slower, so avoid it for now.
#
"Cython>=0.29.32,<3.0.0",
# oldest supported Numpy for this platform is 1.17 but the oldest supported by Gensim
# is 1.18.5, remove the line when they increase oldest supported Numpy for this platform
"numpy==1.18.5; python_version=='3.8' and platform_machine not in 'arm64|aarch64'",
"oldest-supported-numpy; python_version>'3.8' or platform_machine in 'arm64|aarch64'",
"scipy",
"setuptools",
"wheel",
]
13 changes: 0 additions & 13 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,6 @@ def run(self):
'pytest',
'pytest-cov',
'mock',
'cython',
'testfixtures',
]

Expand Down Expand Up @@ -331,25 +330,13 @@ def run(self):
]

NUMPY_STR = 'numpy >= 1.18.5'
#
# We pin the Cython version for reproducibility. We expect our extensions
# to build with any sane version of Cython, so we should update this pin
# periodically.
#
CYTHON_STR = 'Cython==0.29.32'

# Allow overriding the Cython version requirement
CYTHON_STR = os.environ.get('GENSIM_CYTHON_REQUIRES', CYTHON_STR)

install_requires = [
NUMPY_STR,
'scipy >= 1.7.0',
'smart_open >= 1.8.1',
]

if need_cython():
install_requires.append(CYTHON_STR)

setup(
name='gensim',
version='4.3.1.dev0',
Expand Down

0 comments on commit 73e8b6a

Please sign in to comment.