diff --git a/.gitignore b/.gitignore
index d15765a2..2665fac0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,3 +139,5 @@ dmypy.json
.DS_Store
/test_tiles_output
*.TIF*
+
+.asv/
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000..35674f33
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,95 @@
Benchmarking
============

`xarray-spatial` uses ASV (https://asv.readthedocs.io) for benchmarking.

Installing ASV
--------------

ASV creates virtualenvs to run benchmarks in. Before using it you need to run

```
pip install asv virtualenv
```

or the `conda` equivalent.

Running benchmarks
------------------

ASV configuration information is stored in `benchmarks/asv.conf.json`. This includes a `matrix` section listing the dependencies to install in the virtual environments, in addition to those installed by default. You always need `pyct`, as `setup.py` uses it. There are also some optional dependencies that are commented out in the `matrix` section.

If you want to benchmark `cupy`-backed `DataArray`s and have the hardware and drivers to support this, uncomment the `cupy-cuda101` line in `asv.conf.json` and change the `101` version suffix to match your CUDA setup. The CUDA version can be determined from the last line of the output of `nvcc --version`.

If you want to benchmark algorithms that use the ray-tracing code in `rtxpy`, uncomment the `rtxpy` line in `asv.conf.json` as well as the `cupy` line, as shown in the example below.
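For example, with both optional dependencies enabled and assuming a hypothetical CUDA 10.2 setup (so the `cupy` package name becomes `cupy-cuda102`; substitute your own version suffix), the `matrix` section would look like:

```
"matrix": {
    "pyct": [],
    "cupy-cuda102": [],
    "rtxpy": [],
},
```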
To run all benchmarks against the default `master` branch:
```
cd benchmarks
asv run
```

The first time this is run, ASV creates a machine file to store information about your machine. It then creates a virtual environment and runs each benchmark multiple times to obtain statistically valid timings.

To list the benchmark timings stored for the `master` branch, use:
```
asv show master
```

ASV ships with its own simple web server to display the results interactively in a web browser. To use it:
```
asv publish
asv preview
```
and then open a web browser at the URL specified.

If you want to quickly run all benchmarks just once each, to check for errors etc., use:
```
asv dev
```
instead of `asv run`.


Adding new benchmarks
---------------------

Add new benchmarks to existing or new classes in the `benchmarks/benchmarks` directory. Any class member function whose name starts with `time` is identified as a timing benchmark when `asv` is run.

Data required to run benchmarks is usually created in the `setup()` member function. This ensures that the time taken to set up the data is not included in the benchmark time. The `setup()` function is called once for each invocation of each benchmark; the data are not cached.

At the top of each benchmark class there are lists of parameter names and values. Each benchmark is repeated for each unique combination of these parameters.

If you wish to benchmark `cupy` and/or `rtxpy` functionality, ensure that you test for the availability of the correct libraries and hardware first. This is illustrated in the `get_xr_dataarray()` function. A minimal benchmark class following all of these conventions is sketched at the end of this section.

If you only want to run a subset of benchmarks, use syntax like:
```
asv run -b Slope
```
where the text after the `-b` flag is used as a regex to match benchmark file, class and function names.
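Here is the promised sketch, closely modelled on `benchmarks/benchmarks/slope.py` from this PR; the class name is a placeholder and the timed function could be any you want to benchmark:

```
from xrspatial import slope

from .common import get_xr_dataarray


class MyBenchmark:
    # Every benchmark in this class is repeated for each unique
    # combination of these parameters.
    params = ([100, 1000], ["numpy", "cupy"])
    param_names = ("nx", "type")

    def setup(self, nx, type):
        # Called before each benchmark invocation; time spent here is not
        # included in the benchmark timings. get_xr_dataarray() raises
        # NotImplementedError if e.g. "cupy" is requested but unavailable,
        # which makes ASV skip that parameter combination.
        self.agg = get_xr_dataarray((nx // 2, nx), type)

    def time_slope(self, nx, type):
        # Member functions whose names start with "time" are timing
        # benchmarks; they receive the same parameters as setup().
        slope(self.agg)
```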
Benchmarking code changes
-------------------------

You can compare the performance of code on different branches and in different commits. Usually, if you want to determine how much faster a new algorithm is, the old code will be on the `master` branch and the new code on a feature branch. Because ASV uses virtual environments and checks out the `xarray-spatial` source code into them, your new code must be committed to the feature branch.

To benchmark the latest commits on `master` and your new feature branch, edit `asv.conf.json` to change the line
```
"branches": ["master"],
```
into
```
"branches": ["master", "new_feature_branch"],
```
or similar.

Now when you run `asv run`, the benchmarks will be run against both branches in turn.

Then use
```
asv show
```
to list the commits that have been benchmarked, and
```
asv compare commit1 commit2
```
to give you a side-by-side comparison of the two commits.
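The revisions passed to `asv compare` can be anything `git` can resolve, so (assuming the branch names above) comparing the two branch heads directly by name should also work:

```
asv compare master new_feature_branch
```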
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 00000000..ccf53f20
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,159 @@
{
    // The version of the config file format. Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "xarray-spatial",

    // The project's homepage
    "project_url": "https://github.com/makepath/xarray-spatial",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "..",

    // The Python project's subdirectory in your repo. If missing or
    // the empty string, the project is assumed to be located at the root
    // of the repository.
    // "repo_subdir": "",

    // Customizable commands for building, installing, and
    // uninstalling the project. See asv.conf.json documentation.
    //
    // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
    // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
    // "build_command": [
    //     "python setup.py build",
    //     "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
    // ],

    // List of branches to benchmark. If not provided, defaults to "master"
    // (for git) or "default" (for mercurial).
    "branches": ["master"],

    // The DVCS being used. If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    "dvcs": "git",

    // The tool to use to create environments. May be "conda",
    // "virtualenv" or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "virtualenv",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    //"install_timeout": 600,

    // the base URL to show a commit for the project.
    "show_commit_url": "http://github.com/makepath/xarray-spatial/commit/",

    // The Pythons you'd like to test against. If not provided, defaults
    // to the current version of Python used to run `asv`.
    // "pythons": ["3.8"],

    // The list of conda channel names to be searched for benchmark
    // dependency packages in the specified order
    // "conda_channels": ["conda-forge", "defaults"],

    // The matrix of dependencies to test. Each key is the name of a
    // package (in PyPI) and the values are version numbers. An empty
    // list or empty string indicates to just test against the default
    // (latest) version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPI, and the 'environment_type' is conda, then you can preface
    // the package name with 'pip+', and the package will be installed via
    // pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    //
    "matrix": {
        "pyct": [],
        //"cupy-cuda101": [],
        //"rtxpy": [],
    },

    // Combinations of libraries/python versions can be excluded/included
    // from the set to test. Each entry is a dictionary containing additional
    // key-value pairs to include/exclude.
    //
    // An exclude entry excludes entries where all values match. The
    // values are regexps that should match the whole string.
    //
    // An include entry adds an environment. Only the packages listed
    // are installed. The 'python' key is required. The exclude rules
    // do not apply to includes.
    //
    // In addition to package names, the following keys are available:
    //
    // - python
    //     Python version, as in the *pythons* variable above.
    // - environment_type
    //     Environment type, as above.
    // - sys_platform
    //     Platform, as in sys.platform. Possible values for the common
    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
    //
    // "exclude": [
    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
    // ],
    //
    // "include": [
    //     // additional env for python2.7
    //     {"python": "2.7", "numpy": "1.8"},
    //     // additional env if run on windows+conda
    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
    // ],

    // The directory (relative to the current directory) that benchmarks are
    // stored in. If not provided, defaults to "benchmarks"
    "benchmark_dir": "benchmarks",

    // The directory (relative to the current directory) to cache the Python
    // environments in. If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in. If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to. If not provided, defaults to "html".
    "html_dir": ".asv/html",

    // The number of characters to retain in the commit hashes.
    // "hash_length": 8,

    // `asv` will cache results of the recent builds in each
    // environment, making them faster to install next time. This is
    // the number of builds to keep, per environment.
    // "build_cache_size": 2,

    // The commits after which the regression search in `asv publish`
    // should start looking for regressions. Dictionary whose keys are
    // regexps matching to benchmark names, and values corresponding to
    // the commit (exclusive) after which to start looking for
    // regressions. The default is to start from the first commit
    // with results. If the commit is `null`, regression detection is
    // skipped for the matching benchmark.
    //
    // "regressions_first_commits": {
    //     "some_benchmark": "352cdf",  // Consider regressions only after this commit
    //     "another_benchmark": null,   // Skip regression detection altogether
    // },

    // The thresholds for relative change in results, after which `asv
    // publish` starts reporting regressions. Dictionary of the same
    // form as in ``regressions_first_commits``, with values
    // indicating the thresholds. If multiple entries match, the
    // maximum is taken. If no entry matches, the default is 5%.
    //
    // "regressions_thresholds": {
    //     "some_benchmark": 0.01,  // Threshold of 1%
    //     "another_benchmark": 0.5,  // Threshold of 50%
    // },
}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/benchmarks/common.py b/benchmarks/benchmarks/common.py
new file mode 100644
index 00000000..ac0b7389
--- /dev/null
+++ b/benchmarks/benchmarks/common.py
@@ -0,0 +1,49 @@
import numpy as np
import xarray as xr
from xrspatial.gpu_rtx import has_rtx
from xrspatial.utils import has_cuda, has_cupy


def get_xr_dataarray(shape, type, different_each_call=False):
    """Return a DataArray containing a Gaussian bump with noise.

    Valid types are "numpy", "cupy" and "rtxpy". Using "numpy" will return
    a numpy-backed xarray DataArray. Using either of the other two will
    return a cupy-backed DataArray, but only if the required dependencies
    are available; otherwise a NotImplementedError is raised so that the
    benchmark will not be run.

    Calling with different_each_call=True ensures that each array returned
    by this function is different, by randomly changing the last element.
    This is required for functions that create an rtxpy triangulation, to
    stop them reusing a cached triangulation and thereby producing
    optimistically fast benchmark times.
    """
    ny, nx = shape

    x = np.linspace(-1000, 1000, nx)
    y = np.linspace(-800, 800, ny)
    x2, y2 = np.meshgrid(x, y)
    z = 100.0 * np.exp(-x2**2 / 5e5 - y2**2 / 2e5)

    rng = np.random.default_rng(71942)
    z += rng.normal(0.0, 2.0, (ny, nx))

    if different_each_call:
        z[-1, -1] = np.random.default_rng().normal(0.0, 2.0)

    if type == "numpy":
        pass
    elif type == "cupy":
        if not (has_cuda() and has_cupy()):
            raise NotImplementedError()
        import cupy
        z = cupy.asarray(z)
    elif type == "rtxpy":
        if not has_rtx():
            raise NotImplementedError()
        import cupy
        z = cupy.asarray(z)
    else:
        raise RuntimeError(f"Unrecognised type {type}")

    return xr.DataArray(z, coords=dict(x=x, y=y), dims=["y", "x"])
diff --git a/benchmarks/benchmarks/slope.py b/benchmarks/benchmarks/slope.py
new file mode 100644
index 00000000..6a1f5287
--- /dev/null
+++ b/benchmarks/benchmarks/slope.py
@@ -0,0 +1,14 @@
from xrspatial import slope
from .common import get_xr_dataarray


class Slope:
    params = ([100, 300, 1000, 3000, 10000], ["numpy", "cupy"])
    param_names = ("nx", "type")

    def setup(self, nx, type):
        ny = nx // 2
        self.xr = get_xr_dataarray((ny, nx), type)

    def time_slope(self, nx, type):
        slope(self.xr)
diff --git a/benchmarks/benchmarks/viewshed.py b/benchmarks/benchmarks/viewshed.py
new file mode 100644
index 00000000..8a8d328d
--- /dev/null
+++ b/benchmarks/benchmarks/viewshed.py
@@ -0,0 +1,18 @@
from xrspatial import viewshed
from .common import get_xr_dataarray


class Viewshed:
    # Note there is no plain cupy option: the GPU viewshed requires rtxpy.
    params = ([100, 300, 1000, 3000], ["numpy", "rtxpy"])
    param_names = ("nx", "type")

    def setup(self, nx, type):
        ny = nx // 2
        self.xr = get_xr_dataarray(
            (ny, nx), type, different_each_call=(type == "rtxpy"))
        self.x = 100
        self.y = 50

    def time_viewshed(self, nx, type):
        viewshed(self.xr, x=self.x, y=self.y, observer_elev=1.0)