-
Notifications
You must be signed in to change notification settings - Fork 21.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update base for Update on "[Gradient Compression] Allow BatchedPowerSGD to run vanilla allreduce for the first K iterations"
Similar to #50973, allow the batched version to run vanilla allreduce for the first K iterations. This may be useful if the batched version can be applied to some use cases where the accuracy requirement is not very strict. Original PR issue: Investigate Applying PowerSGD to Communication Hook for Gradient Compression #47202 Differential Revision: [D26077709](https://our.internmc.facebook.com/intern/diff/D26077709/) [ghstack-poisoned]
- Loading branch information
Showing
364 changed files
with
11,484 additions
and
4,356 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env python3 | ||
|
||
"""Generates a matrix to be utilized through github actions | ||
Will output a condensed version of the matrix if on a pull request that only | ||
includes the latest version of python we support built on three different | ||
architectures: | ||
* CPU | ||
* Latest CUDA | ||
* Latest ROCM | ||
""" | ||
|
||
import json | ||
import os | ||
import itertools | ||
|
||
# CUDA versions binaries are built for. generate_matrix() treats the last
# entry as the "latest" one when condensing the matrix.
CUDA_ARCHES = ["10.1", "10.2", "11.0"]

# ROCM versions binaries are built for. generate_matrix() treats the last
# entry as the "latest" one when condensing the matrix.
ROCM_ARCHES = ["3.10", "4.0"]

# Every build target: the CPU-only build followed by each GPU flavor.
FULL_ARCHES = ["cpu"] + CUDA_ARCHES + ROCM_ARCHES

# Maps an arch identifier (an entry of FULL_ARCHES) to the container image
# the corresponding build runs in.
# TODO: Re-do manylinux CUDA image tagging scheme to be similar to
#       ROCM so we don't have to do this replacement
CONTAINER_IMAGES = {
    cuda_version: "pytorch/manylinux-cuda" + cuda_version.replace(".", "")
    for cuda_version in CUDA_ARCHES
}
CONTAINER_IMAGES.update(
    (rocm_version, "pytorch/manylinux-rocm:" + rocm_version)
    for rocm_version in ROCM_ARCHES
)
CONTAINER_IMAGES["cpu"] = "pytorch/manylinux-cpu"

# Python versions binaries are built for; the last entry is the one used
# for the condensed pull-request matrix.
FULL_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
|
||
|
||
def is_pull_request():
    """Return True when the GITHUB_HEAD_REF environment variable is non-empty.

    The variable is read from the process environment; an unset or empty
    value yields False. A real ``bool`` is returned (rather than the raw
    string or ``None``) so the result matches what the ``is_`` prefix
    promises while staying compatible with truthiness checks.
    """
    return bool(os.environ.get("GITHUB_HEAD_REF"))
|
||
def generate_matrix():
    """Return the build matrix as a JSON string.

    The result has the shape ``{"include": [...]}`` with one entry per
    (python version, arch) combination. Each entry carries the keys
    ``python_version``, ``gpu_arch_type``, ``gpu_arch_version`` and
    ``container_image``.

    When ``is_pull_request()`` is truthy the matrix is condensed to the last
    entry of ``FULL_PYTHON_VERSIONS`` built for "cpu", the last CUDA arch and
    the last ROCM arch.

    Raises:
        KeyError: if an arch has no entry in ``CONTAINER_IMAGES``.
    """
    python_versions = FULL_PYTHON_VERSIONS
    arches = FULL_ARCHES
    if is_pull_request():
        python_versions = [python_versions[-1]]
        arches = ["cpu", CUDA_ARCHES[-1], ROCM_ARCHES[-1]]

    def gpu_arch_type_for(arch_version):
        # Classify from the arch lists instead of searching the container
        # image name for a substring, so renaming an image cannot silently
        # change the reported arch type.
        if arch_version == "cpu":
            return "cpu"
        if arch_version in ROCM_ARCHES:
            return "rocm"
        # Anything else is treated as CUDA, matching the previous default.
        return "cuda"

    matrix = [
        {
            "python_version": python_version,
            "gpu_arch_type": gpu_arch_type_for(arch_version),
            "gpu_arch_version": arch_version,
            "container_image": CONTAINER_IMAGES[arch_version],
        }
        for python_version, arch_version
        in itertools.product(python_versions, arches)
    ]
    return json.dumps({"include": matrix})
|
||
def main():
    """Write the generated build matrix JSON to stdout."""
    matrix_json = generate_matrix()
    print(matrix_json)
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import os | ||
import subprocess | ||
import re | ||
|
||
from datetime import datetime | ||
from distutils.util import strtobool | ||
from pathlib import Path | ||
|
||
# Matches the "v" prefix of a tag name, e.g. the "v" in "v1.7.1".
LEADING_V_PATTERN = re.compile(r"^v")
# Matches a release-candidate suffix at the end of a tag, e.g. "-rc1".
TRAILING_RC_PATTERN = re.compile(r"-rc[0-9]*$")
# Matches the "a0" suffix at the end of the version read from version.txt.
LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile(r"a0$")
|
||
class NoGitTagException(Exception):
    """Raised when a release version is requested but no git tag is found."""
|
||
def get_pytorch_root():
    """Return the top-level directory of the enclosing git checkout.

    Runs ``git rev-parse --show-toplevel`` in the current working directory.

    Returns:
        pathlib.Path: the path printed by git.

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero.
    """
    raw_output = subprocess.check_output(
        ['git', 'rev-parse', '--show-toplevel']
    )
    # Decode with the filesystem encoding rather than ASCII so a checkout
    # located under a non-ASCII path does not raise UnicodeDecodeError, and
    # drop only the trailing line terminator so whitespace that is part of
    # the path is preserved.
    return Path(os.fsdecode(raw_output).rstrip("\r\n"))
|
||
def get_tag():
    """Return the cleaned-up name of the tag HEAD sits on, or "" if none.

    The tag is looked up with ``git describe --tags --exact-match`` run from
    the repository root. The same invocation is used both to detect the tag
    and to read its name; previously detection considered all tags while the
    name was read with plain ``git describe`` (annotated tags only), which
    could fail or return a ``<tag>-<n>-g<sha>`` string on a lightweight tag.

    Returns:
        str: the tag with a leading "v" and a trailing "-rc<N>" removed
        (e.g. "v1.7.1-rc1" -> "1.7.1"), or the empty string when the
        command exits non-zero.
    """
    root = get_pytorch_root()
    describe = subprocess.run(
        ['git', 'describe', '--tags', '--exact-match'],
        cwd=root,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL
    )
    if describe.returncode != 0:
        # Not on a tag
        return ""
    dirty_tag = describe.stdout.decode('utf-8').strip()
    # Strip leading v that we typically do when we tag branches
    # ie: v1.7.1 -> 1.7.1
    tag = re.sub(LEADING_V_PATTERN, "", dirty_tag)
    # Strip trailing rc pattern
    # ie: 1.7.1-rc1 -> 1.7.1
    return re.sub(TRAILING_RC_PATTERN, "", tag)
|
||
def get_base_version():
    """Return the version stored in ``version.txt`` at the repository root.

    Returns:
        str: the file contents with surrounding whitespace and a trailing
        "a0" suffix removed.

    Raises:
        OSError: if ``version.txt`` cannot be opened or read.
    """
    root = get_pytorch_root()
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(root / 'version.txt', 'r') as version_file:
        dirty_version = version_file.read().strip()
    # Strips trailing a0 from version.txt, not too sure why it's there in the
    # first place
    return re.sub(LEGACY_BASE_VERSION_SUFFIX_PATTERN, "", dirty_version)
|
||
class PytorchVersion:
    """Builds PyTorch version strings for one binary build flavor.

    Args:
        gpu_arch_type: arch family of the build, e.g. "cpu", "cuda", "rocm".
        gpu_arch_version: arch version string, e.g. "10.2"; may be empty.
        no_build_suffix: when truthy, no "+<arch>" suffix is appended.
    """

    def __init__(self, gpu_arch_type, gpu_arch_version, no_build_suffix):
        self.gpu_arch_type = gpu_arch_type
        self.gpu_arch_version = gpu_arch_version
        self.no_build_suffix = no_build_suffix

    def get_post_build_suffix(self):
        """Return the "+<arch>" local version suffix, or "" if none applies.

        CUDA builds use "+cu" followed by the version with dots removed
        (e.g. "+cu110"); every other arch type uses "+<type><version>"
        (e.g. "+rocm4.0", "+cpu").
        """
        # CUDA 10.2 is the version to be uploaded to PyPI so it doesn't have a
        # version suffix
        if ((self.gpu_arch_type == "cuda" and self.gpu_arch_version == "10.2")
                or self.no_build_suffix):
            return ""
        if self.gpu_arch_type == "cuda":
            return f"+cu{self.gpu_arch_version.replace('.', '')}"
        return f"+{self.gpu_arch_type}{self.gpu_arch_version}"

    def get_release_version(self):
        """Return "<tag><suffix>" for the tag reported by ``get_tag()``.

        Raises:
            NoGitTagException: if ``get_tag()`` returns an empty value.
        """
        # Resolve the tag once: every get_tag() call shells out to git, and
        # reusing the value guarantees the check and the result agree.
        tag = get_tag()
        if not tag:
            raise NoGitTagException(
                "Not on a git tag, are you sure you want a release version?"
            )
        return f"{tag}{self.get_post_build_suffix()}"

    def get_nightly_version(self):
        """Return "<base version>.dev<YYYYMMDD><suffix>" using today's date."""
        date_str = datetime.today().strftime('%Y%m%d')
        build_suffix = self.get_post_build_suffix()
        return f"{get_base_version()}.dev{date_str}{build_suffix}"
|
||
def main():
    """Parse the command line and print the computed version string.

    Each option falls back to an environment variable (NO_BUILD_SUFFIX,
    GPU_ARCH_TYPE, GPU_ARCH_VERSION). The release version is printed when it
    can be computed; if that raises NoGitTagException the nightly version is
    printed instead.
    """
    arg_parser = argparse.ArgumentParser(
        description="Generate pytorch version for binary builds"
    )
    env = os.environ
    arg_parser.add_argument(
        "--no-build-suffix",
        type=strtobool,
        help="Whether or not to add a build suffix typically (+cpu)",
        default=env.get("NO_BUILD_SUFFIX", False)
    )
    arg_parser.add_argument(
        "--gpu-arch-type",
        type=str,
        help="GPU arch you are building for, typically (cpu, cuda, rocm)",
        default=env.get("GPU_ARCH_TYPE", "cpu")
    )
    arg_parser.add_argument(
        "--gpu-arch-version",
        type=str,
        help="GPU arch version, typically (10.2, 4.0), leave blank for CPU",
        default=env.get("GPU_ARCH_VERSION", "")
    )
    options = arg_parser.parse_args()
    version_builder = PytorchVersion(
        options.gpu_arch_type,
        options.gpu_arch_version,
        options.no_build_suffix
    )
    try:
        version = version_builder.get_release_version()
    except NoGitTagException:
        # Not on a tag: fall back to a date-stamped nightly version.
        version = version_builder.get_nightly_version()
    print(version)
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
name: Build Linux Wheels

on:
  # TODO: These are only runnable from workflow_dispatch, we need to eventually add
  #       a cron
  # TODO: Add an on_release trigger to build on tags
  workflow_dispatch:

jobs:
  # Runs the matrix generator script and exposes its JSON as a job output.
  generate-build-matrix:
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    container:
      image: python:3.9
    steps:
      - name: Clone pytorch/pytorch
        uses: actions/checkout@v2
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python .github/scripts/generate_binary_build_matrix.py
          MATRIX=$(python .github/scripts/generate_binary_build_matrix.py)
          echo "::set-output name=matrix::${MATRIX}"
  # Builds one wheel per matrix entry inside that entry's container image.
  build-wheel:
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: generate-build-matrix
    runs-on: linux.2xlarge
    strategy:
      matrix:
        ${{ fromJson(needs.generate-build-matrix.outputs.matrix) }}
    container:
      image: ${{ matrix.container_image }}
    env:
      DESIRED_PYTHON: ${{ matrix.python_version }}
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: ${{ matrix.gpu_arch_version }}
      # Key spelled exactly as the matrix generator emits it (lowercase),
      # consistent with every other matrix reference in this workflow.
      GPU_ARCH_VERSION: ${{ matrix.gpu_arch_version }}
      GPU_ARCH_TYPE: ${{ matrix.gpu_arch_type }}
      PYTORCH_BUILD_NUMBER: 1
      SKIP_ALL_TESTS: 1
    steps:
      - name: Clone pytorch/pytorch
        uses: actions/checkout@v2
        with:
          path: pytorch
          submodules: recursive
      - name: Clone pytorch/builder
        uses: actions/checkout@v2
        with:
          repository: pytorch/builder
          path: builder
      - name: Generate version string
        working-directory: pytorch/
        run: |
          version=$(.github/scripts/generate_pytorch_version.py)
          echo "Generated version: ${version}"
          echo "PYTORCH_BUILD_VERSION=${version}" >> $GITHUB_ENV
      # TODO: Remove this once we remove the need for the directories to be
      #       in specific locations
      - name: Symlink repositories to root directory (for legacy scripts purposes)
        run: |
          ln -s $(pwd)/pytorch /pytorch
          ln -s $(pwd)/builder /builder
      # TODO: Bundle the correct build script in the base container image so
      #       that we don't have to do this type of specification
      - name: Build PyTorch binary (CUDA specific)
        if: ${{ matrix.gpu_arch_type == 'cuda' }}
        run: |
          /builder/manywheel/build.sh
      - name: Build PyTorch binary (ROCM specific)
        if: ${{ matrix.gpu_arch_type == 'rocm' }}
        run: |
          /builder/manywheel/build_rocm.sh
      - name: Build PyTorch binary (CPU specific)
        if: ${{ matrix.gpu_arch_type == 'cpu' }}
        run: |
          /builder/manywheel/build_cpu.sh
      - uses: actions/upload-artifact@v2
        with:
          name: pytorch-wheel-py${{ matrix.python_version }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
          path: /remote/**/*.whl
      # TODO: Add a step here for uploading binaries
Oops, something went wrong.