Skip to content

Commit

Permalink
Update on "[NCCL] Support NCCL Send/Recv"
Browse files Browse the repository at this point in the history
This diff adds support for Process Group point-to-point operations on NCCL backend based on ncclSend/ncclRecv. See #43995 for more context.


Differential Revision: [D23709848](https://our.internmc.facebook.com/intern/diff/D23709848/)

[ghstack-poisoned]
  • Loading branch information
mingzhe0908 committed Oct 5, 2020
2 parents 83db4bd + 3ab88c3 commit d98ef7e
Show file tree
Hide file tree
Showing 721 changed files with 31,283 additions and 13,318 deletions.
25 changes: 19 additions & 6 deletions .circleci/cimodel/data/pytorch_build_definitions.py
Expand Up @@ -6,7 +6,7 @@
import cimodel.lib.conf_tree as conf_tree
import cimodel.lib.miniutils as miniutils
from cimodel.data.pytorch_build_data import CONFIG_TREE_DATA, TopLevelNode
from cimodel.data.simple.util.branch_filters import gen_filter_dict
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
from cimodel.data.simple.util.docker_constants import gen_docker_image


Expand Down Expand Up @@ -110,6 +110,8 @@ def gen_workflow_params(self, phase):
parameters["resource_class"] = resource_class
if phase == "build" and self.rocm_version is not None:
parameters["resource_class"] = "xlarge"
if hasattr(self, 'filters'):
parameters['filters'] = self.filters
return parameters

def gen_workflow_job(self, phase):
Expand Down Expand Up @@ -139,14 +141,16 @@ def gen_workflow_job(self, phase):

# TODO This is a hack to special case some configs just for the workflow list
class HiddenConf(object):
def __init__(self, name, parent_build=None):
def __init__(self, name, parent_build=None, filters=None):
self.name = name
self.parent_build = parent_build
self.filters = filters

def gen_workflow_job(self, phase):
return {
self.gen_build_name(phase): {
"requires": [self.parent_build.gen_build_name("build")]
"requires": [self.parent_build.gen_build_name("build")],
"filters": self.filters,
}
}

Expand All @@ -166,7 +170,8 @@ def gen_workflow_job(self, phase):
"branch": self.branch,
"requires": [self.parent_build],
"context": "org-member",
"filters": gen_filter_dict(branches_list=["nightly"])
"filters": gen_filter_dict(branches_list=["nightly"],
tags_list=RC_PATTERN)
}
}

Expand Down Expand Up @@ -205,7 +210,9 @@ def gen_docs_configs(xenial_parent_config):
configs.append(
HiddenConf(
"pytorch_python_doc_build",
parent_build=xenial_parent_config
parent_build=xenial_parent_config,
filters=gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN),
)
)
configs.append(
Expand All @@ -219,7 +226,9 @@ def gen_docs_configs(xenial_parent_config):
configs.append(
HiddenConf(
"pytorch_cpp_doc_build",
parent_build=xenial_parent_config
parent_build=xenial_parent_config,
filters=gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN),
)
)
configs.append(
Expand Down Expand Up @@ -348,6 +357,8 @@ def instantiate_configs():

# run docs builds on "pytorch-linux-xenial-py3.6-gcc5.4". Docs builds
# should run on a CPU-only build that runs on all PRs.
# XXX should this be updated to a more modern build? Projects are
# beginning to drop python3.6
if (
distro_name == "xenial"
and fc.find_prop("pyver") == "3.6"
Expand All @@ -358,6 +369,8 @@ def instantiate_configs():
and compiler_name == "gcc"
and fc.find_prop("compiler_version") == "5.4"
):
c.filters = gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN)
c.dependent_tests = gen_docs_configs(c)

if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch:
Expand Down
29 changes: 17 additions & 12 deletions .circleci/cimodel/data/simple/docker_definitions.py
@@ -1,6 +1,7 @@
from collections import OrderedDict

from cimodel.lib.miniutils import quote
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN


# TODO: make this generated from a matrix rather than just a static list
Expand All @@ -24,7 +25,7 @@
"pytorch-linux-xenial-py3.8",
"pytorch-linux-xenial-py3.6-clang7",
"pytorch-linux-xenial-py3.6-gcc4.8",
"pytorch-linux-xenial-py3.6-gcc5.4",
"pytorch-linux-xenial-py3.6-gcc5.4", # this one is used in doc builds
"pytorch-linux-xenial-py3.6-gcc7.2",
"pytorch-linux-xenial-py3.6-gcc7",
"pytorch-linux-bionic-rocm3.7-py3.6",
Expand All @@ -34,16 +35,20 @@

def get_workflow_jobs():
"""Generates a list of docker image build definitions"""
return [
OrderedDict(
ret = []
for image_name in IMAGE_NAMES:
parameters = OrderedDict({
"name": quote(f"docker-{image_name}"),
"image_name": quote(image_name),
})
if image_name == "pytorch-linux-xenial-py3.6-gcc5.4":
# pushing documentation on tags requires CircleCI to also
# build all the dependencies on tags, including this docker image
parameters['filters'] = gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN)
ret.append(OrderedDict(
{
"docker_build_job": OrderedDict(
{
"name": quote(f"docker-{image_name}"),
"image_name": quote(image_name),
}
)
"docker_build_job": parameters
}
)
for image_name in IMAGE_NAMES
]
))
return ret
16 changes: 0 additions & 16 deletions .circleci/cimodel/data/simple/ge_config_tests.py
Expand Up @@ -63,12 +63,6 @@ def gen_tree(self):
None,
["ge_config_legacy", "test"],
["pytorch_linux_xenial_py3_6_gcc5_4_build"]),
GeConfigTestJob(
MultiPartVersion([3, 6], "py"),
MultiPartVersion([5, 4], "gcc"),
None,
["ge_config_profiling", "test"],
["pytorch_linux_xenial_py3_6_gcc5_4_build"]),
GeConfigTestJob(
MultiPartVersion([3, 6], "py"),
MultiPartVersion([5, 4], "gcc"),
Expand All @@ -86,16 +80,6 @@ def gen_tree(self):
# TODO Why does the build environment specify cuda10.1, while the
# job name is cuda10_2?
build_env_override="pytorch-linux-xenial-cuda10.1-cudnn7-ge_config_legacy-test"),
GeConfigTestJob(
None,
None,
CudaVersion(10, 2),
["cudnn7", "py3", "ge_config_profiling", "test"],
["pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build"],
use_cuda_docker=True,
# TODO Why does the build environment specify cuda10.1, while the
# job name is cuda10_2?
build_env_override="pytorch-linux-xenial-cuda10.1-cudnn7-ge_config_profiling-test"),
]


Expand Down
6 changes: 3 additions & 3 deletions .circleci/cimodel/data/simple/ios_definitions.py
@@ -1,7 +1,7 @@
from cimodel.data.simple.util.versions import MultiPartVersion


IOS_VERSION = MultiPartVersion([11, 2, 1])
IOS_VERSION = MultiPartVersion([12, 0, 0])


class ArchVariant:
Expand Down Expand Up @@ -62,8 +62,8 @@ def gen_tree(self):

WORKFLOW_DATA = [
IOSJob(IOS_VERSION, ArchVariant("x86_64"), is_org_member_context=False),
# IOSJob(IOS_VERSION, ArchVariant("arm64")),
# IOSJob(IOS_VERSION, ArchVariant("arm64", True), extra_props={"op_list": "mobilenetv2.yaml"}),
IOSJob(IOS_VERSION, ArchVariant("arm64")),
IOSJob(IOS_VERSION, ArchVariant("arm64", True), extra_props={"op_list": "mobilenetv2.yaml"}),
]


Expand Down
2 changes: 1 addition & 1 deletion .circleci/cimodel/data/simple/nightly_ios.py
Expand Up @@ -60,7 +60,7 @@ def gen_tree(self):


WORKFLOW_DATA = BUILD_CONFIGS + [
# IOSNightlyJob("binary", is_upload=True),
IOSNightlyJob("binary", is_upload=True),
]


Expand Down

0 comments on commit d98ef7e

Please sign in to comment.