Commit

Update

[ghstack-poisoned]

yf225 committed Jun 19, 2024
2 parents 5d70eec + 4414179 commit 0ba9f4e
Showing 202 changed files with 4,515 additions and 1,650 deletions.
47 changes: 27 additions & 20 deletions .github/scripts/get_workflow_type.py
@@ -1,6 +1,6 @@
 import json
 from argparse import ArgumentParser
-from typing import Any
+from typing import Any, Tuple
 
 from github import Auth, Github
 from github.Issue import Issue
@@ -9,6 +9,8 @@
 WORKFLOW_LABEL_META = ""  # use meta runners
 WORKFLOW_LABEL_LF = "lf."  # use runners from the linux foundation
 LABEL_TYPE_KEY = "label_type"
+MESSAGE_KEY = "message"
+MESSAGE = ""  # Debug message to return to the caller
 
 
 def parse_args() -> Any:
@@ -48,45 +50,50 @@ def is_exception_branch(branch: str) -> bool:
     return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
 
 
-def get_workflow_type(issue: Issue, username: str) -> str:
+def get_workflow_type(issue: Issue, username: str) -> Tuple[str, str]:
     try:
         user_list = issue.get_comments()[0].body.split()
 
         if user_list[0] == "!":
-            print("LF Workflows are disabled for everyone. Using meta runners.")
-            return WORKFLOW_LABEL_META
+            MESSAGE = "LF Workflows are disabled for everyone. Using meta runners."
+            return WORKFLOW_LABEL_META, MESSAGE
         elif user_list[0] == "*":
-            print("LF Workflows are enabled for everyone. Using LF runners.")
-            return WORKFLOW_LABEL_LF
+            MESSAGE = "LF Workflows are enabled for everyone. Using LF runners."
+            return WORKFLOW_LABEL_LF, MESSAGE
        elif username in user_list:
-            print(f"LF Workflows are enabled for {username}. Using LF runners.")
-            return WORKFLOW_LABEL_LF
+            MESSAGE = f"LF Workflows are enabled for {username}. Using LF runners."
+            return WORKFLOW_LABEL_LF, MESSAGE
         else:
-            print(f"LF Workflows are disabled for {username}. Using meta runners.")
-            return WORKFLOW_LABEL_META
+            MESSAGE = f"LF Workflows are disabled for {username}. Using meta runners."
+            return WORKFLOW_LABEL_META, MESSAGE
     except Exception as e:
-        print(
-            f"Failed to get determine workflow type. Falling back to meta runners. Exception: {e}"
-        )
-        return WORKFLOW_LABEL_META
+        MESSAGE = f"Failed to get determine workflow type. Falling back to meta runners. Exception: {e}"
+        return WORKFLOW_LABEL_META, MESSAGE
 
 
 def main() -> None:
     args = parse_args()
 
     if is_exception_branch(args.github_branch):
-        print(f"Exception branch: '{args.github_branch}', using meta runners")
-        output = {LABEL_TYPE_KEY: WORKFLOW_LABEL_META}
+        output = {
+            LABEL_TYPE_KEY: WORKFLOW_LABEL_META,
+            MESSAGE_KEY: f"Exception branch: '{args.github_branch}', using meta runners",
+        }
     else:
         try:
             gh = get_gh_client(args.github_token)
             # The default issue we use - https://github.com/pytorch/test-infra/issues/5132
             issue = get_issue(gh, args.github_repo, args.github_issue)
 
-            output = {LABEL_TYPE_KEY: get_workflow_type(issue, args.github_user)}
+            label_type, message = get_workflow_type(issue, args.github_user)
+            output = {
+                LABEL_TYPE_KEY: label_type,
+                MESSAGE_KEY: message,
+            }
         except Exception as e:
             print(f"Failed to get issue. Falling back to meta runners. Exception: {e}")
-            output = {LABEL_TYPE_KEY: WORKFLOW_LABEL_META}
+            output = {
+                LABEL_TYPE_KEY: WORKFLOW_LABEL_META,
+                MESSAGE_KEY: f"Failed to get issue. Falling back to meta runners. Exception: {e}",
+            }
 
     json_output = json.dumps(output)
     print(json_output)
64 changes: 1 addition & 63 deletions .lintrunner.toml
@@ -68,6 +68,7 @@ include_patterns = [
     'aten/src/ATen/native/cudnn/*.cpp',
     'c10/**/*.h',
     'c10/**/*.cpp',
+    'distributed/c10d/*SymmetricMemory.*',
     'torch/csrc/**/*.h',
     'torch/csrc/**/*.hpp',
     'torch/csrc/**/*.cpp',
@@ -1389,69 +1390,6 @@ exclude_patterns = [
     'torch/contrib/_tensorboard_vis.py',
     "torch/cuda/_gpu_trace.py",
     'torch/cuda/_memory_viz.py', # mypy: Value of type "object" is not indexable
-    'torch/distributed/__init__.py',
-    'torch/distributed/_composable_state.py',
-    'torch/distributed/_sharded_tensor/__init__.py',
-    'torch/distributed/_sharding_spec/__init__.py',
-    'torch/distributed/_tools/__init__.py',
-    'torch/distributed/_tools/memory_tracker.py',
-    'torch/distributed/argparse_util.py',
-    'torch/distributed/c10d_logger.py',
-    'torch/distributed/collective_utils.py',
-    'torch/distributed/constants.py',
-    'torch/distributed/distributed_c10d.py',
-    'torch/distributed/examples/memory_tracker_example.py',
-    'torch/distributed/launch.py',
-    'torch/distributed/launcher/__init__.py',
-    'torch/distributed/launcher/api.py',
-    'torch/distributed/logging_handlers.py',
-    'torch/distributed/nn/__init__.py',
-    'torch/distributed/nn/api/__init__.py',
-    'torch/distributed/nn/api/remote_module.py',
-    'torch/distributed/nn/functional.py',
-    'torch/distributed/nn/jit/__init__.py',
-    'torch/distributed/nn/jit/instantiator.py',
-    'torch/distributed/nn/jit/templates/__init__.py',
-    'torch/distributed/nn/jit/templates/remote_module_template.py',
-    'torch/distributed/optim/__init__.py',
-    'torch/distributed/optim/apply_optimizer_in_backward.py',
-    'torch/distributed/optim/functional_adadelta.py',
-    'torch/distributed/optim/functional_adagrad.py',
-    'torch/distributed/optim/functional_adam.py',
-    'torch/distributed/optim/functional_adamax.py',
-    'torch/distributed/optim/functional_adamw.py',
-    'torch/distributed/optim/functional_rmsprop.py',
-    'torch/distributed/optim/functional_rprop.py',
-    'torch/distributed/optim/functional_sgd.py',
-    'torch/distributed/optim/named_optimizer.py',
-    'torch/distributed/optim/optimizer.py',
-    'torch/distributed/optim/post_localSGD_optimizer.py',
-    'torch/distributed/optim/utils.py',
-    'torch/distributed/optim/zero_redundancy_optimizer.py',
-    'torch/distributed/remote_device.py',
-    'torch/distributed/rendezvous.py',
-    'torch/distributed/rpc/__init__.py',
-    'torch/distributed/rpc/_testing/__init__.py',
-    'torch/distributed/rpc/_testing/faulty_agent_backend_registry.py',
-    'torch/distributed/rpc/_utils.py',
-    'torch/distributed/rpc/api.py',
-    'torch/distributed/rpc/backend_registry.py',
-    'torch/distributed/rpc/constants.py',
-    'torch/distributed/rpc/functions.py',
-    'torch/distributed/rpc/internal.py',
-    'torch/distributed/rpc/options.py',
-    'torch/distributed/rpc/rref_proxy.py',
-    'torch/distributed/rpc/server_process_global_profiler.py',
-    'torch/distributed/run.py',
-    'torch/distributed/tensor/__init__.py',
-    'torch/distributed/tensor/parallel/__init__.py',
-    'torch/distributed/tensor/parallel/_utils.py',
-    'torch/distributed/tensor/parallel/_view_with_dim_change.py',
-    'torch/distributed/tensor/parallel/api.py',
-    'torch/distributed/tensor/parallel/fsdp.py',
-    'torch/distributed/tensor/parallel/input_reshard.py',
-    'torch/distributed/tensor/parallel/multihead_attention_tp.py',
-    'torch/distributed/tensor/parallel/style.py',
     'torch/fft/__init__.py',
     'torch/func/__init__.py',
     'torch/futures/__init__.py',
1 change: 1 addition & 0 deletions BUILD.bazel
@@ -744,6 +744,7 @@ cc_library(
         "torch/csrc/cuda/python_nccl.cpp",
         "torch/csrc/cuda/nccl.cpp",
         "torch/csrc/distributed/c10d/intra_node_comm.cu",
+        "torch/csrc/distributed/c10d/CUDASymmetricMemory.cu",
         "torch/csrc/distributed/c10d/Utils.cu",
         "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
     ],
1 change: 1 addition & 0 deletions aten/src/ATen/CMakeLists.txt
@@ -473,6 +473,7 @@ endif()
 
 if(USE_CUDA AND NOT USE_ROCM)
   list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
+  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
   if($ENV{ATEN_STATIC_CUDA})
     list(APPEND ATen_CUDA_DEPENDENCY_LIBS
       ${CUDA_LIBRARIES}
9 changes: 2 additions & 7 deletions aten/src/ATen/FunctionalInverses.cpp
@@ -303,7 +303,7 @@ Tensor FunctionalInverses::_nested_view_from_buffer_inverse(const Tensor& base,
   return Tensor();
 }
 
-Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx, const c10::optional<Tensor>& min_seqlen, const c10::optional<Tensor>& max_seqlen) {
+Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx) {
   auto values = at::_nested_get_values(mutated_view);
   if (inverse_return_mode != InverseReturnMode::NeverView) {
     return values;
@@ -317,12 +317,7 @@ Tensor FunctionalInverses::_nested_get_values_inverse(const Tensor& base, const
   auto lengths = at::_nested_get_lengths(base);
   auto ragged_idx = at::_nested_get_ragged_idx(base);
   auto dummy = at::_nested_get_jagged_dummy(base);
-  auto min_seqlen = at::_nested_get_min_seqlen(base);
-  auto max_seqlen = at::_nested_get_max_seqlen(base);
-  auto nt = at::_nested_view_from_jagged(
-      mutated_view, offsets, dummy, lengths, ragged_idx,
-      (min_seqlen.defined() ? c10::optional<Tensor>(min_seqlen) : c10::nullopt),
-      (max_seqlen.defined() ? c10::optional<Tensor>(max_seqlen) : c10::nullopt));
+  auto nt = at::_nested_view_from_jagged(mutated_view, offsets, dummy, lengths, ragged_idx);
 
   if (inverse_return_mode != InverseReturnMode::NeverView) {
     return nt;
8 changes: 7 additions & 1 deletion aten/src/ATen/cuda/Atomic.cuh
@@ -334,7 +334,13 @@ static inline __device__ void gpuAtomicAddNoReturn(double *address, double val)
 
 /* Special case fp32 atomic. */
 #if defined(USE_ROCM)
-static inline __device__ void gpuAtomicAddNoReturn(float *address, float val) { atomicAddNoRet(address, val); }
+static inline __device__ void gpuAtomicAddNoReturn(float *address, float val) {
+#if defined(__gfx908__)
+  atomicAddNoRet(address, val);
+#else
+  (void)unsafeAtomicAdd(address, val);
+#endif
+}
 #else
 static inline __device__ void gpuAtomicAddNoReturn(float *address, float val) { gpuAtomicAdd(address, val); }
 #endif
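For orientation, a minimal sketch of how a kernel might call gpuAtomicAddNoReturn after this change; the kernel below is an illustrative assumption, not part of the commit. Because the caller discards the old value, ROCm builds can lower the add to atomicAddNoRet on gfx908 and to unsafeAtomicAdd on other architectures, while CUDA builds keep going through gpuAtomicAdd.

#include <ATen/cuda/Atomic.cuh>

// Hypothetical usage sketch: per-thread fire-and-forget accumulation.
__global__ void accumulate(const float* in, float* sum, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    gpuAtomicAddNoReturn(sum, in[i]);  // old value is never read
  }
}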
37 changes: 37 additions & 0 deletions aten/src/ATen/cuda/detail/LazyNVRTC.cpp
@@ -170,6 +170,43 @@ CUDA_STUB3(cuLinkComplete, CUlinkState, void **, size_t *);
 CUDA_STUB3(cuFuncSetAttribute, CUfunction, CUfunction_attribute, int);
 CUDA_STUB3(cuFuncGetAttribute, int*, CUfunction_attribute, CUfunction);
 
+#if defined(CUDA_VERSION) && CUDA_VERSION >= 12000
+CUresult CUDAAPI
+cuTensorMapEncodeTiled(
+    CUtensorMap* tensorMap,
+    CUtensorMapDataType tensorDataType,
+    cuuint32_t tensorRank,
+    void* globalAddress,
+    const cuuint64_t* globalDim,
+    const cuuint64_t* globalStrides,
+    const cuuint32_t* boxDim,
+    const cuuint32_t* elementStrides,
+    CUtensorMapInterleave interleave,
+    CUtensorMapSwizzle swizzle,
+    CUtensorMapL2promotion l2Promotion,
+    CUtensorMapFloatOOBfill oobFill) {
+  auto fn = reinterpret_cast<decltype(&cuTensorMapEncodeTiled)>(
+      getCUDALibrary().sym(__func__));
+  if (!fn)
+    throw std::runtime_error("Can't get cuTensorMapEncodeTiled");
+  lazyNVRTC.cuTensorMapEncodeTiled = fn;
+  return fn(
+      tensorMap,
+      tensorDataType,
+      tensorRank,
+      globalAddress,
+      globalDim,
+      globalStrides,
+      boxDim,
+      elementStrides,
+      interleave,
+      swizzle,
+      l2Promotion,
+      oobFill);
+}
+
+#endif
+
 // Irregularly shaped functions
 CUresult CUDAAPI cuLaunchKernel(CUfunction f,
                                 unsigned int gridDimX,
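The new cuTensorMapEncodeTiled wrapper follows the file's existing lazy-binding pattern: the first call resolves the real driver symbol by name, caches the pointer in the lazyNVRTC dispatch table, and forwards the arguments; later calls through the table bypass the stub entirely. Below is a self-contained sketch of that pattern; the names lookup, real_add, and DispatchTable are placeholders for this sketch, not anything in the file.

#include <cstdio>
#include <stdexcept>

using add_fn = int (*)(int, int);

int real_add(int a, int b) { return a + b; }  // stands in for the driver symbol

// Stands in for getCUDALibrary().sym(name); the real code resolves via dlopen/dlsym.
void* lookup(const char* /*name*/) { return reinterpret_cast<void*>(&real_add); }

int add_stub(int a, int b);
struct DispatchTable { add_fn add = add_stub; } tbl;  // plays the role of lazyNVRTC

int add_stub(int a, int b) {
  auto fn = reinterpret_cast<add_fn>(lookup("real_add"));
  if (!fn)
    throw std::runtime_error("Can't get real_add");
  tbl.add = fn;     // cache: later calls skip the stub
  return fn(a, b);  // forward this first call
}

int main() {
  printf("%d\n", tbl.add(2, 3));  // resolves lazily, prints 5
  printf("%d\n", tbl.add(4, 5));  // now calls real_add directly, prints 9
}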
15 changes: 12 additions & 3 deletions aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
@@ -59,16 +59,25 @@ namespace at { namespace cuda {
   _(cuLinkAddData) \
   _(cuLinkComplete) \
   _(cuFuncSetAttribute) \
-  _(cuFuncGetAttribute)
+  _(cuFuncGetAttribute) \
 
+#if defined(CUDA_VERSION) && CUDA_VERSION >= 12000
+#define AT_FORALL_NVRTC_EXTENDED(_) \
+  AT_FORALL_NVRTC_BASE(_) \
+  _(cuTensorMapEncodeTiled)
+#else
+#define AT_FORALL_NVRTC_EXTENDED(_) \
+  AT_FORALL_NVRTC_BASE(_)
+#endif
+
 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11010
 #define AT_FORALL_NVRTC(_) \
-  AT_FORALL_NVRTC_BASE(_) \
+  AT_FORALL_NVRTC_EXTENDED(_) \
   _(nvrtcGetCUBINSize) \
   _(nvrtcGetCUBIN)
 #else
 #define AT_FORALL_NVRTC(_) \
-  AT_FORALL_NVRTC_BASE(_)
+  AT_FORALL_NVRTC_EXTENDED(_)
 #endif
 
 #else
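AT_FORALL_NVRTC is an X-macro list: each _(name) entry is expanded by whatever macro the caller passes in, and the change above splices the version-gated AT_FORALL_NVRTC_EXTENDED list into it so cuTensorMapEncodeTiled is only listed on CUDA 12+. A small self-contained sketch of the idiom; the list and struct below are illustrative assumptions, though ATenNVRTC.h uses the same trick to declare its NVRTC dispatch struct.

#include <cstdio>

int start(int x) { return x + 1; }
int stop(int x) { return x - 1; }

// The X-macro list: each entry is expanded by a caller-supplied macro.
#define FORALL_FNS(_) \
  _(start)            \
  _(stop)

// One expansion: declare a function-pointer member per list entry.
#define CREATE_MEMBER(name) decltype(&name) name;
struct Table {
  FORALL_FNS(CREATE_MEMBER)
};
#undef CREATE_MEMBER

int main() {
  Table t{&start, &stop};
  printf("%d %d\n", t.start(1), t.stop(1));  // prints 2 0
}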