Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
[ghstack-poisoned]
  • Loading branch information
jainapurva committed Apr 29, 2024
2 parents ec6dad7 + 8f27ff2 commit bb9302f
Show file tree
Hide file tree
Showing 397 changed files with 7,167 additions and 40,154 deletions.
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ per-file-ignores =
torch/ao/quantization/fx/_decomposed.py: TOR901
torch/distributed/_functional_collectives.py: TOR901
torch/distributed/_spmd/data_parallel.py: TOR901
torch/distributed/_tensor/_collective_utils.py: TOR901
optional-ascii-coding = True
exclude =
./.git,
Expand Down
13 changes: 13 additions & 0 deletions .github/label_to_label.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Use this to auto-apply labels based on other labels. Applies to both PRs and
# issues. Currently only `any` and `all` are supported.
- any:
- "module: custom operators"
- "module: aotdispatch"
then:
- "module: pt2-dispatcher"
- any:
- "module: dynamo"
- "module: pt2-dispatcher"
- "module: inductor"
then:
- "oncall: pt2"
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ retryable_workflows:
- linux-binary
- windows-binary
labeler_config: labeler.yml
label_to_label_config: label_to_label.yml
2 changes: 1 addition & 1 deletion .github/workflows/_win-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
retry_wait_seconds: 30
command: |
set -eu
python3 -m pip install rockset==1.0.3
python3 -m pip install rockset==1.0.3 'xdoctest>=1.1.0'
- name: Start monitoring script
id: monitor-script
Expand Down
3 changes: 0 additions & 3 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1051,8 +1051,6 @@ exclude_patterns = [
'test/quantization/fx/test_numeric_suite_fx.py',
'test/quantization/fx/test_quantize_fx.py',
'test/quantization/fx/test_subgraph_rewriter.py',
'test/test_custom_op_testing.py',
'test/test_dataloader.py',
'test/test_datapipe.py',
'test/test_decomp.py',
'test/test_deploy.py',
Expand All @@ -1065,7 +1063,6 @@ exclude_patterns = [
'test/test_function_schema.py',
'test/test_functional_autograd_benchmark.py',
'test/test_functional_optim.py',
'test/test_functionalization.py',
'test/test_functionalization_of_rng_ops.py',
'test/test_futures.py',
'test/test_fx.py',
Expand Down
52 changes: 1 addition & 51 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -446,30 +446,13 @@ cu_library(
# caffe2
CAFFE2_COPTS = COMMON_COPTS + [
"-Dcaffe2_EXPORTS",
"-DCAFFE2_USE_GLOO",
"-DCAFFE2_USE_CUDNN",
"-DCAFFE2_BUILD_MAIN_LIB",
"-fvisibility-inlines-hidden",
"-fno-math-errno",
"-fno-trapping-math",
]

filegroup(
name = "caffe2_contrib_srcs",
srcs = [
"caffe2/contrib/aten/aten_op.cc",
"caffe2/contrib/gloo/allgather_ops.cc",
"caffe2/contrib/gloo/allreduce_ops.cc",
"caffe2/contrib/gloo/barrier_ops.cc",
"caffe2/contrib/gloo/broadcast_ops.cc",
"caffe2/contrib/gloo/common.cc",
"caffe2/contrib/gloo/common_world_ops.cc",
"caffe2/contrib/gloo/context.cc",
"caffe2/contrib/gloo/reduce_scatter_ops.cc",
"caffe2/contrib/gloo/store_handler.cc",
],
)

filegroup(
name = "caffe2_core_srcs",
srcs = [
Expand Down Expand Up @@ -1024,10 +1007,6 @@ filegroup(
filegroup(
name = "caffe2_cuda_cpp_srcs",
srcs = [
"caffe2/contrib/aten/aten_op_gpu.cc",
"caffe2/contrib/gloo/allreduce_ops_gpu.cc",
"caffe2/contrib/gloo/broadcast_ops_gpu.cc",
"caffe2/contrib/gloo/common_world_ops_gpu.cc",
"caffe2/core/blob_serialization_gpu.cc",
"caffe2/core/common_cudnn.cc",
"caffe2/core/common_gpu.cc",
Expand Down Expand Up @@ -1271,35 +1250,10 @@ cc_library(
],
)

py_binary(
name = "gen_op",
srcs = ["caffe2/contrib/aten/gen_op.py"],
deps = ["//torchgen"],
)

genrule(
name = "generated_caffe2_aten_op_headers",
srcs = [
"caffe2/contrib/aten/aten_op_template.h",
"aten/src/ATen/Declarations.yaml",
],
outs = ["caffe2/caffe2/contrib/aten/gen_aten_op.h"],
cmd = """
$(location :gen_op) \
--output_prefix gen_ \
--install_dir $(@D) \
--aten_root `dirname $(location aten/src/ATen/Declarations.yaml)`/../.. \
--template_dir `dirname $(location caffe2/contrib/aten/aten_op_template.h)` \
--yaml_dir `dirname $(location aten/src/ATen/Declarations.yaml)`""",
tools = [":gen_op"],
)

cc_library(
name = "caffe2_headers",
hdrs = glob(
[
"caffe2/contrib/aten/*.h",
"caffe2/contrib/gloo/*.h",
"caffe2/core/*.h",
"caffe2/core/nomnigraph/include/nomnigraph/Converters/*.h",
"caffe2/core/nomnigraph/include/nomnigraph/Generated/*.h",
Expand Down Expand Up @@ -1338,10 +1292,9 @@ cc_library(
) + if_cuda(glob([
"caffe2/**/*.cuh",
"caffe2/image/*.h",
])) + [":generated_caffe2_aten_op_headers"],
])),
copts = CAFFE2_COPTS,
includes = [
"caffe2/contrib/aten",
"caffe2/core/nomnigraph/include",
],
visibility = ["//visibility:public"],
Expand Down Expand Up @@ -1385,7 +1338,6 @@ cc_library(
"caffe2/db/create_db_op.cc",
"caffe2/db/protodb.cc",
"caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc",
":caffe2_contrib_srcs",
":caffe2_core_srcs",
":caffe2_distributed_srcs",
":caffe2_ideep_srcs",
Expand Down Expand Up @@ -1419,7 +1371,6 @@ cc_library(
"@fbgemm//:fbgemm_src_headers",
"@fmt",
"@foxi",
"@gloo",
"@onnx",
] + if_cuda(
[
Expand Down Expand Up @@ -1467,7 +1418,6 @@ cu_library(
"@cuda//:curand",
"@cudnn",
"@eigen",
"@gloo",
"@tensorpipe//:tensorpipe_cuda",
],
alwayslink = True,
Expand Down
8 changes: 4 additions & 4 deletions aten/src/ATen/NestedTensorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ inline std::vector<int64_t> construct_opt_sizes(const at::Tensor& sizes) {
std::vector<int64_t> result(1, sizes.sizes()[0]);
if (sizes.dim() > 0) {
size_t nested_dim = result.size();
int64_t* sizes_ptr = sizes.data_ptr<int64_t>();
const int64_t* sizes_ptr = sizes.const_data_ptr<int64_t>();
result.resize(nested_dim + sizes.sizes()[1]);
int64_t sizes_size_0 = sizes.sizes()[0];
int64_t sizes_size_1 = sizes.sizes()[1];
Expand Down Expand Up @@ -114,7 +114,7 @@ at::Tensor construct_nested_strides(const at::Tensor& sizes) {
return sizes;
}
at::Tensor strides = sizes.new_empty(sizes.sizes());
const int64_t* sizes_ptr = sizes.data_ptr<int64_t>();
const int64_t* sizes_ptr = sizes.const_data_ptr<int64_t>();
int64_t* strides_ptr = strides.data_ptr<int64_t>();
for (int64_t i = 0; i < sizes.size(0); i++) {
strides_ptr[orig_dim - 1] = 1;
Expand Down Expand Up @@ -152,7 +152,7 @@ at::Tensor construct_offsets(const at::Tensor& sizes) {
std::iota(offsets_ptr, offsets_ptr + ntensors, 0);
return offsets;
}
const int64_t* sizes_ptr = sizes.data_ptr<int64_t>();
const int64_t* sizes_ptr = sizes.const_data_ptr<int64_t>();
offsets_ptr[0] = 0;
for (const auto i : c10::irange(ntensors - 1)) {
const int64_t row_product = std::accumulate(sizes_ptr, sizes_ptr + orig_dim, 1, std::multiplies());
Expand Down Expand Up @@ -344,7 +344,7 @@ int64_t get_numel_from_nested_size_tensor(const at::Tensor& tensor) {
static_cast<uint64_t>(std::numeric_limits<int64_t>::max()),
static_cast<uint64_t>(std::numeric_limits<size_t>::max()));

const int64_t* sizes_ptr = tensor.data_ptr<int64_t>();
const int64_t* sizes_ptr = tensor.const_data_ptr<int64_t>();
const auto nt_dim = tensor.size(1);
uint64_t num_elements{0};

Expand Down
11 changes: 6 additions & 5 deletions aten/src/ATen/NestedTensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ inline bool nested_tensor_impl_is_contiguous(const NestedTensorImpl* nt) {
}
const Tensor &sizemat = nt->get_nested_sizes(),
&stridemat = nt->get_nested_strides();
int64_t* offsets_ptr = nt->get_storage_offsets().data_ptr<int64_t>();
const int64_t* offsets_ptr =
nt->get_storage_offsets().const_data_ptr<int64_t>();
int64_t orig_dim = sizemat.size(1);
// nesting scalars
if (orig_dim == 0) {
Expand All @@ -243,8 +244,8 @@ inline bool nested_tensor_impl_is_contiguous(const NestedTensorImpl* nt) {
// nesting tensors
else {
// if any underlying tensor is non-contiguous
const int64_t *sizemat_ptr = sizemat.data_ptr<int64_t>(),
*stridemat_ptr = stridemat.data_ptr<int64_t>();
const int64_t *sizemat_ptr = sizemat.const_data_ptr<int64_t>(),
*stridemat_ptr = stridemat.const_data_ptr<int64_t>();
for (int64_t i = 0; i < ntensors; i++) {
if (stridemat_ptr[orig_dim - 1] != 1) {
return false;
Expand All @@ -263,8 +264,8 @@ inline bool nested_tensor_impl_is_contiguous(const NestedTensorImpl* nt) {
if (offsets_ptr[0] != 0) {
return false;
}
sizemat_ptr = sizemat.data_ptr<int64_t>();
stridemat_ptr = stridemat.data_ptr<int64_t>();
sizemat_ptr = sizemat.const_data_ptr<int64_t>();
stridemat_ptr = stridemat.const_data_ptr<int64_t>();
for (int64_t i = 1; i < ntensors; i++) {
if (offsets_ptr[i] !=
offsets_ptr[i - 1] + *sizemat_ptr * *stridemat_ptr) {
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/autocast_mode.h
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ copy pasted in from VariableTypeEverything.cpp with appropriate substitutions.

// KERNEL_PRIVATEUSEONE/KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_PRIVATEUSEONE
// registration (OP, POLICY) or (OP, OVERLOAD, POLICY) for AutocastPrivateUse1
#define KERNEL_PRIVATEUSEONE(OP, ...) \
#define KERNEL_PRIVATEUSEONE(...) \
KERNEL(c10::DeviceType::PrivateUse1, __VA_ARGS__)

#define KERNEL_DIFFERENT_REDISPATCH_SIGNATURE_PRIVATEUSEONE( \
Expand Down
8 changes: 4 additions & 4 deletions aten/src/ATen/core/Formatting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static std::tuple<double, int> __printFormat(std::ostream& stream, const Tensor&
return std::make_tuple(1., 0);
}
bool intMode = true;
auto self_p = self.data_ptr<double>();
auto self_p = self.const_data_ptr<double>();
for (const auto i : c10::irange(size)) {
auto z = self_p[i];
if(std::isfinite(z)) {
Expand Down Expand Up @@ -189,7 +189,7 @@ static void __printMatrix(std::ostream& stream, const Tensor& self, int64_t line
}
for (const auto l : c10::irange(self.size(0))) {
Tensor row = self.select(0,l);
double *row_ptr = row.data_ptr<double>();
const double *row_ptr = row.const_data_ptr<double>();
for (const auto c : c10::irange(firstColumn, lastColumn+1)) {
stream << std::setw(sz) << row_ptr[c]/scale;
if(c == lastColumn) {
Expand Down Expand Up @@ -279,15 +279,15 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi
tensor = tensor_.to(kCPU, kDouble).contiguous();
}
if(tensor.ndimension() == 0) {
stream << defaultfloat << tensor.data_ptr<double>()[0] << '\n';
stream << defaultfloat << tensor.const_data_ptr<double>()[0] << '\n';
stream << "[ " << tensor_.toString() << "{}";
} else if(tensor.ndimension() == 1) {
if (tensor.numel() > 0) {
auto [scale, sz] = __printFormat(stream, tensor);
if(scale != 1) {
printScale(stream, scale);
}
double* tensor_p = tensor.data_ptr<double>();
const double* tensor_p = tensor.const_data_ptr<double>();
for (const auto i : c10::irange(tensor.size(0))) {
stream << std::setw(sz) << tensor_p[i]/scale << '\n';
}
Expand Down
48 changes: 30 additions & 18 deletions aten/src/ATen/cpu/vec/vec256/vec256_convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,32 +126,44 @@ struct VecConvert<int32_t, 1, uint8_t, 1> {
}
};

// Conversion between a reduced floating-point type and an 8-bit integer type,
// in either direction. The specialization is selected when exactly such a
// pair is requested; the work is delegated to two other VecConvert
// specializations by going through a float vector as the intermediate step.
template <typename dst_t, typename src_t>
struct VecConvert<
    dst_t,
    1,
    src_t,
    1,
    std::enable_if_t<
        (is_reduced_floating_point_v<dst_t> && is_8bit_integer_v<src_t>) ||
        (is_reduced_floating_point_v<src_t> && is_8bit_integer_v<dst_t>)>> {
  // Step 1: src_t -> float. Step 2: float -> dst_t.
  static inline VectorizedN<dst_t, 1> apply(const VectorizedN<src_t, 1>& src) {
    using ToFloat = VecConvert<float, 1, src_t, 1>;
    using FromFloat = VecConvert<dst_t, 1, float, 1>;
    VectorizedN<float, 1> as_float = ToFloat::apply(src);
    return FromFloat::apply(as_float);
  }
};

template <typename dst_t>
struct VecConvert<
dst_t,
1,
float,
1,
typename std::enable_if_t<
std::is_same_v<dst_t, unsigned char> || std::is_same_v<dst_t, signed char>,
void>> {
static inline VectorizedN<dst_t, 1> apply(
const VectorizedN<float, 1>& src) {
dst_t,
1,
float,
1,
typename std::enable_if_t<is_8bit_integer_v<dst_t>,
void>> {
static inline VectorizedN<dst_t, 1> apply(const VectorizedN<float, 1>& src) {
return convert_float_to_int8<dst_t>(src[0]);
}
};

template <typename src_t>
struct VecConvert<
float,
1,
src_t,
1,
typename std::enable_if_t<
std::is_same_v<src_t, unsigned char> || std::is_same_v<src_t, signed char>,
void>> {
static inline VectorizedN<float, 1> apply(
const VectorizedN<src_t, 1>& src) {
float,
1,
src_t,
1,
typename std::enable_if_t<is_8bit_integer_v<src_t>,
void>> {
static inline VectorizedN<float, 1> apply(const VectorizedN<src_t, 1>& src) {
return convert_int8_to_float<src_t>(src[0]);
}
};
Expand Down

0 comments on commit bb9302f

Please sign in to comment.