Skip to content

Commit

Permalink
Merge branch 'pytorch:main' into batching-rules-matrix_exp
Browse files Browse the repository at this point in the history
  • Loading branch information
Xiao215 committed Jan 19, 2024
2 parents 736249d + f316c35 commit 9444380
Show file tree
Hide file tree
Showing 44 changed files with 1,221 additions and 461 deletions.
2 changes: 0 additions & 2 deletions .ci/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,6 @@ test_dynamo_shard() {
--exclude-inductor-tests \
--exclude-jit-executor \
--exclude-distributed-tests \
--exclude \
test_jit \
--shard "$1" "$NUM_TEST_SHARDS" \
--verbose
assert_git_not_dirty
Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/generate_ci_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,8 @@ class OperatingSystem:
BinaryBuildWorkflow(
os=OperatingSystem.MACOS_ARM64,
package_type="conda",
cross_compile_arm64=True,
cross_compile_arm64=False,
macos_runner="macos-13-xlarge",
build_configs=generate_binary_build_matrix.generate_conda_matrix(
OperatingSystem.MACOS_ARM64
),
Expand Down
14 changes: 13 additions & 1 deletion .github/workflows/docker-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ on:
branches:
- nightly
tags:
# We want to run this build on the final release tag
# Final Release tags look like: v1.11.0
- v[0-9]+.[0-9]+.[0-9]+
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
- ciflow/nightly/*

concurrency:
Expand Down Expand Up @@ -101,6 +103,16 @@ jobs:
echo "${RUNNER_TEMP}/bin" >> "${GITHUB_PATH}"
# Generate PyTorch version to use
echo "PYTORCH_VERSION=$(python3 .github/scripts/generate_pytorch_version.py --no-build-suffix)" >> "${GITHUB_ENV}"
- name: Setup test specific variables
if: ${{ startsWith(github.event.ref, 'refs/tags/v') }}
run: |
if [[ ${{ github.event.ref }} =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+-rc[0-9]+$ ]]; then
{
echo "DOCKER_IMAGE=pytorch-test";
echo "INSTALL_CHANNEL=pytorch-test";
echo "TRITON_VERSION=$(cut -f 1 .ci/docker/triton_version.txt)";
} >> "${GITHUB_ENV}"
fi
- name: Setup nightly specific variables
if: ${{ github.event.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/ciflow/nightly/') }}
run: |
Expand Down
12 changes: 5 additions & 7 deletions .github/workflows/generated-macos-arm64-binary-conda-nightly.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 13 additions & 13 deletions aten/src/ATen/native/Loss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <ATen/ops/kl_div_native.h>
#include <ATen/ops/l1_loss_native.h>
#include <ATen/ops/log.h>
#include <ATen/ops/log_sigmoid.h>
#include <ATen/ops/margin_ranking_loss_native.h>
#include <ATen/ops/mean.h>
#include <ATen/ops/min.h>
Expand Down Expand Up @@ -358,21 +359,20 @@ Tensor binary_cross_entropy_with_logits(const Tensor& input, const Tensor& targe
c10::MaybeOwned<Tensor> pos_weight_maybe_owned = at::borrow_from_optional_tensor(pos_weight_opt);
const Tensor& pos_weight = *pos_weight_maybe_owned;

Tensor loss;
auto max_val = (-input).clamp_min_(0);
if (pos_weight.defined()) {
// pos_weight needs to be broadcast, thus mul(target) is not done in-place.
auto log_weight = (pos_weight - 1).mul(target).add_(1);
loss = (1 - target).mul_(input).add_(log_weight.mul_(((-max_val).exp_().add_((-input - max_val).exp_())).log_().add_(max_val)));
} else {
loss = (1 - target).mul_(input).add_(max_val).add_((-max_val).exp_().add_((-input -max_val).exp_()).log_());
}
Tensor loss;
if (pos_weight.defined()) {
// pos_weight needs to be broadcast, thus mul(target) is not done in-place.
auto log_weight = (pos_weight - 1).mul(target).add_(1);
loss = (1 - target).mul_(input).sub_(log_weight.mul_(at::log_sigmoid(input)));
} else {
loss = (1 - target).mul_(input).sub_(at::log_sigmoid(input));
}

if (weight.defined()) {
loss.mul_(weight);
}
if (weight.defined()) {
loss.mul_(weight);
}

return apply_loss_reduction(loss, reduction);
return apply_loss_reduction(loss, reduction);
}

Tensor poisson_nll_loss(const Tensor& input, const Tensor& target, const bool log_input, const bool full, const double eps, const int64_t reduction)
Expand Down
14 changes: 14 additions & 0 deletions aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLibBlas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,20 @@ static void apply_triangular_solve_batched(const Tensor& A, const Tensor& B, boo
}

void triangular_solve_batched_cublas(const Tensor& A, const Tensor& B, bool left, bool upper, TransposeType transpose, bool unitriangular) {
// Workaround for the following bug on CUDA < 12.1:
// RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasStrsmBatched`
// See https://github.com/pytorch/pytorch/issues/79191#issuecomment-1154222580
#if defined(CUSOLVER_VERSION) && CUSOLVER_VERSION < 12100
constexpr auto max_batch_size = 524280;
if (B.size(-1) > max_batch_size) {
auto n_chunks = (B.size(-1) + max_batch_size - 1) / max_batch_size; // ceildiv
auto splits = B.split(n_chunks, /*dim=*/-1);
for (const Tensor& b : splits) {
triangular_solve_batched_cublas(A, b, left, upper, transpose, unitriangular);
}
return;
}
#endif
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(A.scalar_type(), "triangular_solve_cuda", [&]{
apply_triangular_solve_batched<scalar_t>(A, B, left, upper, transpose, unitriangular);
});
Expand Down
6 changes: 4 additions & 2 deletions aten/src/ATen/native/quantized/cpu/qconv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1658,6 +1658,8 @@ static at::Tensor _quantized_convolution_onednn(
auto upper_bound_value =
unary_scalars[1].get().toOptional<at::Scalar>().value().to<float>();
op_attr = ideep::attr_t::fuse_clamp(lower_bound_value, upper_bound_value);
} else if (has_unary_post_op && unary_attr.value()=="hardswish") {
op_attr = ideep::attr_t::fuse_hardswish();
} else {
op_attr = ideep::attr_t();
}
Expand Down Expand Up @@ -1851,8 +1853,8 @@ class QConvoneDNN final {
} else {
// Conv2D post op check
TORCH_CHECK(
attr == "none" || attr == "relu" || attr == "hardtanh",
"none post_op or post_op relu/hardtanh is supported for quantized pointwise conv2d. Got unary_post_op: ",
attr == "none" || attr == "relu" || attr == "hardtanh" || attr == "hardswish",
"none post_op or post_op relu/hardtanh/hardswish is supported for quantized pointwise conv2d. Got unary_post_op: ",
attr,
".")
}
Expand Down

0 comments on commit 9444380

Please sign in to comment.