Skip to content

Commit

Permalink
Merge branch 'pytorch:main' into hpu_storage_serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
ppiskorski committed Jun 6, 2023
2 parents b76f54a + 3a38acf commit ca82ec1
Show file tree
Hide file tree
Showing 15 changed files with 269 additions and 139 deletions.
14 changes: 14 additions & 0 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,20 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7)
CUDA_VERSION=12.1.0
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc9)
CUDA_VERSION=11.8.0
CUDNN_VERSION=8
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/docker-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
matrix:
include:
- docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9
- docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc9
- docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
- docker-image-name: pytorch-linux-bionic-py3.8-clang9
Expand Down
67 changes: 39 additions & 28 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,27 +181,38 @@ jobs:
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7
docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.8xlarge.nvidia.gpu" },
]}
linux-bionic-cuda12_1-py3_10-gcc7-build:
name: linux-bionic-cuda12.1-py3.10-gcc7
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda12.1-py3.10-gcc7
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc7
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.8xlarge.nvidia.gpu" },
{ config: "deploy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-bionic-cuda11_8-py3_10-gcc7-test:
name: linux-bionic-cuda11.8-py3.10-gcc7
linux-bionic-cuda12_1-py3_10-gcc7-test:
name: linux-bionic-cuda12.1-py3.10-gcc7
uses: ./.github/workflows/_linux-test.yml
needs: linux-bionic-cuda11_8-py3_10-gcc7-build
needs: linux-bionic-cuda12_1-py3_10-gcc7-build
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7
docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-build.outputs.test-matrix }}
timeout-minutes: 360
build-environment: linux-bionic-cuda12.1-py3.10-gcc7
docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc7-build.outputs.test-matrix }}

linux-focal-py3-clang7-mobile-build:
name: linux-focal-py3-clang7-mobile-build
Expand Down Expand Up @@ -272,25 +283,25 @@ jobs:
{ config: "default", shard: 3, num_shards: 3, runner: "windows.4xlarge.nonephemeral" },
]}
linux-bionic-cpu-py3_10-gcc7-bazel-test:
name: linux-bionic-cpu-py3.10-gcc7-bazel-test
linux-bionic-cpu-py3_10-gcc9-bazel-test:
name: linux-bionic-cpu-py3.10-gcc9-bazel-test
uses: ./.github/workflows/_bazel-build-test.yml
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7-bazel-test
docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
build-environment: linux-bionic-cuda12.1-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9
cuda-version: cpu
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1, runner: "linux.4xlarge" },
]}
linux-bionic-cuda11_8-py3_10-gcc7-bazel-test:
name: linux-bionic-cuda11.8-py3.10-gcc7-bazel-test
linux-bionic-cuda12_1-py3_10-gcc9-bazel-test:
name: linux-bionic-cuda12.1-py3.10-gcc9-bazel-test
uses: ./.github/workflows/_bazel-build-test.yml
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7-bazel-test
docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
cuda-version: "11.8"
build-environment: linux-bionic-cuda12.1-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9
cuda-version: "12.1"
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
Expand Down Expand Up @@ -346,12 +357,12 @@ jobs:
{ config: "default", shard: 3, num_shards: 3, runner: "linux.rocm.gpu" },
]}
linux-bionic-cuda11_8-py3_10-gcc7-sm86-build:
name: linux-bionic-cuda11.8-py3.10-gcc7-sm86
linux-bionic-cuda12_1-py3_10-gcc9-sm86-build:
name: linux-bionic-cuda12.1-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7-sm86
docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
build-environment: linux-bionic-cuda12.1-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
Expand All @@ -362,11 +373,11 @@ jobs:
{ config: "default", shard: 5, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
linux-bionic-cuda11_8-py3_10-gcc7-sm86-test:
name: linux-bionic-cuda11.8-py3.10-gcc7-sm86
linux-bionic-cuda12_1-py3_10-gcc9-sm86-test:
name: linux-bionic-cuda12.1-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-test.yml
needs: linux-bionic-cuda11_8-py3_10-gcc7-sm86-build
needs: linux-bionic-cuda12_1-py3_10-gcc9-sm86-build
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7-sm86
docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-sm86-build.outputs.test-matrix }}
build-environment: linux-bionic-cuda12.1-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-sm86-build.outputs.test-matrix }}
42 changes: 21 additions & 21 deletions .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,48 +29,48 @@ jobs:
{ config: "default", shard: 1, num_shards: 1 },
]}
linux-bionic-cuda11_8-py3_10-gcc7-build:
name: linux-bionic-cuda11.8-py3.10-gcc7
linux-bionic-cuda12_1-py3_10-gcc9-build:
name: linux-bionic-cuda12.1-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7
docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
build-environment: linux-bionic-cuda12.1-py3.10-gcc9
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-bionic-cuda11_8-py3_10-gcc7-test:
name: linux-bionic-cuda11.8-py3.10-gcc7
linux-bionic-cuda12_1-py3_10-gcc9-test:
name: linux-bionic-cuda12.1-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs: linux-bionic-cuda11_8-py3_10-gcc7-build
needs: linux-bionic-cuda12_1-py3_10-gcc9-build
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7
docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-build.outputs.test-matrix }}
build-environment: linux-bionic-cuda12.1-py3.10-gcc9
docker-image: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }}

libtorch-linux-bionic-cuda12_1-py3_7-gcc9-debug-build:
name: libtorch-linux-bionic-cuda12.1-py3.7-gcc9-debug
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: libtorch-linux-bionic-cuda12.1-py3.7-gcc9
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9
build-environment: libtorch-linux-bionic-cuda11.8-py3.7-gcc9
docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc9
build-generates-artifacts: false
runner: linux.4xlarge
runner: linux.2xlarge
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
# no-ops builds test USE_PER_OPERATOR_HEADERS=0 where ATen/ops is not generated
linux-bionic-cuda11_8-py3_10-gcc7-no-ops-build:
name: linux-bionic-cuda11.8-py3.10-gcc7-no-ops
linux-bionic-cuda12_1-py3_10-gcc9-no-ops-build:
name: linux-bionic-cuda12.1-py3.10-gcc9-no-ops
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7-no-ops
docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
build-environment: linux-bionic-cuda12.1-py3.10-gcc9-no-ops
docker-image-name: pytorch-linux-bionic-cuda12.1-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
Expand Down Expand Up @@ -157,12 +157,12 @@ jobs:
cuda-version: cpu
test-matrix: ${{ needs.win-vs2019-cpu-py3-build.outputs.test-matrix }}

win-vs2019-cuda11_7-py3-build:
name: win-vs2019-cuda11.7-py3
win-vs2019-cuda11_8-py3-build:
name: win-vs2019-cuda11.8-py3
uses: ./.github/workflows/_win-build.yml
with:
build-environment: win-vs2019-cuda11.7-py3
cuda-version: "11.7"
build-environment: win-vs2019-cuda11.8-py3
cuda-version: "11.8"
sync-tag: win-cuda-build
test-matrix: |
{ include: [
Expand Down
10 changes: 5 additions & 5 deletions aten/src/ATen/native/LinearAlgebra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3144,21 +3144,21 @@ Tensor linalg_tensorsolve(const Tensor& self, const Tensor& other, OptionalIntAr
}

// result_shape is self_.sizes[-(an-other.dim):]
std::vector<int64_t> result_shape = self_.sizes().slice(other.dim(), ndim - other.dim()).vec();
std::vector<c10::SymInt> result_shape = self_.sym_sizes().slice(other.dim(), ndim - other.dim()).vec();

int64_t result_product = c10::multiply_integers(result_shape.begin(), result_shape.end());
int64_t other_product = c10::multiply_integers(other.sizes().begin(), other.sizes().end());
c10::SymInt result_product = c10::multiply_integers(result_shape.begin(), result_shape.end());
c10::SymInt other_product = c10::multiply_integers(other.sym_sizes().begin(), other.sym_sizes().end());

// Check whether the self tensor can be reshaped to the 2D square matrix
TORCH_CHECK(result_product == other_product,
"Expected self to satisfy the requirement prod(self.shape[other.ndim:]) == prod(self.shape[:other.ndim]), but got ",
result_product, " != ", other_product);

self_ = self_.reshape({result_product, result_product});
self_ = self_.reshape_symint({result_product, result_product});

// normally `other` would be flattened, but at::linalg_solve expects 2D input
Tensor result = at::linalg_solve(self_, other.flatten());
return result.reshape(result_shape);
return result.reshape_symint(result_shape);
}

Tensor& linalg_tensorsolve_out(const Tensor& self, const Tensor& other, OptionalIntArrayRef dims, Tensor& result) {
Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/native/LinearAlgebraUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,8 @@ static inline void checkNotComplexTolerance(const Tensor& tol, const c10::string
This rule is compatible with NumPy, see https://github.com/numpy/numpy/blob/v1.20.0/numpy/linalg/linalg.py#L384-L389
*/
static inline bool linalg_solve_is_vector_rhs(const Tensor& input, const Tensor& other) {
// NOTE(review): the four statements below look like the pre-change
// (IntArrayRef / sizes()) and post-change (SymIntArrayRef / sym_sizes())
// versions of the same two lines, merged together by the diff rendering.
// Only one pair should exist in the real file — confirm against upstream
// before building; as written this redefines both local variables.
auto expected_batched_rhs_shape = IntArrayRef(input.sizes().data(), input.dim() - 1); // input.shape[:-1]
bool vector_case = other.dim() == 1 || (input.dim() - 1 == other.dim() && other.sizes().equals(expected_batched_rhs_shape));
auto expected_batched_rhs_shape = SymIntArrayRef(input.sym_sizes().data(), input.dim() - 1); // input.shape[:-1]
bool vector_case = other.dim() == 1 || (input.dim() - 1 == other.dim() && other.sym_sizes().equals(expected_batched_rhs_shape));
// `other` is treated as a vector right-hand side when it is 1-D, or when it
// has exactly one dimension fewer than `input` and its shape equals
// input.shape[:-1] (the batched-RHS shape computed above).
return vector_case;
}

Expand Down

0 comments on commit ca82ec1

Please sign in to comment.