Update base for Update on "[BE][SparseAdam] cleaner way to verify no …
Browse files Browse the repository at this point in the history
…sparse params"

Context:

#47724 fixed the problem that SparseAdam could not handle generators by using the `list(...)` construct. However, this meant that SparseAdam deviated from other optimizers in that it could _accept_ a raw Tensor/Parameter instead of requiring a container of them. This is not really a big deal.

So why this PR?

I do think this PR is cleaner. It uses the fact that the Optimizer parent class already containerizes parameters into parameter groups, so we can reuse that here by calling `super().__init__` first and then filtering the param_groups afterwards. This change also makes SparseAdam consistent with the rest of our optimizers in that only containerized params are accepted, which is technically BC-breaking, but the breakage should be minor, I believe.
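A minimal sketch of the idea (illustrative only, not the exact code in this PR; the error message wording is made up):

```python
from torch.optim.optimizer import Optimizer


class SparseAdam(Optimizer):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
        defaults = dict(lr=lr, betas=betas, eps=eps)
        # Let the parent class containerize `params` into self.param_groups.
        # This also rejects a bare Tensor, matching the other optimizers.
        super().__init__(params, defaults)

        # Then filter the already-built param_groups for sparse parameters.
        sparse_params = []
        for group_idx, group in enumerate(self.param_groups):
            for param_idx, param in enumerate(group["params"]):
                if param.is_sparse:
                    sparse_params.append([group_idx, param_idx])
        if sparse_params:
            raise ValueError(
                f"SparseAdam requires dense parameter tensors; "
                f"found sparse params at indices {sparse_params}"
            )
```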




[ghstack-poisoned]
janeyx99 committed Nov 28, 2023
2 parents 97d2b43 + e6a8052 commit 5622adb
Showing 370 changed files with 15,007 additions and 4,324 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/executorch.txt
@@ -1 +1 @@
-9682172576d5d9a10f3162ad91e0a32b384a3b7c
+5159de436ced71c78bc1c22e3c1d93654c429227
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/triton-rocm.txt
@@ -1 +1 @@
-e8a35b3968780e48df1374482d56cc6cdbb9e351
+dafe1459823b9549417ed95e9720f1b594fab329
10 changes: 8 additions & 2 deletions .ci/docker/requirements-ci.txt
@@ -75,10 +75,10 @@ librosa>=0.6.2 ; python_version < "3.11"
#Pinned versions:
#test that import:

-mypy==1.6.0
+mypy==1.7.0
# Pin MyPy version because new errors are likely to appear with each release
#Description: linter
-#Pinned versions: 1.6.0
+#Pinned versions: 1.7.0
#test that import: test_typing.py, test_type_hints.py

networkx==2.8.8
@@ -292,3 +292,9 @@ tensorboard==2.13.0
#Description: Also included in .ci/docker/requirements-docs.txt
#Pinned versions:
#test that import: test_tensorboard

+pywavelets==1.4.1
+#Description: This is a requirement of scikit-image, we need to pin
+# it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
+#Pinned versions: 1.4.1
+#test that import:
27 changes: 10 additions & 17 deletions .ci/pytorch/test.sh
@@ -332,7 +332,7 @@ if [[ "${TEST_CONFIG}" == *dynamic* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--dynamic-shapes --dynamic-batch-only)
fi

if [[ "${TEST_CONFIG}" == *cpu_accuracy* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_inductor* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
else
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
@@ -451,19 +451,12 @@ test_single_dynamo_benchmark() {
"${DYNAMO_BENCHMARK_FLAGS[@]}" \
"$@" "${partition_flags[@]}" \
--output "$TEST_REPORTS_DIR/${name}_${suite}.csv"

if [[ "${TEST_CONFIG}" == *inductor* ]] && [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
# other jobs (e.g. periodic, cpu-accuracy) may have different set of expected models.
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
python benchmarks/dynamo/check_graph_breaks.py \
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
else
python benchmarks/dynamo/check_csv.py \
-f "$TEST_REPORTS_DIR/${name}_${suite}.csv"
fi
+  python benchmarks/dynamo/check_accuracy.py \
+    --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
+    --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
+  python benchmarks/dynamo/check_graph_breaks.py \
+    --actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
+    --expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
fi
}

@@ -481,7 +474,7 @@ test_dynamo_benchmark() {
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
else
if [[ "${TEST_CONFIG}" == *cpu_accuracy* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_inductor* ]]; then
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --float32 "$@"
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
@@ -1063,7 +1056,7 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
id=$((SHARD_NUMBER-1))
test_dynamo_benchmark timm_models "$id"
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_accuracy* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_inductor* ]]; then
install_torchaudio cpu
else
install_torchaudio cuda
@@ -1080,7 +1073,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
checkout_install_torchbench
# Do this after checkout_install_torchbench to ensure we clobber any
# nightlies that torchbench may pull in
if [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
if [[ "${TEST_CONFIG}" != *cpu_inductor* ]]; then
install_torchrec_and_fbgemm
fi
PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
7 changes: 0 additions & 7 deletions .circleci/scripts/binary_populate_env.sh
@@ -77,15 +77,8 @@ else
export PYTORCH_BUILD_VERSION="${BASE_BUILD_VERSION}+$DESIRED_CUDA"
fi

-# The build with with-pypi-cudnn suffix is only applicabe to
-# pypi small wheel Linux x86 build
-if [[ -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]] && [[ "$(uname)" == 'Linux' && "$(uname -m)" == "x86_64" ]]; then
-  export PYTORCH_BUILD_VERSION="${PYTORCH_BUILD_VERSION}-with-pypi-cudnn"
-fi

export PYTORCH_BUILD_NUMBER=1


JAVA_HOME=
BUILD_JNI=OFF
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
5 changes: 0 additions & 5 deletions .circleci/scripts/binary_upload.sh
@@ -16,11 +16,6 @@ UPLOAD_BUCKET="s3://pytorch"
BACKUP_BUCKET="s3://pytorch-backup"
BUILD_NAME=${BUILD_NAME:-}

-# this is temporary change to upload pypi-cudnn builds to separate folder
-if [[ ${BUILD_NAME} == *with-pypi-cudnn* ]]; then
-  UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_cudnn"
-fi

DRY_RUN=${DRY_RUN:-enabled}
# Don't actually do work unless explicit
ANACONDA="true anaconda"
5 changes: 4 additions & 1 deletion .flake8
@@ -18,14 +18,17 @@ ignore =
# these ignores are from flake8-comprehensions; please fix!
C407,
# these ignores are from flake8-logging-format; please fix!
-G100,G101,G200,G201,G202
+G100,G101,G200
# these ignores are from flake8-simplify. please fix or ignore with commented reason
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
# flake8-simplify code styles
SIM102,SIM103,SIM106,SIM112,
# TorchFix codes that don't make sense for PyTorch itself:
# removed and deprecated PyTorch functions.
TOR001,TOR101,
+# TODO(kit1980): fix all TOR102 issues
+# `torch.load` without `weights_only` parameter is unsafe
+TOR102,
per-file-ignores =
__init__.py: F401
torch/utils/cpp_extension.py: B950
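For context on the new TOR102 suppression, the flagged pattern and its fix look like this (illustrative snippet with a made-up checkpoint path, not code from this diff):

```python
import torch

# TOR102 flags this form: torch.load without weights_only can execute
# arbitrary pickled code from an untrusted checkpoint.
state = torch.load("checkpoint.pt")

# The safe form restricts deserialization to plain tensors and containers.
state = torch.load("checkpoint.pt", weights_only=True)
```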
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/audio.txt
@@ -1 +1 @@
-a8f4e97bd5356a7a77510cdf6a3a62e25a5dc602
+db624844f5c95bb7618fe5a5f532bf9b68efeb45
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/vision.txt
@@ -1 +1 @@
-4433680aa57439ed684f9854fac3443b76e03c03
+893b4abdc0c9df36c241c58769810f69e35dab48
10 changes: 5 additions & 5 deletions .github/labeler.yml
@@ -69,8 +69,8 @@
- .ci/docker/ci_commit_pins/triton.txt

"module: distributed":
-- /torch/csrc/distributed/**
-- /torch/distributed/**
-- /torch/nn/parallel/**
-- /test/distributed/**
-- /torch/testing/_internal/distributed/**
+- torch/csrc/distributed/**
+- torch/distributed/**
+- torch/nn/parallel/**
+- test/distributed/**
+- torch/testing/_internal/distributed/**
14 changes: 14 additions & 0 deletions .github/merge_rules.yaml
@@ -74,6 +74,7 @@

- name: OSS CI / pytorchbot
patterns:
+- .github/ci_commit_pins/audio.txt
- .github/ci_commit_pins/vision.txt
- .github/ci_commit_pins/torchdynamo.txt
- .ci/docker/ci_commit_pins/triton.txt
@@ -84,6 +85,19 @@
- EasyCLA
- Lint
- pull
+- inductor
+
+- name: OSS CI /pytorchbot / Executorch
+  patterns:
+  - .ci/docker/ci_commit_pins/executorch.txt
+  approved_by:
+  - pytorchbot
+  ignore_flaky_failures: false
+  mandatory_checks_name:
+  - EasyCLA
+  - Lint
+  - pull / linux-jammy-py3-clang12-executorch / build
+  - pull / linux-jammy-py3-clang12-executorch / test (executorch, 1, 1, linux.2xlarge)

- name: OSS CI / pytorchbot / XLA
patterns:
56 changes: 26 additions & 30 deletions .github/scripts/generate_binary_build_matrix.py
@@ -10,9 +10,9 @@
* Latest ROCM
"""

+import os
from typing import Dict, List, Optional, Tuple


CUDA_ARCHES = ["11.8", "12.1"]


@@ -95,7 +95,7 @@ def arch_type(arch_version: str) -> str:


# This can be updated to the release version when cutting release branch, i.e. 2.1
DEFAULT_TAG = "main"
DEFAULT_TAG = os.getenv("RELEASE_VERSION_TAG", "main")

WHEEL_CONTAINER_IMAGES = {
**{
@@ -264,7 +264,6 @@ def generate_wheels_matrix(
os: str,
arches: Optional[List[str]] = None,
python_versions: Optional[List[str]] = None,
-    gen_special_an_non_special_wheel: bool = True,
) -> List[Dict[str, str]]:
package_type = "wheel"
if os == "linux" or os == "linux-aarch64":
@@ -298,8 +297,7 @@
else arch_version
)

-# special 12.1 wheels package without dependencies
-# dependency downloaded via pip install
+# 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
if arch_version == "12.1" and os == "linux":
ret.append(
{
@@ -313,35 +311,33 @@
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS,
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-with-pypi-cudnn".replace( # noqa: B950
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace( # noqa: B950
".", "_"
),
}
)
-if not gen_special_an_non_special_wheel:
-    continue

-ret.append(
-    {
-        "python_version": python_version,
-        "gpu_arch_type": gpu_arch_type,
-        "gpu_arch_version": gpu_arch_version,
-        "desired_cuda": translate_desired_cuda(
-            gpu_arch_type, gpu_arch_version
-        ),
-        "devtoolset": "cxx11-abi"
-        if arch_version == "cpu-cxx11-abi"
-        else "",
-        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
-        "package_type": package_type,
-        "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
-            ".", "_"
-        ),
-        "pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS
-        if os != "linux"
-        else "",
-    }
-)
+else:
+    ret.append(
+        {
+            "python_version": python_version,
+            "gpu_arch_type": gpu_arch_type,
+            "gpu_arch_version": gpu_arch_version,
+            "desired_cuda": translate_desired_cuda(
+                gpu_arch_type, gpu_arch_version
+            ),
+            "devtoolset": "cxx11-abi"
+            if arch_version == "cpu-cxx11-abi"
+            else "",
+            "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
+            "package_type": package_type,
+            "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
+                ".", "_"
+            ),
+            "pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS
+            if os != "linux"
+            else "",
+        }
+    )
return ret
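For intuition, a hypothetical call under the consolidated logic (the printed value is derived from the build_name template above):

```python
# One entry per (python_version, arch); on Linux the CUDA 12.1 entries now
# carry pytorch_extra_install_requirements instead of a special variant.
matrix = generate_wheels_matrix("linux", arches=["12.1"], python_versions=["3.8"])
print(matrix[0]["build_name"])  # wheel-py3_8-cuda12_1 (no -with-pypi-cudnn suffix)
```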


1 change: 0 additions & 1 deletion .github/scripts/generate_ci_workflows.py
@@ -158,7 +158,6 @@ class OperatingSystem:
OperatingSystem.LINUX,
arches=["11.8", "12.1"],
python_versions=["3.8"],
-            gen_special_an_non_special_wheel=False,
),
branches="main",
),
12 changes: 11 additions & 1 deletion .github/scripts/github_utils.py
@@ -178,4 +178,14 @@ def gh_fetch_merge_base(org: str, repo: str, base: str, head: str) -> str:

def gh_update_pr_state(org: str, repo: str, pr_num: int, state: str = "open") -> None:
url = f"{GITHUB_API_URL}/repos/{org}/{repo}/pulls/{pr_num}"
-    gh_fetch_url(url, method="PATCH", data={"state": state})
+    try:
+        gh_fetch_url(url, method="PATCH", data={"state": state})
+    except HTTPError as err:
+        # When trying to open the pull request, error 422 means that the branch
+        # has been deleted and the API couldn't re-open it
+        if err.code == 422 and state == "open":
+            warnings.warn(
+                f"Failed to open {pr_num} because its head branch has been deleted: {err}"
+            )
+        else:
+            raise
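This assumes `warnings` and `HTTPError` are imported at the top of github_utils.py (not shown in this hunk). A hypothetical call illustrating the new behavior:

```python
# If PR 12345's head branch was already deleted, GitHub answers the re-open
# PATCH with 422; gh_update_pr_state now warns instead of raising.
gh_update_pr_state("pytorch", "pytorch", 12345, state="open")
```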
64 changes: 64 additions & 0 deletions .github/scripts/tag_docker_images_for_release.py
@@ -0,0 +1,64 @@
import argparse
import subprocess
from typing import Dict

import generate_binary_build_matrix


def tag_image(
image: str,
default_tag: str,
release_version: str,
dry_run: str,
tagged_images: Dict[str, bool],
) -> None:
if image in tagged_images:
return
release_image = image.replace(f"-{default_tag}", f"-{release_version}")
print(f"Tagging {image} to {release_image} , dry_run: {dry_run}")

if dry_run == "disabled":
subprocess.check_call(["docker", "pull", image])
subprocess.check_call(["docker", "tag", image, release_image])
subprocess.check_call(["docker", "push", release_image])
tagged_images[image] = True


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument(
"--version",
help="Version to tag",
type=str,
default="2.2",
)
parser.add_argument(
"--dry-run",
help="No Runtime Error check",
type=str,
choices=["enabled", "disabled"],
default="enabled",
)

options = parser.parse_args()
tagged_images: Dict[str, bool] = dict()
platform_images = [
generate_binary_build_matrix.WHEEL_CONTAINER_IMAGES,
generate_binary_build_matrix.LIBTORCH_CONTAINER_IMAGES,
generate_binary_build_matrix.CONDA_CONTAINER_IMAGES,
]
default_tag = generate_binary_build_matrix.DEFAULT_TAG

for platform_image in platform_images: # type: ignore[attr-defined]
for arch in platform_image.keys(): # type: ignore[attr-defined]
tag_image(
platform_image[arch], # type: ignore[index]
default_tag,
options.version,
options.dry_run,
tagged_images,
)


if __name__ == "__main__":
main()
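The tag rewrite in `tag_image` is a plain string substitution; a small illustration with a made-up image name:

```python
# tag_image() swaps the "-main" default-tag suffix for the release version
# before pushing the re-tagged image.
image = "pytorch/manylinux-builder:cuda12.1-main"
print(image.replace("-main", "-2.2"))  # pytorch/manylinux-builder:cuda12.1-2.2
```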
9 changes: 9 additions & 0 deletions .github/workflows/_linux-build.yml
@@ -93,6 +93,15 @@ jobs:
with:
docker-image-name: ${{ inputs.docker-image-name }}

+- name: Use following to pull public copy of the image
+  id: print-ghcr-mirror
+  env:
+    ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
+  shell: bash
+  run: |
+    tag=${ECR_DOCKER_IMAGE##*/}
+    echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
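The two shell expansions in the new step map the internal ECR reference to the public ghcr.io mirror tag; the same transform in Python, with a made-up image reference:

```python
# ${ECR_DOCKER_IMAGE##*/} keeps everything after the last "/", and
# ${tag/:/-} turns the first ":" into "-".
ecr_image = "123456789012.dkr.ecr.us-east-1.amazonaws.com/pytorch-linux-jammy:0a1b2c3"
tag = ecr_image.rsplit("/", 1)[-1]
print(f"docker pull ghcr.io/pytorch/ci-image:{tag.replace(':', '-', 1)}")
# -> docker pull ghcr.io/pytorch/ci-image:pytorch-linux-jammy-0a1b2c3
```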
