diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
old mode 100644
new mode 100755
index 2cdcc1287e9..ee9eb1e745b
--- a/.ci/scripts/gather_test_models.py
+++ b/.ci/scripts/gather_test_models.py
@@ -30,6 +30,17 @@
     }
 }
 
+DEFAULT_TIMEOUT = 90
+CUSTOM_TIMEOUT = {
+    # Just some examples on how custom timeout can be set
+    "linux": {
+        "mobilebert": 90,
+    },
+    "macos": {
+        "mobilebert": 90,
+    },
+}
+
 
 def parse_args() -> Any:
     from argparse import ArgumentParser
@@ -96,12 +107,13 @@ def export_models_for_ci() -> dict[str, dict]:
                 "model": "mv3",
                 "backend": backend,
                 "runner": "linux.2xlarge",
+                "timeout": DEFAULT_TIMEOUT,
             }
             models["include"].append(record)
 
     # Add all models for CMake E2E validation
     # CMake supports both linux and macos
-    for (name, backend) in itertools.product(
+    for name, backend in itertools.product(
         MODEL_NAME_TO_MODEL.keys(), ["portable", "xnnpack"]
     ):
         if not model_should_run_on_event(name, event):
@@ -121,8 +133,13 @@
             "model": name,
             "backend": backend,
             "runner": DEFAULT_RUNNERS.get(target_os, "linux.2xlarge"),
+            "timeout": DEFAULT_TIMEOUT,
         }
 
+        # Set the custom timeout if needed
+        if target_os in CUSTOM_TIMEOUT and name in CUSTOM_TIMEOUT[target_os]:
+            record["timeout"] = CUSTOM_TIMEOUT[target_os].get(name, DEFAULT_TIMEOUT)
+
         # NB: Some model requires much bigger Linux runner to avoid
         # running OOM. The team is investigating the root cause
         if target_os in CUSTOM_RUNNERS and name in CUSTOM_RUNNERS.get(target_os, {}):
diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml
index 723dfa05372..93a63bbfc8e
--- a/.github/workflows/_unittest.yml
+++ b/.github/workflows/_unittest.yml
@@ -24,7 +24,7 @@ jobs:
       docker-image: ${{ inputs.docker-image }}
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 60
+      timeout: 90
       script: |
         set -eux
diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml
index 12b0f35269b..ee5cfb859b3
--- a/.github/workflows/doc-build.yml
+++ b/.github/workflows/doc-build.yml
@@ -26,6 +26,7 @@ jobs:
       submodules: 'true'
       repository: pytorch/executorch
       upload-artifact: docs
+      timeout: 90
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -91,6 +92,7 @@ jobs:
       repository: pytorch/executorch
       download-artifact: docs
       ref: gh-pages
+      timeout: 90
       script: |
         set -euo pipefail
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index ad67f053396..a974b72930a
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -43,7 +43,7 @@ jobs:
       docker-image: executorch-ubuntu-22.04-clang12
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
+      timeout: ${{ matrix.timeout }}
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 16dba2b407f..0acb1c5f0ee
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -44,7 +44,7 @@ jobs:
       python-version: '3.11'
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
+      timeout: ${{ matrix.timeout }}
       script: |
         WORKSPACE=$(pwd)
         pushd "${WORKSPACE}/pytorch/executorch"
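For reference, the per-model "timeout" field added to each matrix entry above is what the workflow changes consume through timeout: ${{ matrix.timeout }}. The lookup itself reduces to a two-level dictionary access with a default fallback; below is a minimal standalone sketch of that logic, where resolve_timeout is an illustrative helper and not part of the patch.

# Minimal sketch of the timeout resolution introduced above.
# resolve_timeout is a hypothetical helper used only for illustration.
DEFAULT_TIMEOUT = 90
CUSTOM_TIMEOUT = {
    "linux": {"mobilebert": 90},
    "macos": {"mobilebert": 90},
}


def resolve_timeout(target_os: str, model: str) -> int:
    # Fall back to DEFAULT_TIMEOUT when the OS or the model has no override.
    return CUSTOM_TIMEOUT.get(target_os, {}).get(model, DEFAULT_TIMEOUT)


if __name__ == "__main__":
    assert resolve_timeout("linux", "mobilebert") == 90
    assert resolve_timeout("macos", "mv3") == DEFAULT_TIMEOUT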
diff --git a/examples/models/llama2/model.py b/examples/models/llama2/model.py
index 235253bfff8..2eeaada39b9
--- a/examples/models/llama2/model.py
+++ b/examples/models/llama2/model.py
@@ -393,7 +393,6 @@ def forward(
     ) -> Union[
         torch.Tensor, Tuple[torch.Tensor, List[torch.Tensor], List[torch.Tensor]]
     ]:
-
         if self.use_kv_cache:
             assert (
                 cache_k is not None and cache_v is not None and start_pos is not None
diff --git a/exir/backend/utils.py b/exir/backend/utils.py
index 2e56455e130..c2ee3afac4b
--- a/exir/backend/utils.py
+++ b/exir/backend/utils.py
@@ -25,7 +25,8 @@
 log: logging.Logger = logging.getLogger(__name__)
 
 
-@lru_cache(maxsize=128)
+# NB: Set this to None to handle validation from MobileBert
+@lru_cache(maxsize=None)
 def is_same_node(
     node_left: Iterable[torch.fx.Node],
     node_right: Iterable[torch.fx.Node],
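For reference, functools.lru_cache(maxsize=None) memoizes without ever evicting entries, so repeated node comparisons on a large graph such as MobileBert's all stay cached instead of being dropped once the 128-entry limit is hit. The standalone sketch below illustrates the behavior; slow_node_compare is a hypothetical stand-in for is_same_node, not the real function.

from functools import lru_cache


@lru_cache(maxsize=None)  # unbounded cache: entries are never evicted
def slow_node_compare(left: str, right: str) -> bool:
    # Stand-in for an expensive structural equality check between graph nodes.
    return left == right


for i in range(1000):
    slow_node_compare(f"node_{i}", f"node_{i}")

# All 1000 results remain cached; with maxsize=128 only the most recent
# 128 would have survived.
print(slow_node_compare.cache_info())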