Skip to content

Commit

Permalink
[release/2.3] Fix miopenStatusInternalError caused in new ROCm6.0 CI …
Browse files Browse the repository at this point in the history
…docker images (#126942)

* Update setting of journal_mode to delete

* Revert "[Release only] Pin rocm docker images (#126452)"

This reverts commit ee68b41.

* Replace tabs with spaces for lint
  • Loading branch information
jithunnair-amd committed May 23, 2024
1 parent 71dd2de commit 661c3de
Show file tree
Hide file tree
Showing 8 changed files with 14 additions and 31 deletions.
12 changes: 10 additions & 2 deletions .ci/docker/common/install_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ install_ubuntu() {
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
# journal_mode=delete seems to work on some kdbs that have "wal" as initial journal_mode
sqlite3 $kdb "PRAGMA journal_mode=delete; PRAGMA VACUUM;"
JOURNAL_MODE=$(sqlite3 $kdb "PRAGMA journal_mode;")
# Both "delete" and "off" work in cases where user doesn't have write permissions to directory where kdbs are installed
if [[ $JOURNAL_MODE != "delete" ]] && [[ $JOURNAL_MODE != "off" ]]; then echo "kdb journal_mode change failed" && exit 1; fi
done
fi

Expand Down Expand Up @@ -163,7 +167,11 @@ install_centos() {
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
# journal_mode=delete seems to work on some kdbs that have "wal" as initial journal_mode
sqlite3 $kdb "PRAGMA journal_mode=delete; PRAGMA VACUUM;"
JOURNAL_MODE=$(sqlite3 $kdb "PRAGMA journal_mode;")
# Both "delete" and "off" work in cases where user doesn't have write permissions to directory where kdbs are installed
if [[ $JOURNAL_MODE != "delete" ]] && [[ $JOURNAL_MODE != "off" ]]; then echo "kdb journal_mode change failed" && exit 1; fi
done
fi

Expand Down
27 changes: 4 additions & 23 deletions .github/workflows/_linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@ on:
required: true
type: string
description: Name of the base docker image to build with.
docker-image-tag:
required: false
type: string
description: Name of the base docker image tag
default: ""
build-generates-artifacts:
required: false
type: boolean
Expand Down Expand Up @@ -74,7 +69,7 @@ jobs:
runs-on: ${{ inputs.runner }}
timeout-minutes: 240
outputs:
docker-image: ${{ steps.calculate-docker.outputs.docker-image }}
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
test-matrix: ${{ steps.filter.outputs.test-matrix }}
steps:
- name: Setup SSH (Click me for login details)
Expand All @@ -98,24 +93,10 @@ jobs:
with:
docker-image-name: ${{ inputs.docker-image-name }}

- name: Override docker image tag if pinned
id: calculate-docker
env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash
run: |
export NEW_TAG=${{ inputs.docker-image-tag }}
if [[ ${NEW_TAG} != '' ]]; then
IMAGE=${ECR_DOCKER_IMAGE%:*}
echo "docker-image=${IMAGE}:${NEW_TAG}" >> "${GITHUB_OUTPUT}"
else
echo "docker-image=${ECR_DOCKER_IMAGE}" >> "${GITHUB_OUTPUT}"
fi
- name: Use following to pull public copy of the image
id: print-ghcr-mirror
env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker.outputs.docker-image }}
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash
run: |
tag=${ECR_DOCKER_IMAGE##*/}
Expand All @@ -124,7 +105,7 @@ jobs:
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
with:
docker-image: ${{ steps.calculate-docker.outputs.docker-image }}
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

- name: Parse ref
id: parse-ref
Expand Down Expand Up @@ -168,7 +149,7 @@ jobs:
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
DOCKER_IMAGE: ${{ steps.calculate-docker.outputs.docker-image }}
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/inductor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ jobs:
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
docker-image-tag: cea4be730564c18dd285a12828c7c449490b10b9
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.2" },
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ jobs:
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
docker-image-tag: cea4be730564c18dd285a12828c7c449490b10b9
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 2, runner: "linux.rocm.gpu" },
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,6 @@ jobs:
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
docker-image-tag: cea4be730564c18dd285a12828c7c449490b10b9
sync-tag: rocm-build
test-matrix: |
{ include: [
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ jobs:
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
docker-image-tag: cea4be730564c18dd285a12828c7c449490b10b9
sync-tag: rocm-build
test-matrix: |
{ include: [
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ jobs:
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
docker-image-tag: cea4be730564c18dd285a12828c7c449490b10b9
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" },
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ jobs:
with:
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
docker-image-tag: cea4be730564c18dd285a12828c7c449490b10b9
sync-tag: rocm-build
test-matrix: |
{ include: [
Expand Down

0 comments on commit 661c3de

Please sign in to comment.