
Commit c875c84

Author: Vincent Moens

[CI] Fix CI issues (#2084)
(cherry picked from commit 730dd45)
1 parent: a7a0925

File tree: 6 files changed (+35, -10 lines)

.github/unittest/linux_libs/scripts_habitat/setup_env.sh

Lines changed: 5 additions & 3 deletions
@@ -39,9 +39,11 @@ if [ ! -d "${env_dir}" ]; then
     conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
 fi
 conda activate "${env_dir}"
-#pip3 uninstall cython -y
-#pip uninstall cython -y
-#conda uninstall cython -y
+
+# set debug variables
+conda env config vars set MAGNUM_LOG=debug HABITAT_SIM_LOG=debug
+conda deactivate && conda activate "${env_dir}"
+
 pip3 install "cython<3"
 conda install -c anaconda cython="<3.0.0" -y
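
For context, `conda env config vars set` makes MAGNUM_LOG and HABITAT_SIM_LOG persistent for this environment, which is why the script deactivates and re-activates it right afterwards. A minimal Python check, not part of this commit, to confirm the values are visible inside the re-activated environment (variable names taken from the diff above):

# check_debug_vars.py - illustrative only; run inside the re-activated conda env
import os

for name in ("MAGNUM_LOG", "HABITAT_SIM_LOG"):
    # conda env config vars are exported on activation, so they appear in os.environ
    print(f"{name}={os.environ.get(name, '<unset>')}")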

.github/workflows/test-linux-habitat.yml

Lines changed: 3 additions & 3 deletions
@@ -19,14 +19,14 @@ jobs:
   tests:
     strategy:
       matrix:
-        python_version: ["3.9"] # "3.8", "3.9", "3.10", "3.11"
-        cuda_arch_version: ["11.6"] # "11.6", "11.7"
+        python_version: ["3.9"]
+        cuda_arch_version: ["12.1"]
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
       repository: pytorch/rl
-      docker-image: "nvidia/cuda:12.2.0-devel-ubuntu22.04"
+      docker-image: "nvidia/cuda:12.1.1-devel-ubuntu22.04"
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda_arch_version }}
       timeout: 90

.github/workflows/test-linux-libs.yml

Lines changed: 13 additions & 3 deletions
@@ -53,14 +53,16 @@ jobs:
   unittests-brax:
     strategy:
       matrix:
-        python_version: ["3.9"]
+        python_version: ["3.11"]
         cuda_arch_version: ["12.1"]
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments') }}
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
       runner: "linux.g5.4xlarge.nvidia.gpu"
       gpu-arch-type: cuda
       gpu-arch-version: "11.7"
+      docker-image: "pytorch/manylinux-cuda124"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -73,7 +75,7 @@ jobs:
 
         set -euo pipefail
 
-        export PYTHON_VERSION="3.9"
+        export PYTHON_VERSION="3.11"
         export CU_VERSION="12.1"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
@@ -123,7 +125,7 @@ jobs:
       matrix:
         python_version: ["3.9"]
         cuda_arch_version: ["12.1"]
-    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Data') }}
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments') }}
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -224,12 +226,14 @@ jobs:
       matrix:
         python_version: ["3.9"]
         cuda_arch_version: ["12.1"]
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments') }}
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
       runner: "linux.g5.4xlarge.nvidia.gpu"
       gpu-arch-type: cuda
       gpu-arch-version: "11.7"
+      docker-image: "pytorch/manylinux-cuda124"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -324,12 +328,14 @@ jobs:
         bash .github/unittest/linux_libs/scripts_openx/post_process.sh
 
   unittests-pettingzoo:
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments') }}
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
       runner: "linux.g5.4xlarge.nvidia.gpu"
       gpu-arch-type: cuda
       gpu-arch-version: "11.7"
+      docker-image: "pytorch/manylinux-cuda124"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -360,6 +366,7 @@ jobs:
       matrix:
         python_version: ["3.9"]
         cuda_arch_version: ["12.1"]
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments') }}
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
@@ -468,6 +475,7 @@ jobs:
       runner: "linux.g5.4xlarge.nvidia.gpu"
       gpu-arch-type: cuda
       gpu-arch-version: "11.7"
+      docker-image: "pytorch/manylinux-cuda124"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -532,12 +540,14 @@ jobs:
       matrix:
         python_version: ["3.9"]
         cuda_arch_version: ["12.1"]
+    if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'Environments') }}
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
       repository: pytorch/rl
       runner: "linux.g5.4xlarge.nvidia.gpu"
       gpu-arch-type: cuda
       gpu-arch-version: "11.7"
+      docker-image: "pytorch/manylinux-cuda124"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then

test/test_env.py

Lines changed: 4 additions & 0 deletions
@@ -109,6 +109,10 @@
 
 IS_OSX = platform == "darwin"
 IS_WIN = platform == "win32"
+if IS_WIN:
+    mp_ctx = "spawn"
+else:
+    mp_ctx = "fork"
 
 ## TO BE FIXED: DiscreteActionProjection queries a randint on each worker, which leads to divergent results between
 ## the serial and parallel batched envs
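
The added mp_ctx selection matters because the standard library only provides the "fork" start method on POSIX systems, while Windows must use "spawn". A standalone sketch, not taken from the test file, of how such a context is typically obtained and used:

# mp_ctx_demo.py - standalone illustration of the spawn/fork choice above
import multiprocessing as mp
from sys import platform

mp_ctx = "spawn" if platform == "win32" else "fork"

def _worker(queue):
    queue.put("ok")

if __name__ == "__main__":
    ctx = mp.get_context(mp_ctx)  # raises ValueError if the method is unsupported here
    queue = ctx.Queue()
    proc = ctx.Process(target=_worker, args=(queue,))
    proc.start()
    print(queue.get())  # -> "ok"
    proc.join()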

test/test_modules.py

Lines changed: 3 additions & 1 deletion
@@ -8,7 +8,8 @@
 import numpy as np
 import pytest
 import torch
-from _utils_internal import get_default_devices
+
+from _utils_internal import get_default_devices, retry
 from mocking_classes import MockBatchedUnLockedEnv
 from packaging import version
 from tensordict import TensorDict
@@ -889,6 +890,7 @@ def _get_mock_input_td(
         )
         return td
 
+    @retry(AssertionError, 3)
     @pytest.mark.parametrize("n_agents", [1, 3])
     @pytest.mark.parametrize("share_params", [True, False])
     @pytest.mark.parametrize("centralised", [True, False])
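
`retry` is imported from test/_utils_internal.py, which is not part of this diff. As a rough illustration only, an assumption about its shape rather than the actual implementation, a decorator with this call signature usually re-runs the wrapped test a fixed number of times and re-raises the final failure:

# retry_sketch.py - hypothetical stand-in for _utils_internal.retry
import functools

def retry(exc_type, tries):
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(tries):
                try:
                    return fn(*args, **kwargs)
                except exc_type:
                    # re-raise on the last attempt, otherwise try again
                    if attempt == tries - 1:
                        raise
        return wrapper
    return decorator

@retry(AssertionError, 3)
def flaky_check():
    ...  # a test body that occasionally fails with AssertionError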

test/test_transforms.py

Lines changed: 7 additions & 0 deletions
@@ -13,6 +13,7 @@
 import sys
 from copy import copy
 from functools import partial
+from sys import platform
 
 import numpy as np
 import pytest
@@ -119,6 +120,12 @@
 from torchrl.envs.utils import check_env_specs, step_mdp
 from torchrl.modules import GRUModule, LSTMModule, MLP, ProbabilisticActor, TanhNormal
 
+IS_WIN = platform == "win32"
+if IS_WIN:
+    mp_ctx = "spawn"
+else:
+    mp_ctx = "fork"
+
 TIMEOUT = 100.0
 
 _has_gymnasium = importlib.util.find_spec("gymnasium") is not None
