diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..a5457f58 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,36 @@ +name: Lint + +on: + push: + branches: + - main + pull_request: + +jobs: + lint: + runs-on: ubuntu-20.04 + steps: + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + architecture: x64 + - name: Checkout + uses: actions/checkout@v2 + - name: Install Dependencies + run: | + set -eux + + sudo apt-get install -y protobuf-compiler + + pip install .[dev] -v + - name: Run Python Lint + run: | + set -eux + + black --check . + - name: Run Rust Lint + run: | + set -eux + + cargo fmt --check diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml new file mode 100644 index 00000000..9512c544 --- /dev/null +++ b/.github/workflows/unittest.yaml @@ -0,0 +1,42 @@ +name: Unit Tests + +on: + push: + branches: + - main + pull_request: + +jobs: + unittest: + strategy: + matrix: + os: + - ubuntu-20.04 + - "linux.4xlarge.nvidia.gpu" + runs-on: ${{ matrix.os }} + steps: + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + architecture: x64 + - name: Checkout + uses: actions/checkout@v2 + - name: Install Dependencies + run: | + set -eux + + sudo apt-get install -y protobuf-compiler + + pip install -e .[dev] -v + - name: Run Python Tests + run: | + set -eux + + pytest -v + - name: Run Rust Tests + run: | + set -eux + + cargo test -v + diff --git a/torchft/process_group_test.py b/torchft/process_group_test.py index f5399396..3b3aafd9 100644 --- a/torchft/process_group_test.py +++ b/torchft/process_group_test.py @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-from unittest import TestCase +from unittest import TestCase, skipUnless import torch from torch.distributed import TCPStore, ReduceOp @@ -73,6 +73,7 @@ def test_dummy(self) -> None: m = torch.nn.parallel.DistributedDataParallel(m, process_group=pg) m(torch.rand(2, 3)) + @skipUnless(torch.cuda.is_available(), "needs CUDA") def test_baby_nccl(self) -> None: store = TCPStore( host_name="localhost", port=0, is_master=True, wait_for_workers=False